diff options
| author | Marijn Tamis <[email protected]> | 2017-07-03 11:49:08 +0200 |
|---|---|---|
| committer | Marijn Tamis <[email protected]> | 2017-07-03 11:49:08 +0200 |
| commit | cfa944ded7370fb5f22b1fb894ecf6b9bd3f7381 (patch) | |
| tree | 5cc014922d20561d87105d279b6f7eb3e628c6d9 | |
| parent | Fix windows line endings in github. (diff) | |
| download | nvcloth-cfa944ded7370fb5f22b1fb894ecf6b9bd3f7381.tar.xz nvcloth-cfa944ded7370fb5f22b1fb894ecf6b9bd3f7381.zip | |
NvCloth 1.1.1 Release. (22392725)v1.1.1
58 files changed, 396 insertions, 544 deletions
diff --git a/NvCloth/CmakeGenerateProjects.bat b/NvCloth/CmakeGenerateProjects.bat index a5891c9..fe50f54 100644 --- a/NvCloth/CmakeGenerateProjects.bat +++ b/NvCloth/CmakeGenerateProjects.bat @@ -30,38 +30,38 @@ REM Generate projects here rmdir /s /q compiler\vc11win32-cmake\ mkdir compiler\vc11win32-cmake\ pushd compiler\vc11win32-cmake\ -cmake ..\cmake\windows -G "Visual Studio 11 2012" -AWin32 -DTARGET_BUILD_PLATFORM=Windows -DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc11win32-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc11win32-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc11win32-cmake +cmake ..\cmake\windows -G "Visual Studio 11 2012" -AWin32 -DTARGET_BUILD_PLATFORM=windows -DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc11win32-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc11win32-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc11win32-cmake popd rmdir /s /q compiler\vc11win64-cmake\ mkdir compiler\vc11win64-cmake\ pushd compiler\vc11win64-cmake\ -cmake ..\cmake\windows -G "Visual Studio 11 2012" -Ax64 -DTARGET_BUILD_PLATFORM=Windows -DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc11win64-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc11win64-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc11win64-cmake +cmake ..\cmake\windows -G "Visual Studio 11 2012" -Ax64 -DTARGET_BUILD_PLATFORM=windows -DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc11win64-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc11win64-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc11win64-cmake popd rmdir /s /q compiler\vc12win32-cmake\ mkdir compiler\vc12win32-cmake\ pushd compiler\vc12win32-cmake\ -cmake ..\cmake\windows -G "Visual Studio 12 2013" 
-AWin32 -DTARGET_BUILD_PLATFORM=Windows -DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc12win32-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc12win32-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc12win32-cmake +cmake ..\cmake\windows -G "Visual Studio 12 2013" -AWin32 -DTARGET_BUILD_PLATFORM=windows -DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc12win32-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc12win32-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc12win32-cmake popd rmdir /s /q compiler\vc12win64-cmake\ mkdir compiler\vc12win64-cmake\ pushd compiler\vc12win64-cmake\ -cmake ..\cmake\windows -G "Visual Studio 12 2013" -Ax64 -DTARGET_BUILD_PLATFORM=Windows -DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc12win64-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc12win64-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc12win64-cmake +cmake ..\cmake\windows -G "Visual Studio 12 2013" -Ax64 -DTARGET_BUILD_PLATFORM=windows -DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc12win64-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc12win64-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc12win64-cmake popd rmdir /s /q compiler\vc14win32-cmake\ mkdir compiler\vc14win32-cmake\ pushd compiler\vc14win32-cmake\ -cmake ..\cmake\windows -G "Visual Studio 14 2015" -AWin32 -DTARGET_BUILD_PLATFORM=Windows -DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc14win32-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc14win32-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc14win32-cmake +cmake ..\cmake\windows -G "Visual Studio 14 2015" -AWin32 -DTARGET_BUILD_PLATFORM=windows 
-DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc14win32-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc14win32-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc14win32-cmake popd rmdir /s /q compiler\vc14win64-cmake\ mkdir compiler\vc14win64-cmake\ pushd compiler\vc14win64-cmake\ -cmake ..\cmake\windows -G "Visual Studio 14 2015" -Ax64 -DTARGET_BUILD_PLATFORM=Windows -DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc14win64-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc14win64-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc14win64-cmake +cmake ..\cmake\windows -G "Visual Studio 14 2015" -Ax64 -DTARGET_BUILD_PLATFORM=windows -DPX_GENERATE_GPU_PROJECTS=1 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DSTATIC_WINCRT=1 -DPX_OUTPUT_DLL_DIR=%PX_OUTPUT_ROOT%\Bin\vc14win64-cmake -DPX_OUTPUT_LIB_DIR=%PX_OUTPUT_ROOT%\Lib\vc14win64-cmake -DPX_OUTPUT_EXE_DIR=%PX_OUTPUT_ROOT%\Bin\vc14win64-cmake popd diff --git a/NvCloth/GenerateProjectsLinux.sh b/NvCloth/GenerateProjectsLinux.sh index e34e51f..56d1788 100644 --- a/NvCloth/GenerateProjectsLinux.sh +++ b/NvCloth/GenerateProjectsLinux.sh @@ -19,23 +19,23 @@ export PX_OUTPUT_ROOT="$PWD" rm -r -f compiler/linux64-debug-cmake/ mkdir compiler/linux64-debug-cmake/ cd compiler/linux64-debug-cmake/ -cmake ../cmake/Linux -G "Unix Makefiles" -DTARGET_BUILD_PLATFORM=Linux -DCMAKE_BUILD_TYPE=debug -DPX_GENERATE_GPU_PROJECTS=0 -DPX_OUTPUT_DLL_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake -DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/lib/linux64-cmake -DPX_OUTPUT_EXE_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake +cmake ../cmake/Linux -G "Unix Makefiles" -DTARGET_BUILD_PLATFORM=linux -DCMAKE_BUILD_TYPE=debug -DPX_GENERATE_GPU_PROJECTS=0 -DPX_OUTPUT_DLL_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake -DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/lib/linux64-cmake -DPX_OUTPUT_EXE_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake cd ../../ rm -r 
-f compiler/linux64-checked-cmake/ mkdir compiler/linux64-checked-cmake/ cd compiler/linux64-checked-cmake/ -cmake ../cmake/Linux -G "Unix Makefiles" -DTARGET_BUILD_PLATFORM=Linux -DCMAKE_BUILD_TYPE=checked -DPX_GENERATE_GPU_PROJECTS=0 -DPX_OUTPUT_DLL_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake -DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/lib/linux64-cmake -DPX_OUTPUT_EXE_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake +cmake ../cmake/Linux -G "Unix Makefiles" -DTARGET_BUILD_PLATFORM=linux -DCMAKE_BUILD_TYPE=checked -DPX_GENERATE_GPU_PROJECTS=0 -DPX_OUTPUT_DLL_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake -DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/lib/linux64-cmake -DPX_OUTPUT_EXE_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake cd ../../ rm -r -f compiler/linux64-profile-cmake/ mkdir compiler/linux64-profile-cmake/ cd compiler/linux64-profile-cmake/ -cmake ../cmake/Linux -G "Unix Makefiles" -DTARGET_BUILD_PLATFORM=Linux -DCMAKE_BUILD_TYPE=profile -DPX_GENERATE_GPU_PROJECTS=0 -DPX_OUTPUT_DLL_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake -DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/lib/linux64-cmake -DPX_OUTPUT_EXE_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake +cmake ../cmake/Linux -G "Unix Makefiles" -DTARGET_BUILD_PLATFORM=linux -DCMAKE_BUILD_TYPE=profile -DPX_GENERATE_GPU_PROJECTS=0 -DPX_OUTPUT_DLL_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake -DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/lib/linux64-cmake -DPX_OUTPUT_EXE_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake cd ../../ rm -r -f compiler/linux64-release-cmake/ mkdir compiler/linux64-release-cmake/ cd compiler/linux64-release-cmake/ -cmake ../cmake/Linux -G "Unix Makefiles" -DTARGET_BUILD_PLATFORM=Linux -DCMAKE_BUILD_TYPE=release -DPX_GENERATE_GPU_PROJECTS=0 -DPX_OUTPUT_DLL_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake -DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/lib/linux64-cmake -DPX_OUTPUT_EXE_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake +cmake ../cmake/Linux -G "Unix Makefiles" -DTARGET_BUILD_PLATFORM=linux -DCMAKE_BUILD_TYPE=release -DPX_GENERATE_GPU_PROJECTS=0 -DPX_OUTPUT_DLL_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake 
-DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/lib/linux64-cmake -DPX_OUTPUT_EXE_DIR=$PX_OUTPUT_ROOT/bin/linux64-cmake cd ../../
\ No newline at end of file diff --git a/NvCloth/GenerateProjectsOsx.sh b/NvCloth/GenerateProjectsOsx.sh index 5f0e3c7..5c57f42 100644 --- a/NvCloth/GenerateProjectsOsx.sh +++ b/NvCloth/GenerateProjectsOsx.sh @@ -19,13 +19,13 @@ export PX_OUTPUT_ROOT="$PWD" rm -r -f compiler/osx32-cmake/ mkdir compiler/osx32-cmake/ cd compiler/osx32-cmake/ -cmake ../cmake/Mac -G Xcode -DTARGET_BUILD_PLATFORM=Mac -DPX_32BIT=1 -DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/Lib/osx32-cmake +cmake ../cmake/Mac -G Xcode -DTARGET_BUILD_PLATFORM=mac -DPX_32BIT=1 -DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/Lib/osx32-cmake cd ../../ rm -r -f compiler/osx64-cmake/ mkdir compiler/osx64-cmake/ cd compiler/osx64-cmake/ -cmake ../cmake/Mac -G Xcode -DTARGET_BUILD_PLATFORM=Mac -DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/Lib/osx64-cmake +cmake ../cmake/Mac -G Xcode -DTARGET_BUILD_PLATFORM=mac -DPX_OUTPUT_LIB_DIR=$PX_OUTPUT_ROOT/Lib/osx64-cmake cd ../../ diff --git a/NvCloth/ReleaseNotes.txt b/NvCloth/ReleaseNotes.txt index 5751a25..7dfa9c0 100644 --- a/NvCloth/ReleaseNotes.txt +++ b/NvCloth/ReleaseNotes.txt @@ -1,9 +1,9 @@ NvCloth Release notes. -[1.1.0] +[1.1.1] Supported platforms: -* Windows (CPU, CUDA, DX11, RenderCloth) -* Mac (beta, tested on Sierra, RenderCloth) +* Windows (CPU, CUDA, DX11) +* Mac (beta, tested on Sierra) * Linux (beta, tested on Ubuntu 16.04.1 LTS x64) * PlayStation 4 (beta) with PS4 SDK 4.5 * Xbox one (CPU, DX11, beta) @@ -17,6 +17,15 @@ Supported compilers (via cmake 3.7) * Xbox one: Visual studio 14 (2015) Fixed: +Air drag/lift previously reacted only linearly to flow speed; the model was changed to be more physically correct. +Fixed issue where scaling units would result in different air drag/lift behavior (by adding fluid density parameter). +Fixed "unused typedef `__t100`" warning in Xcode. + +Known issues: +DirectX 11 solver has issues with CCD sphere/capsule collision. + +[1.1.0] +Fixed: DirectX 11 solver now supports triangle collision shapes and air drag/lift. 
Fixed bug where changing friction on cloth already in simulation would cause assert or crash. Optimization flag now working on PS4 release build. diff --git a/NvCloth/compiler/cmake/Linux/CMakeLists.txt b/NvCloth/compiler/cmake/Linux/CMakeLists.txt index 4d8d85a..9425dde 100644 --- a/NvCloth/compiler/cmake/Linux/CMakeLists.txt +++ b/NvCloth/compiler/cmake/Linux/CMakeLists.txt @@ -4,10 +4,10 @@ include(../common/CMakeLists.txt) IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Linux - SET(TARGET_BUILD_PLATFORM "Linux") + SET(TARGET_BUILD_PLATFORM "linux") ENDIF() -SET(PLATFORM_LIST Linux) +SET(PLATFORM_LIST linux) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) @@ -47,7 +47,7 @@ FIND_PACKAGE(PxShared REQUIRED) # such as - we don't want to be installing any built artifacts on this server, etc. So for now we hack it. # Add PxShared as a dependency so that we can use project references -ADD_SUBDIRECTORY(${PXSHARED_ROOT_DIR}/src/compiler/cmake/Linux "${CMAKE_CURRENT_BINARY_DIR}/pxshared_bin") +ADD_SUBDIRECTORY(${PXSHARED_ROOT_DIR}/src/compiler/cmake/linux "${CMAKE_CURRENT_BINARY_DIR}/pxshared_bin") # Include all of the projects INCLUDE(NvCloth.cmake) diff --git a/NvCloth/compiler/cmake/Mac/CMakeLists.txt b/NvCloth/compiler/cmake/Mac/CMakeLists.txt index 05a42ff..4e1c124 100644 --- a/NvCloth/compiler/cmake/Mac/CMakeLists.txt +++ b/NvCloth/compiler/cmake/Mac/CMakeLists.txt @@ -4,10 +4,10 @@ include(../common/CMakeLists.txt) IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Mac - SET(TARGET_BUILD_PLATFORM "Mac") + SET(TARGET_BUILD_PLATFORM "mac") ENDIF() -SET(PLATFORM_LIST Mac) +SET(PLATFORM_LIST mac) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) diff --git a/NvCloth/compiler/cmake/windows/CMakeLists.txt b/NvCloth/compiler/cmake/windows/CMakeLists.txt index 79b0236..81d8289 100644 --- 
a/NvCloth/compiler/cmake/windows/CMakeLists.txt +++ b/NvCloth/compiler/cmake/windows/CMakeLists.txt @@ -4,10 +4,10 @@ include(../common/CMakeLists.txt) IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Windows - SET(TARGET_BUILD_PLATFORM "Windows") + SET(TARGET_BUILD_PLATFORM "windows") ENDIF() -SET(PLATFORM_LIST Windows) +SET(PLATFORM_LIST windows) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) diff --git a/NvCloth/extensions/src/ClothFabricCooker.cpp b/NvCloth/extensions/src/ClothFabricCooker.cpp index f0e4dea..2d800e9 100644 --- a/NvCloth/extensions/src/ClothFabricCooker.cpp +++ b/NvCloth/extensions/src/ClothFabricCooker.cpp @@ -97,7 +97,9 @@ nv::cloth::Range<const T> CreateRange(typename nv::cloth::Vector<T>::Type const& template<typename T, typename U> nv::cloth::Range<const T> CreateRangeF(typename nv::cloth::Vector<U>::Type const& vector, int offset = 0) { +#ifndef _LIBCPP_HAS_NO_STATIC_ASSERT static_assert(sizeof(T) == sizeof(U), "Type T and U need to be of the same size"); +#endif const T* begin = reinterpret_cast<const T*>(vector.begin()+offset); const T* end = reinterpret_cast<const T*>(vector.end()); diff --git a/NvCloth/include/NvCloth/Cloth.h b/NvCloth/include/NvCloth/Cloth.h index 3d783e9..ac957f7 100644 --- a/NvCloth/include/NvCloth/Cloth.h +++ b/NvCloth/include/NvCloth/Cloth.h @@ -376,6 +376,10 @@ class Cloth : public UserAllocated virtual void setLiftCoefficient(float) = 0; ///Returns value set with setLiftCoefficient(). virtual float getLiftCoefficient() const = 0; + /** \brief Sets the fluid density used for air drag/lift calculations. */ + virtual void setFluidDensity(float) = 0; + ///Returns value set with setFluidDensity(). 
+ virtual float getFluidDensity() const = 0; /* self collision */ diff --git a/NvCloth/samples/CmakeGenerateProjects.bat b/NvCloth/samples/CmakeGenerateProjects.bat index d63cde8..8c6d88c 100644 --- a/NvCloth/samples/CmakeGenerateProjects.bat +++ b/NvCloth/samples/CmakeGenerateProjects.bat @@ -14,6 +14,9 @@ SET PATH=%PATH%;"%CMAKE_PATH_F%" REM Make sure the various variables that we need are set +call "../scripts/locate_cuda.bat" CUDA_PATH_ +echo CUDA_PATH_ = %CUDA_PATH_% + IF EXIST %~dp0..\Externals\CMakeModules ( set GW_DEPS_ROOT=%~dp0..\ ) @@ -30,13 +33,13 @@ REM Generate projects here rmdir /s /q compiler\vc14win32-cmake\ mkdir compiler\vc14win32-cmake\ pushd compiler\vc14win32-cmake\ -cmake ..\.. -G "Visual Studio 14 2015" -AWin32 -DTARGET_BUILD_PLATFORM=Windows -DSTATIC_WINCRT=0 -DBL_DLL_OUTPUT_DIR=%OUTPUT_ROOT%\bin\vc14win32-cmake -DBL_LIB_OUTPUT_DIR=%OUTPUT_ROOT%\lib\vc14win32-cmake -DBL_EXE_OUTPUT_DIR=%OUTPUT_ROOT%\bin\vc14win32-cmake +cmake ..\.. -G "Visual Studio 14 2015" -AWin32 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DTARGET_BUILD_PLATFORM=windows -DSTATIC_WINCRT=0 -DBL_DLL_OUTPUT_DIR=%OUTPUT_ROOT%\bin\vc14win32-cmake -DBL_LIB_OUTPUT_DIR=%OUTPUT_ROOT%\lib\vc14win32-cmake -DBL_EXE_OUTPUT_DIR=%OUTPUT_ROOT%\bin\vc14win32-cmake popd rmdir /s /q compiler\vc14win64-cmake\ mkdir compiler\vc14win64-cmake\ pushd compiler\vc14win64-cmake\ -cmake ..\.. -G "Visual Studio 14 2015" -Ax64 -DTARGET_BUILD_PLATFORM=Windows -DSTATIC_WINCRT=0 -DBL_DLL_OUTPUT_DIR=%OUTPUT_ROOT%\bin\vc14win64-cmake -DBL_LIB_OUTPUT_DIR=%OUTPUT_ROOT%\lib\vc14win64-cmake -DBL_EXE_OUTPUT_DIR=%OUTPUT_ROOT%\bin\vc14win64-cmake +cmake ..\.. 
-G "Visual Studio 14 2015" -Ax64 -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_PATH_%" -DTARGET_BUILD_PLATFORM=windows -DSTATIC_WINCRT=0 -DBL_DLL_OUTPUT_DIR=%OUTPUT_ROOT%\bin\vc14win64-cmake -DBL_LIB_OUTPUT_DIR=%OUTPUT_ROOT%\lib\vc14win64-cmake -DBL_EXE_OUTPUT_DIR=%OUTPUT_ROOT%\bin\vc14win64-cmake popd diff --git a/NvCloth/samples/SampleBase/Main.cpp b/NvCloth/samples/SampleBase/Main.cpp index 433ed88..f4107c3 100644 --- a/NvCloth/samples/SampleBase/Main.cpp +++ b/NvCloth/samples/SampleBase/Main.cpp @@ -12,7 +12,9 @@ #include <sstream> #include <Windows.h> - +#include <iostream> +#include <io.h> +#include <fcntl.h> using namespace std; @@ -22,6 +24,8 @@ int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLi #if defined(DEBUG) | defined(_DEBUG) _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); AllocConsole(); + FILE* fp; + freopen_s(&fp, "CONOUT$", "w", stdout); #endif SampleConfig config; diff --git a/NvCloth/samples/SampleBase/scene/scenes/ScaledScene.cpp b/NvCloth/samples/SampleBase/scene/scenes/ScaledScene.cpp new file mode 100644 index 0000000..f2a5b82 --- /dev/null +++ b/NvCloth/samples/SampleBase/scene/scenes/ScaledScene.cpp @@ -0,0 +1,98 @@ +/* +* Copyright (c) 2008-2017, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. 
+*/ + +#include "ScaledScene.h" +#include "Scene/SceneController.h" +#include <NvClothExt/ClothFabricCooker.h> +#include "ClothMeshGenerator.h" +#include <NvCloth/Fabric.h> +#include <NvCloth/Solver.h> +#include <NvCloth/Cloth.h> +#include <NvCloth/Factory.h> +#include "Renderer.h" +#include "renderer/RenderUtils.h" + +DECLARE_SCENE_NAME(ScaledScene,"Scaled Scene") + +void ScaledScene::onInitialize() +{ + /////////////////////////////////////////////////////////////////////// + ClothMeshData clothMesh; + + physx::PxMat44 transform = PxTransform(PxVec3(-2.f, 13.f, 0.f)*0.0, PxQuat(PxPi / 6.f, PxVec3(1.f, 0.f, 0.f))); + clothMesh.GeneratePlaneCloth(600.f, 700.f, 49, 59, false, transform); + clothMesh.AttachClothPlaneByAngles(49, 59); + + mClothActor = new ClothActor; + nv::cloth::ClothMeshDesc meshDesc = clothMesh.GetClothMeshDesc(); + { + mClothActor->mClothRenderMesh = new ClothRenderMesh(meshDesc); + mClothActor->mClothRenderable = getSceneController()->getRenderer().createRenderable(*(static_cast<IRenderMesh*>(mClothActor->mClothRenderMesh)), *getSceneController()->getDefaultMaterial()); + mClothActor->mClothRenderable->setColor(getRandomPastelColor()); + mClothActor->mClothRenderable->setScale(physx::PxVec3(0.01, 0.01, 0.01)); + mClothActor->mClothRenderable->setTransform(PxTransform(PxVec3(-2.f, 13.f, 0.f),physx::PxQuat(physx::PxIdentity))); + } + + nv::cloth::Vector<int32_t>::Type phaseTypeInfo; + mFabric = NvClothCookFabricFromMesh(getSceneController()->getFactory(), meshDesc, physx::PxVec3(0.0f, -9.8f, 0.0f), &phaseTypeInfo, false); + trackFabric(mFabric); + + // Initialize start positions and masses for the actual cloth instance + // (note: the particle/vertex positions do not have to match the mesh description here. 
Set the positions to the initial shape of this cloth instance) + std::vector<physx::PxVec4> particlesCopy; + particlesCopy.resize(clothMesh.mVertices.size()); + + physx::PxVec3 center = transform.transform(physx::PxVec3(0.0f, 0.0f, 0.0f)); + for (int i = 0; i < (int)clothMesh.mVertices.size(); i++) + { + // To put attachment point closer to each other + if(clothMesh.mInvMasses[i] < 1e-6) + clothMesh.mVertices[i] = (clothMesh.mVertices[i] - center) * 0.85f + center; + + particlesCopy[i] = physx::PxVec4(clothMesh.mVertices[i], clothMesh.mInvMasses[i]); // w component is 1/mass, or 0.0f for anchored/fixed particles + } + + // Create the cloth from the initial positions/masses and the fabric + mClothActor->mCloth = getSceneController()->getFactory()->createCloth(nv::cloth::Range<physx::PxVec4>(&particlesCopy[0], &particlesCopy[0] + particlesCopy.size()), *mFabric); + particlesCopy.clear(); particlesCopy.shrink_to_fit(); + + mClothActor->mCloth->setGravity(physx::PxVec3(0.0f, -980.0f, 0.0f)); + + // Setup phase configs + std::vector<nv::cloth::PhaseConfig> phases(mFabric->getNumPhases()); + for (int i = 0; i < (int)phases.size(); i++) + { + phases[i].mPhaseIndex = i; + phases[i].mStiffness = 1.0f; + phases[i].mStiffnessMultiplier = 1.0f; + phases[i].mCompressionLimit = 1.0f; + phases[i].mStretchLimit = 1.0f; + } + mClothActor->mCloth->setPhaseConfig(nv::cloth::Range<nv::cloth::PhaseConfig>(phases.data(), phases.data() + phases.size())); // Range end is exclusive: pass one past the last phase so every phase is configured + mClothActor->mCloth->setDragCoefficient(0.1f); + mClothActor->mCloth->setLiftCoefficient(0.1f); + //mClothActor->mCloth->setWindVelocity(physx::PxVec3(50, 0.0, 50.0)); + mClothActor->mCloth->setFluidDensity(1.0f / powf(100, 3)); + + mSolver = getSceneController()->getFactory()->createSolver(); + trackSolver(mSolver); + trackClothActor(mClothActor); + + // Add the cloth to the solver for simulation + addClothToSolver(mClothActor, mSolver); + + { + IRenderMesh* mesh = 
getSceneController()->getRenderer().getPrimitiveRenderMesh(PrimitiveRenderMeshType::Plane); + Renderable* plane = getSceneController()->getRenderer().createRenderable(*mesh, *getSceneController()->getDefaultPlaneMaterial()); + plane->setTransform(PxTransform(PxVec3(0.f, 0.f, 0.f), PxQuat(PxPiDivTwo, PxVec3(0.f, 0.f, 1.f)))); + plane->setScale(PxVec3(1000.f)); + trackRenderable(plane); + } +} diff --git a/NvCloth/samples/SampleBase/scene/scenes/ScaledScene.h b/NvCloth/samples/SampleBase/scene/scenes/ScaledScene.h new file mode 100644 index 0000000..eb47c36 --- /dev/null +++ b/NvCloth/samples/SampleBase/scene/scenes/ScaledScene.h @@ -0,0 +1,32 @@ +/* +* Copyright (c) 2008-2017, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. +*/ + +#ifndef SCALED_SCENE_H +#define SCALED_SCENE_H + +#include "scene/Scene.h" + +class ScaledScene : public Scene +{ +public: + + ScaledScene(SceneController* sceneController):Scene(sceneController) {} + + virtual void onInitialize() override; + +private: + nv::cloth::Fabric* mFabric; + nv::cloth::Solver* mSolver; + ClothActor* mClothActor; + +}; + + +#endif
\ No newline at end of file diff --git a/NvCloth/samples/SampleBase/scene/scenes/WindScene.cpp b/NvCloth/samples/SampleBase/scene/scenes/WindScene.cpp index 232316d..d84fb87 100644 --- a/NvCloth/samples/SampleBase/scene/scenes/WindScene.cpp +++ b/NvCloth/samples/SampleBase/scene/scenes/WindScene.cpp @@ -35,7 +35,7 @@ void WindScene::Animate(double dt) for(int i = 0; i < 3; i++) { - physx::PxVec3 wind = physx::PxVec3(dvx, vy, dvz); + physx::PxVec3 wind = physx::PxVec3(dvx, vy, dvz)/5.0f; mClothActor[i]->mCloth->setWindVelocity(wind); } } diff --git a/NvCloth/samples/compiler/cmake/SampleBase.cmake b/NvCloth/samples/compiler/cmake/SampleBase.cmake index e3aa2cc..cee4779 100644 --- a/NvCloth/samples/compiler/cmake/SampleBase.cmake +++ b/NvCloth/samples/compiler/cmake/SampleBase.cmake @@ -110,6 +110,8 @@ SET(SCENES_FILES ${SB_SCENE_SOURCE_DIR}/scenes/CCDScene.h ${SB_SCENE_SOURCE_DIR}/scenes/MultiSolverScene.cpp ${SB_SCENE_SOURCE_DIR}/scenes/MultiSolverScene.h + ${SB_SCENE_SOURCE_DIR}/scenes/ScaledScene.cpp + ${SB_SCENE_SOURCE_DIR}/scenes/ScaledScene.h ) SET(UI_FILES diff --git a/NvCloth/src/ClothBase.h b/NvCloth/src/ClothBase.h index 8d75a72..ec1ee40 100644 --- a/NvCloth/src/ClothBase.h +++ b/NvCloth/src/ClothBase.h @@ -74,6 +74,7 @@ void initialize(Cloth& cloth, const physx::PxVec4* pIt, const physx::PxVec4* pEn cloth.mWind = physx::PxVec3(0.0f); cloth.mDragLogCoefficient = 0.0f; cloth.mLiftLogCoefficient = 0.0f; + cloth.mFluidDensity = 1.0f; cloth.mEnableContinuousCollision = false; cloth.mCollisionMassScale = 0.0f; cloth.mFriction = 0.0f; diff --git a/NvCloth/src/ClothImpl.h b/NvCloth/src/ClothImpl.h index 0c2b362..1e8d9a1 100644 --- a/NvCloth/src/ClothImpl.h +++ b/NvCloth/src/ClothImpl.h @@ -157,6 +157,8 @@ class ClothImpl : public Cloth virtual float getDragCoefficient() const; virtual void setLiftCoefficient(float); virtual float getLiftCoefficient() const; + virtual void setFluidDensity(float); + virtual float getFluidDensity() const; virtual void 
setSelfCollisionDistance(float); virtual float getSelfCollisionDistance() const; @@ -216,6 +218,7 @@ public: //Fields shared between all cloth classes physx::PxVec3 mWind; float mDragLogCoefficient; float mLiftLogCoefficient; + float mFluidDensity; // sleeping uint32_t mSleepTestInterval; // how often to test for movement @@ -313,7 +316,8 @@ inline physx::PxVec3 ClothImpl<T>::getGravity() const inline float safeLog2(float x) { - return x ? physx::shdfnd::log2(x) : -FLT_MAX_EXP; + NV_CLOTH_ASSERT(("safeLog2",x >= 0.0f)); + return x > 0 ? physx::shdfnd::log2(x) : -FLT_MAX_EXP; } inline physx::PxVec3 safeLog2(const physx::PxVec3& v) @@ -1214,11 +1218,30 @@ inline float ClothImpl<T>::getLiftCoefficient() const } template <typename T> +inline void ClothImpl<T>::setFluidDensity(float fluidDensity) +{ + NV_CLOTH_ASSERT(fluidDensity > 0.f); // density must be strictly positive + if (fluidDensity == mFluidDensity) + return; + + mFluidDensity = fluidDensity; + getChildCloth()->notifyChanged(); + wakeUp(); +} + +template <typename T> +inline float ClothImpl<T>::getFluidDensity() const +{ + return mFluidDensity; +} + +template <typename T> inline uint32_t ClothImpl<T>::getNumSelfCollisionIndices() const { return uint32_t(getChildCloth()->mSelfCollisionIndices.size()); } + // Fixed 4505:local function has been removed template <typename T> inline void ClothImpl<T>::setRestPositions(Range<const physx::PxVec4> restPositions) @@ -1255,6 +1278,7 @@ inline float ClothImpl<T>::getSelfCollisionDistance() const template <typename T> inline void ClothImpl<T>::setSelfCollisionStiffness(float stiffness) { + NV_CLOTH_ASSERT(stiffness <= 1.0f); float value = safeLog2(1 - stiffness); if (value == getChildCloth()->mSelfCollisionLogStiffness) return; diff --git a/NvCloth/src/IterationState.h b/NvCloth/src/IterationState.h index f199663..224e87e 100644 --- a/NvCloth/src/IterationState.h +++ b/NvCloth/src/IterationState.h @@ -137,7 +137,7 @@ struct IterationState Simd4f mCurBias; // in local space Simd4f mPrevBias; // in local 
space - Simd4f mWind; // delta position per iteration + Simd4f mWind; // delta position per iteration (wind velocity * mIterDt) Simd4f mPrevMatrix[3]; Simd4f mCurMatrix[3]; @@ -290,7 +290,7 @@ cloth::IterationState<Simd4f> cloth::IterationStateFactory::create(MyCloth const result.mCurBias = transform(result.mRotationMatrix, curLinearInertia + bias) & maskXYZ; result.mPrevBias = transform(result.mRotationMatrix, linearInertia - curLinearInertia) & maskXYZ; - Simd4f wind = load(array(cloth.mWind)) * iterDt; + Simd4f wind = load(array(cloth.mWind)) * iterDt; // multiply with delta time here already so we don't have to do it inside the solver result.mWind = transform(result.mRotationMatrix, translation - wind) & maskXYZ; result.mIsTurning = mPrevAngularVelocity.magnitudeSquared() + cloth.mAngularVelocity.magnitudeSquared() > 0.0f; diff --git a/NvCloth/src/SwClothData.cpp b/NvCloth/src/SwClothData.cpp index eddd821..f102bde 100644 --- a/NvCloth/src/SwClothData.cpp +++ b/NvCloth/src/SwClothData.cpp @@ -80,6 +80,7 @@ cloth::SwClothData::SwClothData(SwCloth& cloth, const SwFabric& fabric) mNumTriangles = uint32_t(fabric.mTriangles.size()) / 3; mDragCoefficient = 1.0f - expf(stiffnessExponent * cloth.mDragLogCoefficient); mLiftCoefficient = 1.0f - expf(stiffnessExponent * cloth.mLiftLogCoefficient); + mFluidDensity = cloth.mFluidDensity * 0.5f; //divide by 2 so we don't have to compensate for double area from cross product in the solver mStartMotionConstraints = cloth.mMotionConstraints.mStart.size() ? 
array(cloth.mMotionConstraints.mStart.front()) : 0; mTargetMotionConstraints = diff --git a/NvCloth/src/SwClothData.h b/NvCloth/src/SwClothData.h index d2387b5..78a6f99 100644 --- a/NvCloth/src/SwClothData.h +++ b/NvCloth/src/SwClothData.h @@ -92,6 +92,7 @@ struct SwClothData uint32_t mNumTriangles; float mDragCoefficient; float mLiftCoefficient; + float mFluidDensity; // motion constraint data const float* mStartMotionConstraints; diff --git a/NvCloth/src/SwSolverKernel.cpp b/NvCloth/src/SwSolverKernel.cpp index 52dfdaa..dec46d7 100644 --- a/NvCloth/src/SwSolverKernel.cpp +++ b/NvCloth/src/SwSolverKernel.cpp @@ -384,9 +384,13 @@ T4f calculateMaxDelta(const T4f* prevIt, const T4f* curIt, const T4f* curEnd) template <bool IsTurning, typename T4f> void applyWind(T4f* __restrict curIt, const T4f* __restrict prevIt, const uint16_t* __restrict tIt, - const uint16_t* __restrict tEnd, T4f dragCoefficient, T4f liftCoefficient, T4f wind, + const uint16_t* __restrict tEnd, float itrDtf, float dragCoefficientf, float liftCoefficientf, float fluidDensityf, T4f wind, const T4f (&rotation)[3]) { + const T4f dragCoefficient = simd4f(dragCoefficientf); + const T4f liftCoefficient = simd4f(liftCoefficientf); + const T4f fluidDensity = simd4f(fluidDensityf); + const T4f itrDt = simd4f(itrDtf); const T4f oneThird = simd4f(1.0f / 3.0f); for (; tIt < tEnd; tIt += 3) @@ -410,7 +414,7 @@ void applyWind(T4f* __restrict curIt, const T4f* __restrict prevIt, const uint16 T4f previous = oneThird * (p0 + p1 + p2); //offset of the triangle center, including wind - T4f delta = current - previous + wind; + T4f delta = current - previous + wind; //wind is also already multiplied by dt in the iteration state so everything is in the same units if (IsTurning) { @@ -423,23 +427,25 @@ void applyWind(T4f* __restrict curIt, const T4f* __restrict prevIt, const uint16 T4f normal = cross3(c2 - c0, c1 - c0); T4f doubleArea = sqrt(dot3(normal, normal)); + normal = normal / doubleArea; T4f invSqrScale = 
dot3(delta, delta); T4f isZero = invSqrScale < gSimd4fEpsilon; T4f scale = rsqrt(invSqrScale); + T4f deltaLength = sqrt(invSqrScale); //scale 'normalizes' delta, doubleArea normalized normal - T4f cosTheta = dot3(normal, delta) * scale / doubleArea; + T4f cosTheta = dot3(normal, delta) * scale; T4f sinTheta = sqrt(max(gSimd4fZero, gSimd4fOne - cosTheta * cosTheta)); // orthogonal to delta, in delta-normal plane, same length as delta T4f liftDir = cross3(cross3(delta, normal), delta * scale); // sin(theta) * cos(theta) = 0.5 * sin(2 * theta) - T4f lift = liftCoefficient * cosTheta * sinTheta * liftDir; - T4f drag = dragCoefficient * abs(cosTheta) * doubleArea * delta; //dragCoefficient should compensate for double area + T4f lift = liftCoefficient * cosTheta * sinTheta * liftDir * deltaLength / itrDt; + T4f drag = dragCoefficient * abs(cosTheta) * delta * deltaLength / itrDt; - T4f impulse = (lift + drag) & ~isZero; + T4f impulse = (drag + lift) * fluidDensity * doubleArea & ~isZero; //fluidDensity compensates for double area curIt[i0] = c0 - impulse * splat<3>(c0); curIt[i1] = c1 - impulse * splat<3>(c1); @@ -668,17 +674,14 @@ void cloth::SwSolverKernel<T4f>::applyWind() const uint16_t* tIt = mClothData.mTriangles; const uint16_t* tEnd = tIt + 3 * mClothData.mNumTriangles; - T4f dragCoefficient = simd4f(mClothData.mDragCoefficient); - T4f liftCoefficient = simd4f(mClothData.mLiftCoefficient); - if (mState.mIsTurning) { - ::applyWind<true>(curIt, prevIt, tIt, tEnd, dragCoefficient, liftCoefficient, mState.mWind, + ::applyWind<true>(curIt, prevIt, tIt, tEnd, mState.mIterDt, mClothData.mDragCoefficient, mClothData.mLiftCoefficient, mClothData.mFluidDensity, mState.mWind, mState.mRotationMatrix); } else { - ::applyWind<false>(curIt, prevIt, tIt, tEnd, dragCoefficient, liftCoefficient, mState.mWind, + ::applyWind<false>(curIt, prevIt, tIt, tEnd, mState.mIterDt, mClothData.mDragCoefficient, mClothData.mLiftCoefficient, mClothData.mFluidDensity, mState.mWind, 
mState.mRotationMatrix); } } diff --git a/NvCloth/src/cuda/CuClothData.cpp b/NvCloth/src/cuda/CuClothData.cpp index decfd2c..927997c 100644 --- a/NvCloth/src/cuda/CuClothData.cpp +++ b/NvCloth/src/cuda/CuClothData.cpp @@ -124,6 +124,7 @@ cloth::CuFrameData::CuFrameData(CuCloth& cloth, uint32_t numSharedPositions, con stiffness = gSimd4fOne - exp2(logStiffness * stiffnessExponent); mDragCoefficient = array(stiffness)[0]; mLiftCoefficient = array(stiffness)[1]; + mFluidDensity = cloth.mFluidDensity * 0.5f; //divide by 2 to so we don't have to compensate for double area from cross product in the solver for (int i = 0; i < 9; ++i) mRotation[i] = array(state.mRotationMatrix[i / 3])[i % 3]; diff --git a/NvCloth/src/cuda/CuClothData.h b/NvCloth/src/cuda/CuClothData.h index 0e4cda0..dd836fd 100644 --- a/NvCloth/src/cuda/CuClothData.h +++ b/NvCloth/src/cuda/CuClothData.h @@ -135,6 +135,7 @@ struct CuFrameData // wind data float mDragCoefficient; float mLiftCoefficient; + float mFluidDensity; float mRotation[9]; // motion constraint data diff --git a/NvCloth/src/cuda/CuSolverKernel.cu b/NvCloth/src/cuda/CuSolverKernel.cu index 3517193..edb66dc 100644 --- a/NvCloth/src/cuda/CuSolverKernel.cu +++ b/NvCloth/src/cuda/CuSolverKernel.cu @@ -867,6 +867,8 @@ __device__ void applyWind(CurrentT& current, PreviousT& previous) { const float dragCoefficient = gFrameData.mDragCoefficient; const float liftCoefficient = gFrameData.mLiftCoefficient; + const float fluidDensity = gFrameData.mFluidDensity; + const float itrDt = gFrameData.mIterDt; if (dragCoefficient == 0.0f && liftCoefficient == 0.0f) return; @@ -912,20 +914,22 @@ __device__ void applyWind(CurrentT& current, PreviousT& previous) float3 normal = cross3(c2 - c0, c1 - c0); - float doubleArea = sqrtf(dot3(normal, normal)); + const float doubleArea = sqrtf(dot3(normal, normal)); + normal = (1.0f / doubleArea) * normal; float invSqrScale = dot3(delta, delta); float scale = rsqrtf(invSqrScale); + float deltaLength = 
sqrtf(invSqrScale); - float cosTheta = dot3(normal, delta) * scale / doubleArea; + float cosTheta = dot3(normal, delta) * scale; float sinTheta = sqrtf(max(0.0f, 1.0f - cosTheta * cosTheta)); float3 liftDir = cross3(cross3(delta, normal), scale * delta); - float3 lift = liftCoefficient * cosTheta * sinTheta * liftDir; - float3 drag = dragCoefficient * abs(cosTheta) * doubleArea * delta; + float3 lift = liftCoefficient * cosTheta * sinTheta * ((deltaLength / itrDt) * liftDir); + float3 drag = dragCoefficient * abs(cosTheta) * ((deltaLength / itrDt) * delta); - float3 impulse = invSqrScale < FLT_EPSILON ? make_float3(0.0f, 0.0f, 0.0f) : lift + drag; + float3 impulse = invSqrScale < FLT_EPSILON ? make_float3(0.0f, 0.0f, 0.0f) : fluidDensity * doubleArea * (lift + drag); applyImpulse(current(i0), impulse); applyImpulse(current(i1), impulse); diff --git a/NvCloth/src/dx/DxClothData.cpp b/NvCloth/src/dx/DxClothData.cpp index 075dc81..57049bf 100644 --- a/NvCloth/src/dx/DxClothData.cpp +++ b/NvCloth/src/dx/DxClothData.cpp @@ -112,6 +112,7 @@ cloth::DxFrameData::DxFrameData(DxCloth& cloth, uint32_t numSharedPositions, con Simd4f stiffness = gSimd4fOne - exp2(logStiffness * stiffnessExponent); mDragCoefficient = array(stiffness)[0]; mLiftCoefficient = array(stiffness)[1]; + mFluidDensity = cloth.mFluidDensity * 0.5f; //divide by 2 to so we don't have to compensate for double area from cross product in the solver for(int i = 0; i < 9; ++i) mRotation[i] = array(state.mRotationMatrix[i / 3])[i % 3]; } diff --git a/NvCloth/src/dx/DxClothData.h b/NvCloth/src/dx/DxClothData.h index af02bc6..f91d37d 100644 --- a/NvCloth/src/dx/DxClothData.h +++ b/NvCloth/src/dx/DxClothData.h @@ -46,7 +46,7 @@ typedef unsigned int uint32_t; typedef int int32_t; #endif -static const uint32_t MaxParticlesInSharedMem = 1972; +static const uint32_t MaxParticlesInSharedMem = 1971; struct DxPhaseConfig @@ -167,6 +167,7 @@ struct DxFrameData // wind data float mDragCoefficient; float mLiftCoefficient; + 
float mFluidDensity; float mRotation[9]; // motion constraint data diff --git a/NvCloth/src/dx/DxSolverKernel.hlsl b/NvCloth/src/dx/DxSolverKernel.hlsl index 2ca42b3..4d91f4b 100644 --- a/NvCloth/src/dx/DxSolverKernel.hlsl +++ b/NvCloth/src/dx/DxSolverKernel.hlsl @@ -202,7 +202,8 @@ void applyWind(IParticles curParticles, IParticles prevParticles, uint32_t threa { const float dragCoefficient = gFrameData.mDragCoefficient; const float liftCoefficient = gFrameData.mLiftCoefficient; - + const float fluidDensity = gFrameData.mFluidDensity; + const float itrDt = gFrameData.mIterDt; if(dragCoefficient == 0.0f && liftCoefficient == 0.0f) return; @@ -258,20 +259,22 @@ void applyWind(IParticles curParticles, IParticles prevParticles, uint32_t threa float3 normal = cross(c2.xyz - c0.xyz, c1.xyz - c0.xyz); - float doubleArea = sqrt(dot(normal, normal)); + const float doubleArea = sqrt(dot(normal, normal)); + normal = normal / doubleArea; float invSqrScale = dot(delta, delta); float scale = rsqrt(invSqrScale); + float deltaLength = sqrt(invSqrScale); - float cosTheta = dot(normal, delta) * scale / doubleArea; + float cosTheta = dot(normal, delta) * scale; float sinTheta = sqrt(max(0.0f, 1.0f - cosTheta * cosTheta)); float3 liftDir = cross(cross(delta, normal), scale * delta); - float3 lift = liftCoefficient * cosTheta * sinTheta * liftDir; - float3 drag = dragCoefficient * abs(cosTheta) * doubleArea * delta; + float3 lift = liftCoefficient * cosTheta * sinTheta * liftDir * deltaLength / itrDt; + float3 drag = dragCoefficient * abs(cosTheta) * delta * deltaLength / itrDt; - float3 impulse = invSqrScale < 1.192092896e-07F ? float3(0.0f, 0.0f, 0.0f) : lift + drag; + float3 impulse = invSqrScale < 1.192092896e-07F ? 
float3(0.0f, 0.0f, 0.0f) : (lift + drag) * fluidDensity * doubleArea; curParticles.atomicAdd(i0, -impulse * c0.w); curParticles.atomicAdd(i1, -impulse * c1.w); diff --git a/NvCloth/src/neon/SwCollisionHelpers.h b/NvCloth/src/neon/SwCollisionHelpers.h index 0b9410b..acd8a2a 100644 --- a/NvCloth/src/neon/SwCollisionHelpers.h +++ b/NvCloth/src/neon/SwCollisionHelpers.h @@ -33,7 +33,7 @@ #include <arm_neon.h> #endif -namespace physx +namespace nv { namespace cloth { @@ -84,4 +84,4 @@ Simd4i Gather<Simd4i>::operator()(const Simd4i* ptr) const } } // namespace cloth -} // namespace physx +} // namespace nv diff --git a/NvCloth/src/scalar/SwCollisionHelpers.h b/NvCloth/src/scalar/SwCollisionHelpers.h index 0d7321f..af21812 100644 --- a/NvCloth/src/scalar/SwCollisionHelpers.h +++ b/NvCloth/src/scalar/SwCollisionHelpers.h @@ -29,7 +29,7 @@ #pragma once -namespace physx +namespace nv { namespace cloth { @@ -89,4 +89,4 @@ Scalar4i Gather<Scalar4i>::operator()(const Scalar4i* ptr) const } } // namespace cloth -} // namespace physx +} // namespace nv diff --git a/PxShared/include/cudamanager/PxCudaContextManager.h b/PxShared/include/cudamanager/PxCudaContextManager.h index 24f4370..aca1112 100644 --- a/PxShared/include/cudamanager/PxCudaContextManager.h +++ b/PxShared/include/cudamanager/PxCudaContextManager.h @@ -23,7 +23,7 @@ // components in life support devices or systems without express written approval of // NVIDIA Corporation. // -// Copyright (c) 2008-2014 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
#ifndef PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H @@ -42,6 +42,7 @@ /* Forward decl to avoid inclusion of cuda.h */ typedef struct CUctx_st *CUcontext; typedef struct CUgraphicsResource_st *CUgraphicsResource; +typedef int CUdevice; namespace physx { @@ -58,7 +59,6 @@ struct PxCudaInteropMode enum Enum { NO_INTEROP = 0, - D3D9_INTEROP, D3D10_INTEROP, D3D11_INTEROP, OGL_INTEROP, @@ -154,6 +154,9 @@ public: * to every CUDA work submission, so we recommend that you carefully tune * this initial base memory size to closely approximate the amount of * memory your application will consume. + + Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured + for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. */ uint32_t memoryBaseSize[PxCudaBufferMemorySpace::COUNT]; @@ -162,11 +165,17 @@ public: * * The memory manager will dynamically grow and shrink in blocks multiple of * this page size. Size has to be power of two and bigger than 0. + + Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured + for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. */ uint32_t memoryPageSize[PxCudaBufferMemorySpace::COUNT]; /** * \brief Maximum size of memory that the memory manager will allocate + + Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured + for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. */ uint32_t maxMemorySize[PxCudaBufferMemorySpace::COUNT]; @@ -223,7 +232,7 @@ public: * harmfull to (re)acquire the context in code that is shared between * GpuTasks and non-task functions. 
*/ - virtual CUcontext acquireContext() = 0; + virtual void acquireContext() = 0; /** * \brief Release the CUDA context from the current thread @@ -234,9 +243,16 @@ public: */ virtual void releaseContext() = 0; + /** + * \brief Return the CUcontext + */ + virtual CUcontext getContext() = 0; + /** * \brief Return the PxCudaMemoryManager instance associated with this * CUDA context + * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured + * for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. */ virtual PxCudaMemoryManager *getMemoryManager() = 0; @@ -268,6 +284,7 @@ public: virtual bool supportsArchSM35() const = 0; //!< GK110 virtual bool supportsArchSM50() const = 0; //!< GM100 virtual bool supportsArchSM52() const = 0; //!< GM200 + virtual bool supportsArchSM60() const = 0; //!< GP100 virtual bool isIntegrated() const = 0; //!< true if GPU is an integrated (MCP) part virtual bool canMapHostMemory() const = 0; //!< true if GPU map host memory to GPU (0-copy) virtual int getDriverVersion() const = 0; //!< returns cached value of cuGetDriverVersion() @@ -278,6 +295,7 @@ public: virtual int getSharedMemPerMultiprocessor() const = 0; //!< returns total amount of shared memory available per multiprocessor in bytes virtual unsigned int getMaxThreadsPerBlock() const = 0; //!< returns the maximum number of threads per block virtual const char *getDeviceName() const = 0; //!< returns device name retrieved from driver + virtual CUdevice getDevice() const = 0; //!< returns device handle retrieved from driver virtual PxCudaInteropMode::Enum getInteropMode() const = 0; //!< interop mode the context was created with virtual void setUsingConcurrentStreams(bool) = 0; //!< turn on/off using concurrent streams for GPU work diff --git a/PxShared/include/foundation/PxIntrinsics.h b/PxShared/include/foundation/PxIntrinsics.h index 471f934..b4aff28 100644 --- a/PxShared/include/foundation/PxIntrinsics.h +++ 
b/PxShared/include/foundation/PxIntrinsics.h @@ -38,8 +38,8 @@ #include "foundation/unix/PxUnixIntrinsics.h" #elif PX_XBOXONE #include "foundation/XboxOne/PxXboxOneIntrinsics.h" -#elif PX_NX -#include "foundation/nx/PxNXIntrinsics.h" +#elif PX_SWITCH +#include "foundation/switch/PxSwitchIntrinsics.h" #else #error "Platform not supported!" #endif diff --git a/PxShared/include/foundation/PxPreprocessor.h b/PxShared/include/foundation/PxPreprocessor.h index 446ca76..9b6e0f4 100644 --- a/PxShared/include/foundation/PxPreprocessor.h +++ b/PxShared/include/foundation/PxPreprocessor.h @@ -88,7 +88,7 @@ Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSyst #elif defined(__ORBIS__) #define PX_PS4 1 #elif defined(__NX__) -#define PX_NX 1 +#define PX_SWITCH 1 #else #error "Unknown operating system" #endif @@ -161,8 +161,8 @@ define anything not defined on this platform to 0 #ifndef PX_PS4 #define PX_PS4 0 #endif -#ifndef PX_NX -#define PX_NX 0 +#ifndef PX_SWITCH +#define PX_SWITCH 0 #endif #ifndef PX_X64 #define PX_X64 0 @@ -425,7 +425,7 @@ General defines */ // static assert -#if(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) || (PX_PS4) || (PX_APPLE_FAMILY) || (PX_NX) || (PX_CLANG && PX_ARM) +#if(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) || (PX_PS4) || (PX_APPLE_FAMILY) || (PX_SWITCH) || (PX_CLANG && PX_ARM) #define PX_COMPILE_TIME_ASSERT(exp) typedef char PxCompileTimeAssert_Dummy[(exp) ? 1 : -1] __attribute__((unused)) #else #define PX_COMPILE_TIME_ASSERT(exp) typedef char PxCompileTimeAssert_Dummy[(exp) ? 
1 : -1] @@ -532,5 +532,11 @@ protected: #define PX_SUPPORT_COMPUTE_PHYSX 0 +#ifndef PX_SUPPORT_EXTERN_TEMPLATE +#define PX_SUPPORT_EXTERN_TEMPLATE ((!PX_ANDROID) && (PX_VC != 11)) +#else +#define PX_SUPPORT_EXTERN_TEMPLATE 0 +#endif + /** @} */ #endif // #ifndef PXFOUNDATION_PXPREPROCESSOR_H diff --git a/PxShared/include/foundation/nx/PxNXIntrinsics.h b/PxShared/include/foundation/nx/PxNXIntrinsics.h deleted file mode 100644 index adf3bf5..0000000 --- a/PxShared/include/foundation/nx/PxNXIntrinsics.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. - * - * NVIDIA CORPORATION and its licensors retain all intellectual property - * and proprietary rights in and to this software, related documentation - * and any modifications thereto. Any use, reproduction, disclosure or - * distribution of this software and related documentation without an express - * license agreement from NVIDIA CORPORATION is strictly prohibited. - */ -// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. -// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. - - -#ifndef PX_FOUNDATION_PX_NX_INTRINSICS_H -#define PX_FOUNDATION_PX_NX_INTRINSICS_H - -#include "foundation/Px.h" -#include "foundation/PxAssert.h" - -#if !PX_NX - #error "This file should only be included by NX builds!!" -#endif - -#include <math.h> -#include <float.h> - -#include "nn/cstd/cstd_CMath.h" - -#if !PX_DOXYGEN -namespace physx -{ -namespace intrinsics -{ -#endif - - //! \brief platform-specific absolute value - PX_CUDA_CALLABLE PX_FORCE_INLINE float abs(float a) { return ::fabsf(a); } - - //! \brief platform-specific select float - PX_CUDA_CALLABLE PX_FORCE_INLINE float fsel(float a, float b, float c) { return (a >= 0.0f) ? b : c; } - - //! \brief platform-specific sign - PX_CUDA_CALLABLE PX_FORCE_INLINE float sign(float a) { return (a >= 0.0f) ? 1.0f : -1.0f; } - - //! 
\brief platform-specific reciprocal - PX_CUDA_CALLABLE PX_FORCE_INLINE float recip(float a) { return 1.0f/a; } - - //! \brief platform-specific reciprocal estimate - PX_CUDA_CALLABLE PX_FORCE_INLINE float recipFast(float a) { return 1.0f/a; } - - //! \brief platform-specific square root - PX_CUDA_CALLABLE PX_FORCE_INLINE float sqrt(float a) { return ::sqrtf(a); } - - //! \brief platform-specific reciprocal square root - PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrt(float a) { return 1.0f/::sqrtf(a); } - - //! \brief platform-specific reciprocal square root estimate - PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrtFast(float a) { return 1.0f/::sqrtf(a); } - - //! \brief platform-specific sine - PX_CUDA_CALLABLE PX_FORCE_INLINE float sin(float a) { return ::sinf(a); } - - //! \brief platform-specific cosine - PX_CUDA_CALLABLE PX_FORCE_INLINE float cos(float a) { return ::cosf(a); } - - //! \brief platform-specific minimum - PX_CUDA_CALLABLE PX_FORCE_INLINE float selectMin(float a, float b) { return a<b ? a : b; } - - //! \brief platform-specific maximum - PX_CUDA_CALLABLE PX_FORCE_INLINE float selectMax(float a, float b) { return a>b ? a : b; } - - //! \brief platform-specific finiteness check - PX_CUDA_CALLABLE PX_FORCE_INLINE bool isFinite(float a) - { -#ifdef __CUDACC__ - return isfinite(a) ? true : false; -#else - return !nn::cstd::IsNan(a) && !nn::cstd::IsInf(a); -#endif - } - - //! \brief platform-specific finiteness check - PX_CUDA_CALLABLE PX_FORCE_INLINE bool isFinite(double a) - { -#ifdef __CUDACC__ - return isfinite(a) ? true : false; -#else - return !nn::cstd::IsNan(a) && !nn::cstd::IsInf(a); -#endif - } - - /*! - Sets \c count bytes starting at \c dst to zero. - */ - PX_FORCE_INLINE void* memZero(void* PX_RESTRICT dest, uint32_t count) - { - return memset(dest, 0, count); - } - - /*! - Sets \c count bytes starting at \c dst to \c c. 
- */ - PX_FORCE_INLINE void* memSet(void* PX_RESTRICT dest, int32_t c, uint32_t count) - { - return memset(dest, c, count); - } - - /*! - Copies \c count bytes from \c src to \c dst. User memMove if regions overlap. - */ - PX_FORCE_INLINE void* memCopy(void* PX_RESTRICT dest, const void* PX_RESTRICT src, uint32_t count) - { - return memcpy(dest, src, count); - } - - /*! - Copies \c count bytes from \c src to \c dst. Supports overlapping regions. - */ - PX_FORCE_INLINE void* memMove(void* PX_RESTRICT dest, const void* PX_RESTRICT src, uint32_t count) - { - return memmove(dest, src, count); - } - - /*! - Set 128B to zero starting at \c dst+offset. Must be aligned. - */ - PX_FORCE_INLINE void memZero128(void* PX_RESTRICT dest, uint32_t offset = 0) - { - PX_ASSERT(((size_t(dest)+offset) & 0x7f) == 0); - memSet((char* PX_RESTRICT)dest+offset, 0, 128); - } - -#if !PX_DOXYGEN -} // namespace intrinsics -} // namespace physx -#endif - -#endif diff --git a/PxShared/include/task/PxTask.h b/PxShared/include/task/PxTask.h index 2761109..85d91da 100644 --- a/PxShared/include/task/PxTask.h +++ b/PxShared/include/task/PxTask.h @@ -45,7 +45,7 @@ namespace physx class PxBaseTask { public: - PxBaseTask() : mEventID(0xFFFF), mProfileStat(0), mTm(0) {} + PxBaseTask() : mContextID(0), mTm(NULL) {} virtual ~PxBaseTask() {} /** @@ -78,50 +78,24 @@ public: * references to it - so it may safely run its destructor, recycle itself, etc. * provided no additional user references to the task exist */ - virtual void release() = 0; - /** - * \brief Execute user run method with wrapping profiling events. - * - * Optional entry point for use by CpuDispatchers. - * - * \param[in] threadId The threadId of the thread that executed the task. 
- */ - PX_INLINE void runProfiled(uint32_t threadId=0) - { - mTm->emitStartEvent(*this, threadId); - run(); - mTm->emitStopEvent(*this, threadId); - } - - /** - * \brief Specify stop event statistic - * - * If called before or while the task is executing, the given value - * will appear in the task's event bar in the profile viewer - * - * \param[in] stat The stat to signal when the task is finished - */ - PX_INLINE void setProfileStat( uint16_t stat ) - { - mProfileStat = stat; - } - /** * \brief Return PxTaskManager to which this task was submitted * * Note, can return NULL if task was not submitted, or has been * completed. */ - PX_INLINE PxTaskManager* getTaskManager() const + PX_FORCE_INLINE PxTaskManager* getTaskManager() const { return mTm; } + PX_FORCE_INLINE void setContextId(PxU64 id) { mContextID = id; } + PX_FORCE_INLINE PxU64 getContextId() const { return mContextID; } + protected: - uint16_t mEventID; //!< Registered profile event ID - uint16_t mProfileStat; //!< Profiling statistic + PxU64 mContextID; //!< Context ID for profiler interface PxTaskManager* mTm; //!< Owning PxTaskManager instance friend class PxTaskMgr; @@ -212,7 +186,6 @@ public: { mStreamIndex = 0; mPreSyncRequired = false; - mProfileStat = 0; } /** diff --git a/PxShared/include/task/PxTaskManager.h b/PxShared/include/task/PxTaskManager.h index f6f29c4..f40f7b1 100644 --- a/PxShared/include/task/PxTaskManager.h +++ b/PxShared/include/task/PxTaskManager.h @@ -215,9 +215,6 @@ protected: virtual void decrReference(PxLightCpuTask&) = 0; virtual void addReference(PxLightCpuTask&) = 0; - virtual void emitStartEvent(PxBaseTask&, uint32_t threadId=0) = 0; - virtual void emitStopEvent(PxBaseTask&, uint32_t threadId=0) = 0; - /*! 
\endcond */ friend class PxBaseTask; diff --git a/PxShared/src/compiler/cmake/Android/CMakeLists.txt b/PxShared/src/compiler/cmake/Android/CMakeLists.txt index 0499c29..9aa00f9 100644 --- a/PxShared/src/compiler/cmake/Android/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/Android/CMakeLists.txt @@ -1,34 +1,37 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared ) include(../common/CMakeLists.txt) STRING(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWERCASE) -IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Android - SET(TARGET_BUILD_PLATFORM "Android") +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to android + SET(TARGET_BUILD_PLATFORM "android") ENDIF() -SET(PLATFORM_LIST Android) +SET(PLATFORM_LIST android) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) ENDIF() if(${ANDROID_ABI} STREQUAL "armeabi-v7a") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof ") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections -funwind-tables -fomit-frame-pointer -funswitch-loops -finline-limit=300 -fno-strict-aliasing -fstack-protector -Wno-invalid-offsetof ") +elseif(${ANDROID_ABI} STREQUAL "armeabi-v7a with NEON") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -ffast-math -fno-exceptions -ffunction-sections -funwind-tables -fomit-frame-pointer -funswitch-loops -finline-limit=300 -fno-strict-aliasing -fstack-protector -Wno-invalid-offsetof ") elseif(${ANDROID_ABI} STREQUAL "arm64-v8a") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof ") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections 
-Wno-invalid-offsetof ") elseif(${ANDROID_ABI} STREQUAL "x86") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof -fpack-struct=8 -malign-double ") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections -Wno-invalid-offsetof -fpack-struct=8 -malign-double ") elseif(${ANDROID_ABI} STREQUAL "x86_64") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof -mstackrealign -msse3 ") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffunction-sections -fdata-sections -Wno-invalid-offsetof -mstackrealign -msse3 ") endif() SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g") -SET(CMAKE_CXX_FLAGS_CHECKED "-O2") -SET(CMAKE_CXX_FLAGS_PROFILE "-O2") -SET(CMAKE_CXX_FLAGS_RELEASE "-O2") +SET(CMAKE_CXX_FLAGS_CHECKED "-O3") +SET(CMAKE_CXX_FLAGS_PROFILE "-O3") +SET(CMAKE_CXX_FLAGS_RELEASE "-O3") -SET(PXSHARED_ANDROID_COMPILE_DEFS _LIB;) +SET(PXSHARED_ANDROID_COMPILE_DEFS _LIB;__STDC_LIMIT_MACROS;) SET(PXSHARED_ANDROID_DEBUG_COMPILE_DEFS _DEBUG;PX_DEBUG=1;PX_CHECKED=1) SET(PXSHARED_ANDROID_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1) SET(PXSHARED_ANDROID_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1) diff --git a/PxShared/src/compiler/cmake/IOS/CMakeLists.txt b/PxShared/src/compiler/cmake/IOS/CMakeLists.txt index d281e32..4e2f814 100644 --- a/PxShared/src/compiler/cmake/IOS/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/IOS/CMakeLists.txt @@ -1,12 +1,13 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared CXX) include(../common/CMakeLists.txt) -IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to IOS - SET(TARGET_BUILD_PLATFORM "IOS") +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to ios + SET(TARGET_BUILD_PLATFORM "ios") ENDIF() -SET(PLATFORM_LIST IOS) 
+SET(PLATFORM_LIST ios) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) diff --git a/PxShared/src/compiler/cmake/Linux/CMakeLists.txt b/PxShared/src/compiler/cmake/Linux/CMakeLists.txt index 2fa592e..ea25a8b 100644 --- a/PxShared/src/compiler/cmake/Linux/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/Linux/CMakeLists.txt @@ -1,13 +1,14 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared CXX) include(../common/CMakeLists.txt) STRING(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWERCASE) -IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Linux - SET(TARGET_BUILD_PLATFORM "Linux") +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to linux + SET(TARGET_BUILD_PLATFORM "linux") ENDIF() -SET(PLATFORM_LIST Linux) +SET(PLATFORM_LIST linux) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) @@ -16,16 +17,16 @@ ENDIF() IF (${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu") IF ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") # using Clang - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-invalid-noreturn -Wno-return-type-c-linkage -Wno-reserved-id-macro 
-Wno-c++98-compat-pedantic -Wno-unused-local-typedef -Wno-old-style-cast -Wno-newline-eof -Wno-unused-private-field -Wno-undefined-func-template -Wno-format-nonliteral -Wno-implicit-fallthrough -Wno-undefined-reinterpret-cast") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-invalid-noreturn -Wno-return-type-c-linkage -Wno-reserved-id-macro -Wno-c++98-compat-pedantic -Wno-unused-local-typedef -Wno-old-style-cast -Wno-newline-eof -Wno-unused-private-field -Wno-undefined-func-template -Wno-format-nonliteral -Wno-implicit-fallthrough -Wno-undefined-reinterpret-cast -Wno-disabled-macro-expansion") ELSEIF ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") # using GCC SET(LIBPATH_SUFFIX "x64") SET(CMAKE_CXX_FLAGS "-Werror -m64 -fPIC -msse2 -mfpmath=sse -ffast-math -fno-exceptions -fno-rtti -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wextra -fno-strict-aliasing -fdiagnostics-show-option -Wno-invalid-offsetof -Wno-uninitialized -Wno-missing-field-initializers") ENDIF("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") ELSEIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "arm-unknown-linux-gnueabihf") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -mfpu=neon -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything 
-Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-implicit-fallthrough") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -mfpu=neon -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-implicit-fallthrough -Wno-disabled-macro-expansion") ELSEIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "aarch64-unknown-linux-gnueabi") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow 
-Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-unused-local-typedef -Wno-implicit-fallthrough") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-unused-local-typedef -Wno-implicit-fallthrough -Wno-disabled-macro-expansion") ELSE(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu") MESSAGE(FATAL_ERROR "Unknown CMAKE_LIBRARY_ARCHITECTURE ${CMAKE_LIBRARY_ARCHITECTURE}") ENDIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu") @@ -36,7 +37,7 @@ SET(CMAKE_CXX_FLAGS_CHECKED "-O3 -g -gdwarf-3") SET(CMAKE_CXX_FLAGS_PROFILE "-O3 -g -gdwarf-3") SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -gdwarf-3") -IF(DEFINED PX_GENERATE_GPU_PROJECTS) +IF(DEFINED GENERATE_GPU_PROJECTS) SET(PXSHARED_LINUX_COMPILE_DEFS _LIB) ELSE() # Disable cuda and dx for all projects on windows @@ -71,7 +72,7 @@ IF(DEFINED PX_SELECT_COMPONENTS) 
INCLUDE(PxTask.cmake) endif() if ("PxCudaContextManager" IN_LIST PX_SELECT_COMPONENTS) - IF(DEFINED PX_GENERATE_GPU_PROJECTS) + IF(DEFINED GENERATE_GPU_PROJECTS) INCLUDE(PxCudaContextManager.cmake) ENDIF() endif() @@ -80,7 +81,7 @@ INCLUDE(PxFoundation.cmake) INCLUDE(PsFastXml.cmake) INCLUDE(PxPvdSDK.cmake) INCLUDE(PxTask.cmake) -IF(DEFINED PX_GENERATE_GPU_PROJECTS) +IF(DEFINED GENERATE_GPU_PROJECTS) INCLUDE(PxCudaContextManager.cmake) ENDIF() ENDIF() diff --git a/PxShared/src/compiler/cmake/Mac/CMakeLists.txt b/PxShared/src/compiler/cmake/Mac/CMakeLists.txt index beb06bc..3242b46 100644 --- a/PxShared/src/compiler/cmake/Mac/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/Mac/CMakeLists.txt @@ -1,12 +1,13 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared CXX) include(../common/CMakeLists.txt) -IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Mac - SET(TARGET_BUILD_PLATFORM "Mac") +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to mac + SET(TARGET_BUILD_PLATFORM "mac") ENDIF() -SET(PLATFORM_LIST Mac) +SET(PLATFORM_LIST mac) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) diff --git a/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake b/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake index c1675ae..f7bc761 100644 --- a/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake +++ b/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake @@ -13,7 +13,7 @@ FIND_PATH( NVTOOLSEXT_INCLUDE_DIRS nvToolsExt.h INCLUDE(FindPackageHandleStandardArgs) -IF(TARGET_BUILD_PLATFORM STREQUAL "Windows") +IF(TARGET_BUILD_PLATFORM STREQUAL "windows") # NOTE: Doesn't make sense for all platforms - ARM IF(CMAKE_CL_64) SET(NVTOOLSEXT_LIBNAME "nvToolsExt64_1") diff --git a/PxShared/src/compiler/cmake/common/CMakeLists.txt b/PxShared/src/compiler/cmake/common/CMakeLists.txt index 22d2097..3bbb57a 100644 --- a/PxShared/src/compiler/cmake/common/CMakeLists.txt +++ 
b/PxShared/src/compiler/cmake/common/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.3) -PROJECT(PxShared CXX) - CMAKE_POLICY(SET CMP0057 NEW) # Enable IN_LIST IF(DEFINED ENV{GW_DEPS_ROOT}) diff --git a/PxShared/src/compiler/cmake/html5/CMakeLists.txt b/PxShared/src/compiler/cmake/html5/CMakeLists.txt index 8b9587a..e0d2776 100644 --- a/PxShared/src/compiler/cmake/html5/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/html5/CMakeLists.txt @@ -1,10 +1,11 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared CXX) #set(CMAKE_VERBOSE_MAKEFILE ON) include(../common/CMakeLists.txt) IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to HTML5 - SET(TARGET_BUILD_PLATFORM "HTML5") + SET(TARGET_BUILD_PLATFORM "html5") ENDIF() SET(PLATFORM_LIST HTML5) diff --git a/PxShared/src/compiler/cmake/windows/CMakeLists.txt b/PxShared/src/compiler/cmake/windows/CMakeLists.txt index 39b7dfc..fe2b00d 100644 --- a/PxShared/src/compiler/cmake/windows/CMakeLists.txt +++ b/PxShared/src/compiler/cmake/windows/CMakeLists.txt @@ -1,12 +1,13 @@ cmake_minimum_required(VERSION 3.3) +PROJECT(PxShared CXX) include(../common/CMakeLists.txt) -IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Windows - SET(TARGET_BUILD_PLATFORM "Windows") +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to windows + SET(TARGET_BUILD_PLATFORM "windows") ENDIF() -SET(PLATFORM_LIST Windows) +SET(PLATFORM_LIST windows) IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) @@ -33,7 +34,7 @@ SET(CMAKE_SHARED_LINKER_FLAGS "/DEBUG") # Controls PX_NVTX for all projects on windows SET(PXSHARED_WINDOWS_ENABLE_NVTX 0) -IF(DEFINED PX_GENERATE_GPU_PROJECTS) +IF(DEFINED LINK_GPU_BINARIES) SET(PXSHARED_WINDOWS_COMPILE_DEFS WIN32;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_WINSOCK_DEPRECATED_NO_WARNINGS;) ELSE() # Disable cuda and dx for all projects on windows @@ -44,6 +45,10 @@ 
SET(PXSHARED_WINDOWS_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1;PX_NVTX=${PXSHARED SET(PXSHARED_WINDOWS_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1;PX_NVTX=${PXSHARED_WINDOWS_ENABLE_NVTX}) SET(PXSHARED_WINDOWS_RELEASE_COMPILE_DEFS NDEBUG) +IF(DEFINED PX_SCALAR_MATH) + ADD_DEFINITIONS(-DPX_SIMD_DISABLED) +ENDIF() + IF(CMAKE_CL_64) ADD_DEFINITIONS(-DWIN64) ENDIF(CMAKE_CL_64) @@ -74,7 +79,7 @@ IF(DEFINED PX_SELECT_COMPONENTS) INCLUDE(PxTask.cmake) endif() if ("PxCudaContextManager" IN_LIST PX_SELECT_COMPONENTS) - IF(DEFINED PX_GENERATE_GPU_PROJECTS) + IF(DEFINED GENERATE_GPU_PROJECTS) INCLUDE(PxCudaContextManager.cmake) ENDIF() endif() @@ -83,7 +88,7 @@ INCLUDE(PxFoundation.cmake) INCLUDE(PsFastXml.cmake) INCLUDE(PxPvdSDK.cmake) INCLUDE(PxTask.cmake) -IF(DEFINED PX_GENERATE_GPU_PROJECTS) +IF(DEFINED GENERATE_GPU_PROJECTS) INCLUDE(PxCudaContextManager.cmake) ENDIF() ENDIF() diff --git a/PxShared/src/cudamanager/include/GpuDispatcher.h b/PxShared/src/cudamanager/include/GpuDispatcher.h index aedb345..10c412f 100644 --- a/PxShared/src/cudamanager/include/GpuDispatcher.h +++ b/PxShared/src/cudamanager/include/GpuDispatcher.h @@ -258,8 +258,6 @@ public: ~PxGpuWorkerThread(); void setCudaContext(PxCudaContextManager& ctx); - void emitStartEvent(const char *id); - void emitStopEvent(const char *id); /* API to TaskManager */ void startSimulation(); diff --git a/PxShared/src/cudamanager/src/BlockingWait.cpp b/PxShared/src/cudamanager/src/BlockingWait.cpp index fada532..8a2cc44 100644 --- a/PxShared/src/cudamanager/src/BlockingWait.cpp +++ b/PxShared/src/cudamanager/src/BlockingWait.cpp @@ -63,8 +63,9 @@ void PxGpuWorkerThread::blockingWaitFunc() } else if (!mFailureDetected) { - emitStartEvent("GpuDispatcher.BlockingWaitEvent"); - +#if PX_SUPPORT_PXTASK_PROFILING + PX_PROFILE_ZONE("GpuDispatcher.BlockingWaitEvent", 0); +#endif if (1 & ~intptr_t(b.blockingStream)) { GD_CHECK_CALL(cuStreamSynchronize(b.blockingStream)); @@ -73,8 +74,6 @@ void PxGpuWorkerThread::blockingWaitFunc() { 
GD_CHECK_CALL(cuEventSynchronize(b.blockingEvent)); } - - emitStopEvent("GpuDispatcher.BlockingWaitEvent"); } if (b.blockingEvent) diff --git a/PxShared/src/cudamanager/src/CudaContextManager.cpp b/PxShared/src/cudamanager/src/CudaContextManager.cpp index b5b6efc..e05911e 100644 --- a/PxShared/src/cudamanager/src/CudaContextManager.cpp +++ b/PxShared/src/cudamanager/src/CudaContextManager.cpp @@ -23,7 +23,7 @@ // components in life support devices or systems without express written approval of // NVIDIA Corporation. // -// Copyright (c) 2008-2014 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. #include "foundation/PxAssert.h" #include "foundation/PxErrorCallback.h" @@ -88,102 +88,6 @@ static void* GetProcAddress(void* handle, const char* name) { return dlsym(handl #define ENABLE_DEVICE_INFO_BRINGUP 0 -#include "GPUProfile.h" - -#if ENABLE_CUDA_DEVICE_RESET -#include "cudaProfiler.h" -#endif - -#if USE_PERFKIT -#pragma warning (push) -#pragma warning (disable : 4099) -#pragma warning (disable : 4191) -#define NVPM_INITGUID -#include <stdio.h> -#include "cuda.h" -#include "../../../../../../../externals/nvPerfKit/4.1.0.14260/inc/NvPmApi.Manager.h" -static NvPmApiManager S_NVPMManager; -extern NvPmApiManager *GetNvPmApiManager() {return &S_NVPMManager;} -const NvPmApi *GetNvPmApi() {return S_NVPMManager.Api();} -NVPMContext hNVPMContext(0); - -void initPerfKit() -{ - //Sync with GPU - cuCtxSynchronize(); - - // Reset counters - uint32_t nCount; - GetNvPmApi()->Sample(hNVPMContext, NULL, &nCount); -} - -void endPerfKit() -{ - //Sync with GPU - cuCtxSynchronize(); - - uint32_t nCount; - GetNvPmApi()->Sample(hNVPMContext, NULL, &nCount); - - uint64_t value; - uint64_t cycle; - - uint64_t sum = 0; - uint64_t maxVal = 0; - char name[512]; - - int nvStatus = 0; - - PX_UNUSED(value); - PX_UNUSED(cycle); - PX_UNUSED(sum); - PX_UNUSED(maxVal); - PX_UNUSED(name); - PX_UNUSED(nvStatus); - - printf("counters:\n"); - 
-#if COUNT_L2_TO_L1_BYTES - nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, "l2_read_bytes", 0, &value, &cycle); - printf("L2->L1 bytes %d\n",value); -#elif COUNT_SM_TO_L1_QUERIES - nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, "tex_cache_sector_queries", 0, &value, &cycle); - printf("SM->L1 queries %d\n",value); -#endif - -#if COUNT_INST_EXECUTED || COUNT_STORE_INST_EXECUTED || COUNT_ACTIVE_CYCLES || COUNT_ACTIVE_WARPS - for (int i = 0; i != SM_COUNT; i++) - { -#if COUNT_INST_EXECUTED - sprintf_s(name, 512, "sm_inst_executed_vsm%d", i); -#elif COUNT_STORE_INST_EXECUTED - sprintf_s(name, 512, "sm_inst_executed_global_stores_vsm%d", i); -#elif COUNT_ACTIVE_CYCLES - sprintf_s(name, 512, "sm_active_cycles_vsm%d", i); -#elif COUNT_ACTIVE_WARPS - sprintf_s(name, 512, "sm_active_warps_vsm%d", i); -#endif - nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, name, 0, &value, &cycle); - - sum += value; - maxVal = physx::PxMax(maxVal, value); - } -#if COUNT_ACTIVE_CYCLES - printf("sum %I64d\n", sum); -#else - printf("sum %I64d\n", sum); -#endif - - if (!nvStatus) - { - PX_ASSERT(false); - } -#endif -} - -#pragma warning (pop) -#endif - namespace physx { @@ -201,7 +105,7 @@ public: ~CudaCtxMgr(); bool safeDelayImport(PxErrorCallback& errorCallback); - CUcontext acquireContext(); + void acquireContext(); void releaseContext(); /* All these methods can be called without acquiring the context */ @@ -219,6 +123,7 @@ public: bool supportsArchSM35() const; // GK110 bool supportsArchSM50() const; // GM100 bool supportsArchSM52() const; // GM200 + bool supportsArchSM60() const; // GP100 bool isIntegrated() const; // true if GPU is integrated (MCP) part bool canMapHostMemory() const; // true if GPU map host memory to GPU int getDriverVersion() const; @@ -230,6 +135,7 @@ public: unsigned int getClockRate() const; const char* getDeviceName() const; + CUdevice getDevice() const; const CUdevprop* getDeviceProperties() const; PxCudaInteropMode::Enum 
getInteropMode() const; @@ -251,6 +157,8 @@ public: void release(); + CUcontext getContext() { return mCtx; } + private: int mSceneCount; @@ -330,6 +238,10 @@ bool CudaCtxMgr::supportsArchSM52() const { return mIsValid && ((mComputeCapMajor > 5) || (mComputeCapMajor == 5 && mComputeCapMinor >= 2)); } +bool CudaCtxMgr::supportsArchSM60() const +{ + return mIsValid && mComputeCapMajor >= 6; +} bool CudaCtxMgr::isIntegrated() const { @@ -380,6 +292,18 @@ const char* CudaCtxMgr::getDeviceName() const } } +CUdevice CudaCtxMgr::getDevice() const +{ + if (mIsValid) + { + return mDevHandle; + } + else + { + return -1; + } +} + PxCudaInteropMode::Enum CudaCtxMgr::getInteropMode() const { return mInteropMode; @@ -504,8 +428,7 @@ CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& er if (PhysXDeviceSettings::isUsingDedicatedGPU() == 1 || sliEnabled) { - if (mInteropMode == PxCudaInteropMode::D3D9_INTEROP || - mInteropMode == PxCudaInteropMode::D3D10_INTEROP || + if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP || mInteropMode == PxCudaInteropMode::D3D11_INTEROP) { mInteropMode = PxCudaInteropMode::NO_INTEROP; @@ -562,18 +485,6 @@ CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& er mOwnContext = true; } #if PX_WIN32 || PX_WIN64 - else if (mInteropMode == PxCudaInteropMode::D3D9_INTEROP) - { - status = cuD3D9CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags, - reinterpret_cast<IDirect3DDevice9*>(desc.graphicsDevice)); - - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D9CtxCreate failed",__FILE__,__LINE__); - return; - } - mOwnContext = true; - } else if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP) { status = cuD3D10CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags, @@ -710,73 +621,6 @@ CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& er errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Number of SM: %d", mMultiprocessorCount); 
errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Max Threads Per Block: %d", mMaxThreadsPerBlock); #endif - -#if USE_PERFKIT - { -#if _WIN64 - wchar_t * dllName = L"..\\..\\..\\..\\..\\externals\\nvPerfKit\\4.1.0.14260\\bin\\win7_x64\\NvPmApi.Core.dll"; -#else - wchar_t * dllName = L"..\\..\\..\\..\\..\\externals\\nvPerfKit\\4.1.0.14260\\bin\\win7_x86\\NvPmApi.Core.dll"; -#endif - - NVPMRESULT nvResult; - - if ((nvResult = GetNvPmApiManager()->Construct(dllName)) != NVPM_OK) - { - printf("perfkit error 1\n"); - return; - } - - if ((nvResult = GetNvPmApi()->Init()) != NVPM_OK) - { - printf("perfkit error 2\n"); - return; - } - - acquireContext(); - - CUcontext ctx; - cuCtxGetCurrent(&ctx); - if ((nvResult = GetNvPmApi()->CreateContextFromCudaContext((APIContextHandle)ctx, &hNVPMContext)) != NVPM_OK) - { - printf("perfkit error 3\n"); - return; // This is an error condition - } - - uint32_t nvStatus = 0; - -#if COUNT_L2_TO_L1_BYTES - nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "l2_read_bytes"); -#elif COUNT_SM_TO_L1_QUERIES - nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "tex_cache_sector_queries"); -#endif - -#if COUNT_INST_EXECUTED || COUNT_STORE_INST_EXECUTED || COUNT_ACTIVE_CYCLES || COUNT_ACTIVE_WARPS - char name[512]; - for (int i = 0; i != SM_COUNT; i++) - { -#if COUNT_INST_EXECUTED - sprintf_s(name,512,"sm_inst_executed_vsm%d",i); -#elif COUNT_STORE_INST_EXECUTED - sprintf_s(name, 512, "sm_inst_executed_global_stores_vsm%d",i); -#elif COUNT_ACTIVE_CYCLES - sprintf_s(name, 512, "sm_active_cycles_vsm%d",i); -#elif COUNT_ACTIVE_WARPS - sprintf_s(name, 512, "sm_active_warps_vsm%d",i); -#endif - nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, name); - } -#elif COUNT_GPU_BUSY - nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "gpu_busy"); -#endif - - if (nvStatus != 0) - { - printf("perfkit error 4\n"); - return; // This is an error condition - } - } -#endif } /* Some driver version mismatches can cause delay import 
crashes. Load NVCUDA.dll @@ -871,10 +715,6 @@ CudaCtxMgr::~CudaCtxMgr() if(!--mManagerRefCount) shdfnd::TlsFree(mContextRefCountTls); #endif - -#if ENABLE_CUDA_DEVICE_RESET - CUT_SAFE_CALL(cuProfilerStop()); -#endif } bool CudaCtxMgr::registerResourceInCudaGL(CUgraphicsResource& resource, uint32_t buffer, PxCudaInteropRegisterFlags flags) @@ -900,9 +740,6 @@ bool CudaCtxMgr::registerResourceInCudaD3D(CUgraphicsResource& resource, void* r switch (mInteropMode) { - case PxCudaInteropMode::D3D9_INTEROP: - ret = cuGraphicsD3D9RegisterResource(&resource, (IDirect3DResource9*)resourcePointer, uint32_t(flags)); - break; case PxCudaInteropMode::D3D10_INTEROP: ret = cuGraphicsD3D10RegisterResource(&resource, (ID3D10Resource*)resourcePointer, uint32_t(flags)); break; @@ -938,7 +775,7 @@ bool CudaCtxMgr::unregisterResourceInCuda(CUgraphicsResource resource) return ret == CUDA_SUCCESS; } -CUcontext CudaCtxMgr::acquireContext() +void CudaCtxMgr::acquireContext() { CUcontext ctx = 0; CUT_SAFE_CALL(cuCtxGetCurrent(&ctx)); @@ -955,8 +792,6 @@ CUcontext CudaCtxMgr::acquireContext() char* refCount = (char*)shdfnd::TlsGet(mContextRefCountTls); shdfnd::TlsSet(mContextRefCountTls, ++refCount); #endif - - return mCtx; } void CudaCtxMgr::releaseContext() diff --git a/PxShared/src/cudamanager/src/GpuDispatcher.cpp b/PxShared/src/cudamanager/src/GpuDispatcher.cpp index 0d05a97..432a0cd 100644 --- a/PxShared/src/cudamanager/src/GpuDispatcher.cpp +++ b/PxShared/src/cudamanager/src/GpuDispatcher.cpp @@ -487,21 +487,6 @@ PxGpuWorkerThread::~PxGpuWorkerThread() } } -void PxGpuWorkerThread::emitStartEvent(const char *id) -{ - PX_UNUSED(id); -#if PX_SUPPORT_PXTASK_PROFILING - PX_PROFILE_START_CROSSTHREAD(id,0); -#endif -} - -void PxGpuWorkerThread::emitStopEvent(const char *id) -{ - PX_UNUSED(id); -#if PX_SUPPORT_PXTASK_PROFILING - PX_PROFILE_STOP_CROSSTHREAD(id,0); -#endif -} /* A TaskManager is informing us that its simulation is being stepped */ void PxGpuWorkerThread::startSimulation() @@ 
-555,15 +540,14 @@ void PxGpuWorkerThread::execute() */ void PxGpuWorkerThread::addCompletionPrereq(PxBaseTask& task) { - if (mFailureDetected) - { + if(mFailureDetected) return; - } - emitStartEvent("GpuDispatcher.AddCompletionEvent"); +#if PX_SUPPORT_PXTASK_PROFILING + PX_PROFILE_ZONE("GpuDispatcher.AddCompletionEvent", task.getContextId()); +#endif task.addReference(); mCompletionTasks.pushBack(&task); - emitStopEvent("GpuDispatcher.AddCompletionEvent"); } namespace @@ -757,7 +741,9 @@ void PxGpuWorkerThread::pollSubmitted(shdfnd::Array<ReadyTask>* ready) void PxGpuWorkerThread::processActiveTasks() { - emitStartEvent("GpuDispatcher.ProcessTasksEvent"); +#if PX_SUPPORT_PXTASK_PROFILING + PX_PROFILE_ZONE("GpuDispatcher.ProcessTasksEvent", 0); // PT: TODO: fix invalid context +#endif if (mFailureDetected) { @@ -766,7 +752,6 @@ void PxGpuWorkerThread::processActiveTasks() mInputReady.reset(); mSubmittedTaskList.popBack()->release(); } - emitStopEvent("GpuDispatcher.ProcessTasksEvent"); return; } @@ -824,13 +809,16 @@ void PxGpuWorkerThread::processActiveTasks() else { const CUstream s = (r.task->mStreamIndex > 0) ? 
mCachedStreams.get(r.task->mStreamIndex) : 0; + + bool active; + { #if PX_PROFILE - r.task->mTm->emitStartEvent(*r.task); +#if PX_SUPPORT_PXTASK_PROFILING + PX_PROFILE_ZONE(r.task->getName(), r.task->getContextId()); #endif - bool active = r.task->launchInstance(s, int(r.iteration++)); -#if PX_PROFILE - r.task->mTm->emitStopEvent(*r.task); #endif + active = r.task->launchInstance(s, int(r.iteration++)); + } if(singleStream != r.task->mStreamIndex) singleStream = 0; @@ -935,8 +923,6 @@ void PxGpuWorkerThread::processActiveTasks() while (tasksRemain); mCachedNonBlockingEvents.add(nonBlockEv); - - emitStopEvent("GpuDispatcher.ProcessTasksEvent"); } #endif diff --git a/PxShared/src/foundation/include/PsAllocator.h b/PxShared/src/foundation/include/PsAllocator.h index cbf32d3..f988b6c 100644 --- a/PxShared/src/foundation/include/PsAllocator.h +++ b/PxShared/src/foundation/include/PsAllocator.h @@ -108,7 +108,7 @@ #elif PX_XBOXONE #include <malloc.h> #define PxAlloca(x) alloca(x) -#elif PX_NX +#elif PX_SWITCH #include <malloc.h> #define PxAlloca(x) alloca(x) #endif diff --git a/PxShared/src/foundation/include/PsAoS.h b/PxShared/src/foundation/include/PsAoS.h index 641a40a..5a7c82d 100644 --- a/PxShared/src/foundation/include/PsAoS.h +++ b/PxShared/src/foundation/include/PsAoS.h @@ -34,7 +34,7 @@ #if PX_WINDOWS && !PX_NEON #include "windows/PsWindowsAoS.h" -#elif(PX_UNIX_FAMILY || PX_PS4 || PX_NX) +#elif(PX_UNIX_FAMILY || PX_PS4 || PX_SWITCH) #include "unix/PsUnixAoS.h" #elif PX_XBOXONE #include "XboxOne/PsXboxOneAoS.h" diff --git a/PxShared/src/foundation/include/PsInlineAoS.h b/PxShared/src/foundation/include/PsInlineAoS.h index 6d43607..6ae15cf 100644 --- a/PxShared/src/foundation/include/PsInlineAoS.h +++ b/PxShared/src/foundation/include/PsInlineAoS.h @@ -35,7 +35,7 @@ #if PX_WINDOWS #include "windows/PsWindowsTrigConstants.h" #include "windows/PsWindowsInlineAoS.h" -#elif(PX_UNIX_FAMILY || PX_PS4 || PX_NX) +#elif(PX_UNIX_FAMILY || PX_PS4 || PX_SWITCH) #include 
"unix/PsUnixTrigConstants.h" #include "unix/PsUnixInlineAoS.h" #elif PX_XBOXONE diff --git a/PxShared/src/foundation/include/PsIntrinsics.h b/PxShared/src/foundation/include/PsIntrinsics.h index 1e1b9d1..38b91ba 100644 --- a/PxShared/src/foundation/include/PsIntrinsics.h +++ b/PxShared/src/foundation/include/PsIntrinsics.h @@ -38,8 +38,8 @@ #include "unix/PsUnixIntrinsics.h" #elif PX_XBOXONE #include "XboxOne/PsXboxOneIntrinsics.h" -#elif PX_NX -#include "nx/PsNXIntrinsics.h" +#elif PX_SWITCH +#include "switch/PsSwitchIntrinsics.h" #else #error "Platform not supported!" #endif diff --git a/PxShared/src/foundation/include/PsThread.h b/PxShared/src/foundation/include/PsThread.h index 8ba553a..4e7c104 100644 --- a/PxShared/src/foundation/include/PsThread.h +++ b/PxShared/src/foundation/include/PsThread.h @@ -41,7 +41,7 @@ #if PX_WINDOWS_FAMILY || PX_XBOXONE #define PxSpinLockPause() __asm pause -#elif PX_LINUX || PX_ANDROID || PX_PS4 || PX_APPLE_FAMILY || PX_NX +#elif PX_LINUX || PX_ANDROID || PX_PS4 || PX_APPLE_FAMILY || PX_SWITCH #define PxSpinLockPause() asm("nop") #else #error "Platform not supported!" @@ -173,7 +173,7 @@ class PX_FOUNDATION_API ThreadImpl Change the affinity mask for this thread. The mask is a platform specific value. - On Windows, Linux, PS4, XboxOne and NX platforms, each set mask bit represents + On Windows, Linux, PS4, XboxOne and Switch platforms, each set mask bit represents the index of a logical processor that the OS may schedule thread execution on. Bits outside the range of valid logical processors may be ignored or cause the function to return an error. 
diff --git a/PxShared/src/foundation/include/PsVecMath.h b/PxShared/src/foundation/include/PsVecMath.h index 4e891d8..ffd2de8 100644 --- a/PxShared/src/foundation/include/PsVecMath.h +++ b/PxShared/src/foundation/include/PsVecMath.h @@ -54,7 +54,7 @@ #define COMPILE_VECTOR_INTRINSICS 1 #elif PX_IOS&& PX_NEON #define COMPILE_VECTOR_INTRINSICS 1 -#elif PX_NX +#elif PX_SWITCH #define COMPILE_VECTOR_INTRINSICS 1 #else #define COMPILE_VECTOR_INTRINSICS 0 diff --git a/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h b/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h index 9bef465..f5dea7b 100644 --- a/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h +++ b/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h @@ -40,8 +40,13 @@ #define BOOL_TO_U16(b) (PxU16)(- PxI32(b)) +#define PX_VECMATH_ASSERT_ENABLED 0 +#if PX_VECMATH_ASSERT_ENABLED #define VECMATHAOS_ASSERT(x) { PX_ASSERT(x); } +#else +#define VECMATHAOS_ASSERT(x) +#endif ///////////////////////////////////////////////////////////////////// ////INTERNAL USE ONLY AND TESTS @@ -1479,7 +1484,7 @@ PX_FORCE_INLINE BoolV BOr(const BoolV a, const BoolV b) PX_FORCE_INLINE PxU32 BAllEq(const BoolV a, const BoolV b) { - return (a.ux == b.ux && a.uy == b.uy && a.uz == b.uz && a.uw == b.uw ? TRUE_TO_U32 : FALSE_TO_U32); + return (a.ux == b.ux && a.uy == b.uy && a.uz == b.uz && a.uw == b.uw ? 1 : 0); } PX_FORCE_INLINE PxU32 BAllEqTTTT(const BoolV a) diff --git a/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h b/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h index 2a0578d..a97f821 100644 --- a/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h +++ b/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h @@ -42,7 +42,7 @@ // "exact" #define VRECIPQ recipq_newton<4> -#if PX_NX +#if PX_SWITCH // StabilizationTests.AveragePoint needs more precision to succeed. 
#define VRECIP recip_newton<5> #else diff --git a/PxShared/src/foundation/src/PsAssert.cpp b/PxShared/src/foundation/src/PsAssert.cpp index 3070383..295a81e 100644 --- a/PxShared/src/foundation/src/PsAssert.cpp +++ b/PxShared/src/foundation/src/PsAssert.cpp @@ -34,8 +34,8 @@ #if PX_WINDOWS_FAMILY #include <crtdbg.h> -#elif PX_NX -#include "nx/PsNXAbort.h" +#elif PX_SWITCH +#include "switch/PsSwitchAbort.h" #endif namespace @@ -63,7 +63,7 @@ class DefaultAssertHandler : public physx::PxAssertHandler __debugbreak(); #elif PX_WINDOWS_FAMILY&& PX_CHECKED __debugbreak(); -#elif PX_NX +#elif PX_SWITCH abort(buffer); #else abort(); diff --git a/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h b/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h index 22903ec..abac561 100644 --- a/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h +++ b/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h @@ -74,7 +74,7 @@ namespace physx { namespace profile { { static const char* getName() { -#if PX_LINUX || PX_ANDROID || PX_PS4 || PX_IOS || PX_OSX || PX_EMSCRIPTEN || PX_NX +#if PX_LINUX || PX_ANDROID || PX_PS4 || PX_IOS || PX_OSX || PX_EMSCRIPTEN || PX_SWITCH return __PRETTY_FUNCTION__; #else return typeid(T).name(); diff --git a/PxShared/src/task/src/TaskManager.cpp b/PxShared/src/task/src/TaskManager.cpp index ffcbfcd..c6210bd 100644 --- a/PxShared/src/task/src/TaskManager.cpp +++ b/PxShared/src/task/src/TaskManager.cpp @@ -168,9 +168,6 @@ public: void decrReference( PxLightCpuTask& lighttask ); void addReference( PxLightCpuTask& lighttask ); - void emitStartEvent( PxBaseTask& basetask, uint32_t threadId=0); - void emitStopEvent( PxBaseTask& basetask, uint32_t threadId=0); - PxErrorCallback& mErrorCallback; PxCpuDispatcher *mCpuDispatcher; PxGpuDispatcher *mGpuDispatcher; @@ -304,35 +301,6 @@ void PxTaskMgr::addReference(PxLightCpuTask& lighttask) shdfnd::atomicIncrement(&lighttask.mRefCount); } -void PxTaskMgr::emitStartEvent(PxBaseTask& basetask, uint32_t threadId) -{ -#if 
DOT_LOG - currentTask = &basetask; -#endif - - PxBaseTask* tmp = &basetask; - PX_UNUSED(tmp); - PX_UNUSED(threadId); - - /* This does not need a lock! */ -#if PX_SUPPORT_PXTASK_PROFILING - //PX_COMPILE_TIME_ASSERT(sizeof(PxProfileEventId::mEventId) == sizeof(PxBaseTask::mEventID)); - PX_PROFILE_START_CROSSTHREAD(basetask.getName(),0); -#endif -} - -void PxTaskMgr::emitStopEvent(PxBaseTask& basetask, uint32_t threadId) -{ - PxBaseTask* tmp = &basetask; - PX_UNUSED(tmp); - PX_UNUSED(threadId); - - /* This does not need a lock! */ -#if PX_SUPPORT_PXTASK_PROFILING - //PX_COMPILE_TIME_ASSERT(sizeof(PxProfileEventId::mEventId) == sizeof(PxBaseTask::mEventID)); - PX_PROFILE_STOP_CROSSTHREAD(basetask.getName(),0); -#endif -} /* * Called by the owner (Scene) at the start of every frame, before |