diff options
| author | mtamis <[email protected]> | 2017-02-15 16:06:25 +0100 |
|---|---|---|
| committer | mtamis <[email protected]> | 2017-02-15 16:06:25 +0100 |
| commit | 85305930aeeb1d513e23522bd91f29ba81aa6d14 (patch) | |
| tree | 45f1bb20a45a300d1fef107e436cac95602a0e57 /PxShared/src | |
| download | nvcloth-85305930aeeb1d513e23522bd91f29ba81aa6d14.tar.xz nvcloth-85305930aeeb1d513e23522bd91f29ba81aa6d14.zip | |
NvCloth library v1.0.0
Diffstat (limited to 'PxShared/src')
255 files changed, 59040 insertions, 0 deletions
diff --git a/PxShared/src/compiler/cmake/Android/CMakeLists.txt b/PxShared/src/compiler/cmake/Android/CMakeLists.txt new file mode 100644 index 0000000..0499c29 --- /dev/null +++ b/PxShared/src/compiler/cmake/Android/CMakeLists.txt @@ -0,0 +1,43 @@ +cmake_minimum_required(VERSION 3.3) +include(../common/CMakeLists.txt) + +STRING(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWERCASE) + +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Android + SET(TARGET_BUILD_PLATFORM "Android") +ENDIF() + +SET(PLATFORM_LIST Android) + +IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) + MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) +ENDIF() + +if(${ANDROID_ABI} STREQUAL "armeabi-v7a") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof ") +elseif(${ANDROID_ABI} STREQUAL "arm64-v8a") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof ") +elseif(${ANDROID_ABI} STREQUAL "x86") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof -fpack-struct=8 -malign-double ") +elseif(${ANDROID_ABI} STREQUAL "x86_64") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -D__STDC_LIMIT_MACROS -Wno-invalid-offsetof -mstackrealign -msse3 ") +endif() + +SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g") +SET(CMAKE_CXX_FLAGS_CHECKED "-O2") +SET(CMAKE_CXX_FLAGS_PROFILE "-O2") +SET(CMAKE_CXX_FLAGS_RELEASE "-O2") + +SET(PXSHARED_ANDROID_COMPILE_DEFS _LIB;) +SET(PXSHARED_ANDROID_DEBUG_COMPILE_DEFS _DEBUG;PX_DEBUG=1;PX_CHECKED=1) +SET(PXSHARED_ANDROID_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1) +SET(PXSHARED_ANDROID_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1) 
+SET(PXSHARED_ANDROID_RELEASE_COMPILE_DEFS NDEBUG) + +# NOTE: PxCudaContextManager excluded on this platform + +# Include project cmake files here +INCLUDE(PxFoundation.cmake) +INCLUDE(PsFastXml.cmake) +INCLUDE(PxPvdSDK.cmake) +INCLUDE(PxTask.cmake) diff --git a/PxShared/src/compiler/cmake/Android/PsFastXml.cmake b/PxShared/src/compiler/cmake/Android/PsFastXml.cmake new file mode 100644 index 0000000..81d356e --- /dev/null +++ b/PxShared/src/compiler/cmake/Android/PsFastXml.cmake @@ -0,0 +1,40 @@ +# +# Build PsFastXml +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/fastxml) + +# Set config specific preprocessor definitions based on CMAKE_BUILD_TYPE +SET(PSFASTXML_COMPILE_DEFS + # Common to all configurations + ${PXSHARED_ANDROID_COMPILE_DEFS};PX_FOUNDATION_DLL=0;PxShared_STATIC_LIB; +) + +if(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + LIST(APPEND PSFASTXML_COMPILE_DEFS + ${PXSHARED_ANDROID_DEBUG_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "checked") + LIST(APPEND PSFASTXML_COMPILE_DEFS + ${PXSHARED_ANDROID_CHECKED_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "profile") + LIST(APPEND PSFASTXML_COMPILE_DEFS + ${PXSHARED_ANDROID_PROFILE_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL release) + LIST(APPEND PSFASTXML_COMPILE_DEFS + ${PXSHARED_ANDROID_RELEASE_COMPILE_DEFS} + ) +else(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + MESSAGE(FATAL_ERROR "Unknown configuration ${CMAKE_BUILD_TYPE}") +endif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + + +# include PsFastXml common +INCLUDE(../common/PsFastXml.cmake) + +# enable -fPIC so we can link static libs with the editor +SET_TARGET_PROPERTIES(PsFastXml PROPERTIES POSITION_INDEPENDENT_CODE TRUE) diff --git a/PxShared/src/compiler/cmake/Android/PxFoundation.cmake b/PxShared/src/compiler/cmake/Android/PxFoundation.cmake new file mode 100644 index 0000000..c23a327 --- /dev/null +++ 
b/PxShared/src/compiler/cmake/Android/PxFoundation.cmake @@ -0,0 +1,62 @@ +# +# Build PxFoundation +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/foundation) + +SET(PXFOUNDATION_LIBTYPE STATIC) + +SET(PXFOUNDATION_PLATFORM_FILES + ${LL_SOURCE_DIR}/src/unix/PsUnixAtomic.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixCpu.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixFPU.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixMutex.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixPrintString.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSList.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSocket.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSync.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixThread.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixTime.cpp + ${ANDROID_NDK}/sources/android/cpufeatures/cpu-features.c +) + +SET(PXFOUNDATION_PLATFORM_INCLUDES + ${LL_SOURCE_DIR}/include/linux + ${ANDROID_NDK}/sources/android/cpufeatures +) + +SET(PXFOUNDATION_COMPILE_DEFS + # Common to all configurations + ${PXSHARED_ANDROID_COMPILE_DEFS};PxShared_STATIC_LIB; +) + +if(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + LIST(APPEND PXFOUNDATION_COMPILE_DEFS + ${PXSHARED_ANDROID_DEBUG_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "checked") + LIST(APPEND PXFOUNDATION_COMPILE_DEFS + ${PXSHARED_ANDROID_CHECKED_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "profile") + LIST(APPEND PXFOUNDATION_COMPILE_DEFS + ${PXSHARED_ANDROID_PROFILE_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL release) + LIST(APPEND PXFOUNDATION_COMPILE_DEFS + ${PXSHARED_ANDROID_RELEASE_COMPILE_DEFS} + ) +else(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + MESSAGE(FATAL_ERROR "Unknown configuration ${CMAKE_BUILD_TYPE}") +endif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + + +# include PxFoundation common +INCLUDE(../common/PxFoundation.cmake) + +TARGET_LINK_LIBRARIES(PxFoundation PUBLIC log) + +# enable -fPIC so we can link static libs with the editor 
+SET_TARGET_PROPERTIES(PxFoundation PROPERTIES POSITION_INDEPENDENT_CODE TRUE) diff --git a/PxShared/src/compiler/cmake/Android/PxPvdSDK.cmake b/PxShared/src/compiler/cmake/Android/PxPvdSDK.cmake new file mode 100644 index 0000000..f517efc --- /dev/null +++ b/PxShared/src/compiler/cmake/Android/PxPvdSDK.cmake @@ -0,0 +1,44 @@ +# +# Build PxPvdSDK +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/pvd) + +SET(PXPVDSDK_LIBTYPE STATIC) + +# Set config specific preprocessor definitions based on CMAKE_BUILD_TYPE +SET(PXPVDSDK_COMPILE_DEFS + # Common to all configurations + ${PXSHARED_ANDROID_COMPILE_DEFS};PxShared_STATIC_LIB; +) + +if(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + LIST(APPEND PXPVDSDK_COMPILE_DEFS + ${PXSHARED_ANDROID_DEBUG_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "checked") + LIST(APPEND PXPVDSDK_COMPILE_DEFS + ${PXSHARED_ANDROID_CHECKED_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "profile") + LIST(APPEND PXPVDSDK_COMPILE_DEFS + ${PXSHARED_ANDROID_PROFILE_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL release) + LIST(APPEND PXPVDSDK_COMPILE_DEFS + ${PXSHARED_ANDROID_RELEASE_COMPILE_DEFS} + ) +else(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + MESSAGE(FATAL_ERROR "Unknown configuration ${CMAKE_BUILD_TYPE}") +endif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + +# include PxPvdSDK common +INCLUDE(../common/PxPvdSDK.cmake) + +# Add linked libraries +TARGET_LINK_LIBRARIES(PxPvdSDK PRIVATE PxFoundation) + +# enable -fPIC so we can link static libs with the editor +SET_TARGET_PROPERTIES(PxPvdSDK PROPERTIES POSITION_INDEPENDENT_CODE TRUE) diff --git a/PxShared/src/compiler/cmake/Android/PxTask.cmake b/PxShared/src/compiler/cmake/Android/PxTask.cmake new file mode 100644 index 0000000..e02aefd --- /dev/null +++ b/PxShared/src/compiler/cmake/Android/PxTask.cmake @@ -0,0 +1,39 @@ +# +# Build PxTask +# + 
+SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/task) + +SET(PXTASK_COMPILE_DEFS + ${PXSHARED_ANDROID_COMPILE_DEFS};PxShared_STATIC_LIB; +) + +if(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + LIST(APPEND PXTASK_COMPILE_DEFS + ${PXSHARED_ANDROID_DEBUG_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "checked") + LIST(APPEND PXTASK_COMPILE_DEFS + ${PXSHARED_ANDROID_CHECKED_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "profile") + LIST(APPEND PXTASK_COMPILE_DEFS + ${PXSHARED_ANDROID_PROFILE_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL release) + LIST(APPEND PXTASK_COMPILE_DEFS + ${PXSHARED_ANDROID_RELEASE_COMPILE_DEFS} + ) +else(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + MESSAGE(FATAL_ERROR "Unknown configuration ${CMAKE_BUILD_TYPE}") +endif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + +SET(PXTASK_LIBTYPE OBJECT) + +# include PxTask common +INCLUDE(../common/PxTask.cmake) + +# enable -fPIC so we can link static libs with the editor +SET_TARGET_PROPERTIES(PxTask PROPERTIES POSITION_INDEPENDENT_CODE TRUE) diff --git a/PxShared/src/compiler/cmake/IOS/CMakeLists.txt b/PxShared/src/compiler/cmake/IOS/CMakeLists.txt new file mode 100644 index 0000000..d281e32 --- /dev/null +++ b/PxShared/src/compiler/cmake/IOS/CMakeLists.txt @@ -0,0 +1,73 @@ +cmake_minimum_required(VERSION 3.3) +include(../common/CMakeLists.txt) + + +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to IOS + SET(TARGET_BUILD_PLATFORM "IOS") +ENDIF() + +SET(PLATFORM_LIST IOS) + +IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) + MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) +ENDIF() + +SET(CMAKE_CXX_FLAGS "-std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync 
-Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-invalid-noreturn -Wno-return-type-c-linkage -Wno-reserved-id-macro -Wno-c++98-compat-pedantic -Wno-unused-local-typedef -Wno-old-style-cast -Wno-newline-eof -Wno-unused-private-field -Wno-undefined-reinterpret-cast -Wno-invalid-offsetof -gdwarf-2") + +SET(CMAKE_SHARED_LINKER_FLAGS "") + +SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g") +SET(CMAKE_CXX_FLAGS_CHECKED "-O3 -g") +SET(CMAKE_CXX_FLAGS_PROFILE "-O3 -g") +SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g") + +SET(CMAKE_OSX_DEPLOYMENT_TARGET "") + +SET(CMAKE_OSX_ARCHITECTURES "armv7 armv7s arm64") + +SET(IOS_PLATFORM_LOCATION "iPhoneOS.platform") + +# Setup iOS developer location unless specified manually with CMAKE_IOS_DEVELOPER_ROOT +EXEC_PROGRAM(/usr/bin/xcode-select ARGS -print-path OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR) +SET(XCODE_ROOT "${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer") +IF(NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT) + IF(EXISTS ${XCODE_ROOT}) + SET(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_ROOT}) + ENDIF(EXISTS ${XCODE_ROOT}) +ENDIF(NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT) +SET(CMAKE_IOS_DEVELOPER_ROOT ${CMAKE_IOS_DEVELOPER_ROOT} CACHE PATH "Location of iOS Platform") + +# Find and use the most recent iOS sdk unless specified manually with CMAKE_IOS_SDK_ROOT +IF(NOT DEFINED CMAKE_IOS_SDK_ROOT) + FILE(GLOB _CMAKE_IOS_SDKS "${CMAKE_IOS_DEVELOPER_ROOT}/SDKs/*") + IF(_CMAKE_IOS_SDKS) + LIST(SORT _CMAKE_IOS_SDKS) + LIST(REVERSE _CMAKE_IOS_SDKS) + LIST(GET _CMAKE_IOS_SDKS 0 CMAKE_IOS_SDK_ROOT) + ELSE(_CMAKE_IOS_SDKS) + MESSAGE(FATAL_ERROR "No iOS 
SDK's found in default search path ${CMAKE_IOS_DEVELOPER_ROOT}. Manually set CMAKE_IOS_SDK_ROOT or install the iOS SDK.") + ENDIF(_CMAKE_IOS_SDKS) + MESSAGE(STATUS "Toolchain using default iOS SDK: ${CMAKE_IOS_SDK_ROOT}") +ENDIF(NOT DEFINED CMAKE_IOS_SDK_ROOT) +SET(CMAKE_IOS_SDK_ROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK") + +# Set the sysroot default to the most recent SDK +SET(CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS support") + +SET(CMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS "iphoneos") +SET(CMAKE_XCODE_ATTRIBUTE_SDKROOT ${CMAKE_IOS_SDK_ROOT}) +SET(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "7.0") + +SET(PXSHARED_IOS_COMPILE_DEFS _LIB;DISABLE_CUDA_PHYSX;DISABLE_COMPUTE_PHYSX) +SET(PXSHARED_IOS_DEBUG_COMPILE_DEFS _DEBUG;PX_DEBUG=1;PX_CHECKED=1) +SET(PXSHARED_IOS_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1) +SET(PXSHARED_IOS_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1) +SET(PXSHARED_IOS_RELEASE_COMPILE_DEFS NDEBUG) + +# NOTE: PxCudaContextManager excluded on this platform + +# Include project cmake files here +INCLUDE(PxFoundation.cmake) +INCLUDE(PsFastXml.cmake) +INCLUDE(PxPvdSDK.cmake) +INCLUDE(PxTask.cmake) diff --git a/PxShared/src/compiler/cmake/IOS/PsFastXml.cmake b/PxShared/src/compiler/cmake/IOS/PsFastXml.cmake new file mode 100644 index 0000000..28b2a1b --- /dev/null +++ b/PxShared/src/compiler/cmake/IOS/PsFastXml.cmake @@ -0,0 +1,22 @@ +# +# Build PsFastXml +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/fastxml) + +# Use generator expressions to set config specific preprocessor definitions +SET(PSFASTXML_COMPILE_DEFS + + # Common to all configurations + ${PXSHARED_IOS_COMPILE_DEFS};PX_FOUNDATION_DLL=0; + + $<$<CONFIG:debug>:${PXSHARED_IOS_DEBUG_COMPILE_DEFS};> + $<$<CONFIG:checked>:${PXSHARED_IOS_CHECKED_COMPILE_DEFS};> + $<$<CONFIG:profile>:${PXSHARED_IOS_PROFILE_COMPILE_DEFS};> + 
$<$<CONFIG:release>:${PXSHARED_IOS_RELEASE_COMPILE_DEFS};> +) + +# include PsFastXml common +INCLUDE(../common/PsFastXml.cmake)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/IOS/PxFoundation.cmake b/PxShared/src/compiler/cmake/IOS/PxFoundation.cmake new file mode 100644 index 0000000..7d022cf --- /dev/null +++ b/PxShared/src/compiler/cmake/IOS/PxFoundation.cmake @@ -0,0 +1,40 @@ +# +# Build PxFoundation +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/foundation) + +SET(PXFOUNDATION_LIBTYPE STATIC) + +SET(PXFOUNDATION_PLATFORM_FILES + ${LL_SOURCE_DIR}/src/unix/PsUnixAtomic.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixCpu.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixFPU.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixMutex.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixPrintString.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSList.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSocket.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSync.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixThread.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixTime.cpp +) + +SET(PXFOUNDATION_PLATFORM_INCLUDES + ${LL_SOURCE_DIR}/include/ios +) + +SET(PXFOUNDATION_COMPILE_DEFS + + # Common to all configurations + ${PXSHARED_IOS_COMPILE_DEFS} + + $<$<CONFIG:debug>:${PXSHARED_IOS_DEBUG_COMPILE_DEFS}> + $<$<CONFIG:checked>:${PXSHARED_IOS_CHECKED_COMPILE_DEFS}> + $<$<CONFIG:profile>:${PXSHARED_IOS_PROFILE_COMPILE_DEFS}> + $<$<CONFIG:release>:${PXSHARED_IOS_RELEASE_COMPILE_DEFS}> +) + +# include PxFoundation common +INCLUDE(../common/PxFoundation.cmake)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/IOS/PxPvdSDK.cmake b/PxShared/src/compiler/cmake/IOS/PxPvdSDK.cmake new file mode 100644 index 0000000..85e7e1f --- /dev/null +++ b/PxShared/src/compiler/cmake/IOS/PxPvdSDK.cmake @@ -0,0 +1,24 @@ +# +# Build PxPvdSDK +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/pvd) + +SET(PXPVDSDK_LIBTYPE STATIC) + +# Use generator expressions to set config specific preprocessor definitions +SET(PXPVDSDK_COMPILE_DEFS + + # Common to all configurations + ${PXSHARED_IOS_COMPILE_DEFS} + + $<$<CONFIG:debug>:${PXSHARED_IOS_DEBUG_COMPILE_DEFS}> + $<$<CONFIG:checked>:${PXSHARED_IOS_CHECKED_COMPILE_DEFS}> + $<$<CONFIG:profile>:${PXSHARED_IOS_PROFILE_COMPILE_DEFS}> + $<$<CONFIG:release>:${PXSHARED_IOS_RELEASE_COMPILE_DEFS}> +) + +# include PxPvdSDK common +INCLUDE(../common/PxPvdSDK.cmake) diff --git a/PxShared/src/compiler/cmake/IOS/PxTask.cmake b/PxShared/src/compiler/cmake/IOS/PxTask.cmake new file mode 100644 index 0000000..197e241 --- /dev/null +++ b/PxShared/src/compiler/cmake/IOS/PxTask.cmake @@ -0,0 +1,18 @@ +# +# Build PxTask +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/task) + +SET(PXTASK_COMPILE_DEFS + ${PXSHARED_IOS_COMPILE_DEFS};PX_FOUNDATION_DLL=0; + $<$<CONFIG:debug>:${PXSHARED_IOS_DEBUG_COMPILE_DEFS};> + $<$<CONFIG:checked>:${PXSHARED_IOS_CHECKED_COMPILE_DEFS};> + $<$<CONFIG:profile>:${PXSHARED_IOS_PROFILE_COMPILE_DEFS};> + $<$<CONFIG:release>:${PXSHARED_IOS_RELEASE_COMPILE_DEFS};> +) + +# include PxTask common +INCLUDE(../common/PxTask.cmake)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/Linux/CMakeLists.txt b/PxShared/src/compiler/cmake/Linux/CMakeLists.txt new file mode 100644 index 0000000..2fa592e --- /dev/null +++ b/PxShared/src/compiler/cmake/Linux/CMakeLists.txt @@ -0,0 +1,87 @@ +cmake_minimum_required(VERSION 3.3) +include(../common/CMakeLists.txt) + +STRING(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWERCASE) + +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Linux + SET(TARGET_BUILD_PLATFORM "Linux") +ENDIF() + +SET(PLATFORM_LIST Linux) + +IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) + MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) +ENDIF() + +IF (${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu") + IF ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + # using Clang + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-invalid-noreturn -Wno-return-type-c-linkage -Wno-reserved-id-macro -Wno-c++98-compat-pedantic -Wno-unused-local-typedef -Wno-old-style-cast -Wno-newline-eof -Wno-unused-private-field -Wno-undefined-func-template -Wno-format-nonliteral -Wno-implicit-fallthrough -Wno-undefined-reinterpret-cast") + ELSEIF ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + # using GCC + SET(LIBPATH_SUFFIX 
"x64") + SET(CMAKE_CXX_FLAGS "-Werror -m64 -fPIC -msse2 -mfpmath=sse -ffast-math -fno-exceptions -fno-rtti -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wextra -fno-strict-aliasing -fdiagnostics-show-option -Wno-invalid-offsetof -Wno-uninitialized -Wno-missing-field-initializers") + ENDIF("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") +ELSEIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "arm-unknown-linux-gnueabihf") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -mfpu=neon -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-implicit-fallthrough") +ELSEIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "aarch64-unknown-linux-gnueabi") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables 
-Wno-deprecated -Wno-non-virtual-dtor -Wno-old-style-cast -Wno-return-type-c-linkage -Wno-format-nonliteral -Wno-unused-local-typedef -Wno-implicit-fallthrough") +ELSE(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu") + MESSAGE(FATAL_ERROR "Unknown CMAKE_LIBRARY_ARCHITECTURE ${CMAKE_LIBRARY_ARCHITECTURE}") +ENDIF(${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-unknown-linux-gnu" OR ${CMAKE_LIBRARY_ARCHITECTURE} STREQUAL "x86_64-linux-gnu") + + +SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -gdwarf-3") +SET(CMAKE_CXX_FLAGS_CHECKED "-O3 -g -gdwarf-3") +SET(CMAKE_CXX_FLAGS_PROFILE "-O3 -g -gdwarf-3") +SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -gdwarf-3") + +IF(DEFINED PX_GENERATE_GPU_PROJECTS) +SET(PXSHARED_LINUX_COMPILE_DEFS _LIB) +ELSE() +# GPU projects not requested: disable CUDA for all projects on Linux +SET(PXSHARED_LINUX_COMPILE_DEFS _LIB;DISABLE_CUDA_PHYSX;) +ENDIF() +SET(PXSHARED_LINUX_DEBUG_COMPILE_DEFS _DEBUG;PX_DEBUG=1;PX_CHECKED=1) +SET(PXSHARED_LINUX_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1) +SET(PXSHARED_LINUX_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1) +SET(PXSHARED_LINUX_RELEASE_COMPILE_DEFS NDEBUG) + +IF(DEFINED LIBPATH_SUFFIX) + SET(CMAKE_DEBUG_POSTFIX "${CMAKE_DEBUG_POSTFIX}_${LIBPATH_SUFFIX}") + SET(CMAKE_PROFILE_POSTFIX "${CMAKE_PROFILE_POSTFIX}_${LIBPATH_SUFFIX}") + SET(CMAKE_CHECKED_POSTFIX "${CMAKE_CHECKED_POSTFIX}_${LIBPATH_SUFFIX}") + SET(CMAKE_RELEASE_POSTFIX "${CMAKE_RELEASE_POSTFIX}_${LIBPATH_SUFFIX}") +ENDIF() + +# NOTE: PxCudaContextManager is only included when PX_GENERATE_GPU_PROJECTS is defined + +# Include project cmake files here +IF(DEFINED PX_SELECT_COMPONENTS) + if ("PxFoundation" IN_LIST PX_SELECT_COMPONENTS) + INCLUDE(PxFoundation.cmake) + endif() + if ("PsFastXml" IN_LIST PX_SELECT_COMPONENTS) + INCLUDE(PsFastXml.cmake) + endif() + if ("PxPvdSDK" IN_LIST PX_SELECT_COMPONENTS) + INCLUDE(PxPvdSDK.cmake) + endif() + if ("PxTask" IN_LIST PX_SELECT_COMPONENTS) + INCLUDE(PxTask.cmake) + endif() + if ("PxCudaContextManager" 
IN_LIST PX_SELECT_COMPONENTS) + IF(DEFINED PX_GENERATE_GPU_PROJECTS) + INCLUDE(PxCudaContextManager.cmake) + ENDIF() + endif() +ELSE() +INCLUDE(PxFoundation.cmake) +INCLUDE(PsFastXml.cmake) +INCLUDE(PxPvdSDK.cmake) +INCLUDE(PxTask.cmake) +IF(DEFINED PX_GENERATE_GPU_PROJECTS) + INCLUDE(PxCudaContextManager.cmake) +ENDIF() +ENDIF() + diff --git a/PxShared/src/compiler/cmake/Linux/PsFastXml.cmake b/PxShared/src/compiler/cmake/Linux/PsFastXml.cmake new file mode 100644 index 0000000..3c90c49 --- /dev/null +++ b/PxShared/src/compiler/cmake/Linux/PsFastXml.cmake @@ -0,0 +1,40 @@ +# +# Build PsFastXml +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/fastxml) + +# Set config specific preprocessor definitions based on CMAKE_BUILD_TYPE +SET(PSFASTXML_COMPILE_DEFS + # Common to all configurations + ${PXSHARED_LINUX_COMPILE_DEFS};PX_FOUNDATION_DLL=0; +) + +if(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + LIST(APPEND PSFASTXML_COMPILE_DEFS + ${PXSHARED_LINUX_DEBUG_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "checked") + LIST(APPEND PSFASTXML_COMPILE_DEFS + ${PXSHARED_LINUX_CHECKED_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "profile") + LIST(APPEND PSFASTXML_COMPILE_DEFS + ${PXSHARED_LINUX_PROFILE_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL release) + LIST(APPEND PSFASTXML_COMPILE_DEFS + ${PXSHARED_LINUX_RELEASE_COMPILE_DEFS} + ) +else(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + MESSAGE(FATAL_ERROR "Unknown configuration ${CMAKE_BUILD_TYPE}") +endif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + + +# include PsFastXml common +INCLUDE(../common/PsFastXml.cmake) + +# enable -fPIC so we can link static libs with the editor +SET_TARGET_PROPERTIES(PsFastXml PROPERTIES POSITION_INDEPENDENT_CODE TRUE) diff --git a/PxShared/src/compiler/cmake/Linux/PxCudaContextManager.cmake b/PxShared/src/compiler/cmake/Linux/PxCudaContextManager.cmake new file 
mode 100644 index 0000000..3454323 --- /dev/null +++ b/PxShared/src/compiler/cmake/Linux/PxCudaContextManager.cmake @@ -0,0 +1,29 @@ +# +# Build PxCudaContextManager +# + +FIND_PACKAGE(CUDA REQUIRED) + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/cudamanager) + +SET(CUDA_COMPILER_OPTION_DEBUG "--compiler-options=-Wall,-O3,-fPIC,-msse2,-mfpmath=sse,-malign-double,-m64,-fvisibility=hidden") +SET(CUDA_COMPILER_OPTION_CHECKED "--compiler-options=-Wall,-O3,-fPIC,-msse2,-mfpmath=sse,-malign-double,-m64,-fvisibility=hidden") +SET(CUDA_COMPILER_OPTION_PROFILE "--compiler-options=-Wall,-O3,-fPIC,-msse2,-mfpmath=sse,-malign-double,-m64,-fvisibility=hidden") +SET(CUDA_COMPILER_OPTION_RELEASE "--compiler-options=-Wall,-O3,-fPIC,-msse2,-mfpmath=sse,-malign-double,-m64,-fvisibility=hidden") + +# include PxCudaContextManager common +INCLUDE(../common/PxCudaContextManager.cmake) + +# Use generator expressions to set config specific preprocessor definitions +TARGET_COMPILE_DEFINITIONS(PxCudaContextManager + + # Common to all configurations + PRIVATE ${PXSHARED_LINUX_COMPILE_DEFS}; + + PRIVATE $<$<CONFIG:debug>:${PXSHARED_LINUX_DEBUG_COMPILE_DEFS};> + PRIVATE $<$<CONFIG:checked>:${PXSHARED_LINUX_CHECKED_COMPILE_DEFS};> + PRIVATE $<$<CONFIG:profile>:${PXSHARED_LINUX_PROFILE_COMPILE_DEFS};> + PRIVATE $<$<CONFIG:release>:${PXSHARED_LINUX_RELEASE_COMPILE_DEFS};> +) diff --git a/PxShared/src/compiler/cmake/Linux/PxFoundation.cmake b/PxShared/src/compiler/cmake/Linux/PxFoundation.cmake new file mode 100644 index 0000000..f074805 --- /dev/null +++ b/PxShared/src/compiler/cmake/Linux/PxFoundation.cmake @@ -0,0 +1,67 @@ +# +# Build PxFoundation +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/foundation) + +IF(DEFINED PX_STATIC_LIBRARIES) + SET(PXFOUNDATION_LIBTYPE STATIC) +ELSE() + SET(PXFOUNDATION_LIBTYPE SHARED) + SET(PXFOUNDATION_SHARED_LIBRARY_DEFS 
PX_PVDSDK_DLL=1;PX_FOUNDATION_DLL=1;) +ENDIF() + +SET(PXFOUNDATION_PLATFORM_FILES + ${LL_SOURCE_DIR}/src/unix/PsUnixAtomic.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixCpu.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixFPU.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixMutex.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixPrintString.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSList.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSocket.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSync.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixThread.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixTime.cpp +) + +SET(PXFOUNDATION_PLATFORM_INCLUDES + ${LL_SOURCE_DIR}/include/linux +) + +SET(PXFOUNDATION_COMPILE_DEFS + # Common to all configurations + ${PXSHARED_LINUX_COMPILE_DEFS} +) + +if(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + LIST(APPEND PXFOUNDATION_COMPILE_DEFS + ${PXSHARED_LINUX_DEBUG_COMPILE_DEFS};${PXFOUNDATION_SHARED_LIBRARY_DEFS}; + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "checked") + LIST(APPEND PXFOUNDATION_COMPILE_DEFS + ${PXSHARED_LINUX_CHECKED_COMPILE_DEFS};${PXFOUNDATION_SHARED_LIBRARY_DEFS}; + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "profile") + LIST(APPEND PXFOUNDATION_COMPILE_DEFS + ${PXSHARED_LINUX_PROFILE_COMPILE_DEFS};${PXFOUNDATION_SHARED_LIBRARY_DEFS}; + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL release) + LIST(APPEND PXFOUNDATION_COMPILE_DEFS + ${PXSHARED_LINUX_RELEASE_COMPILE_DEFS};${PXFOUNDATION_SHARED_LIBRARY_DEFS}; + ) +else(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + MESSAGE(FATAL_ERROR "Unknown configuration ${CMAKE_BUILD_TYPE}") +endif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + + +# include PxFoundation common +INCLUDE(../common/PxFoundation.cmake) + +IF(NOT DEFINED PX_STATIC_LIBRARIES) +TARGET_LINK_LIBRARIES(PxFoundation PUBLIC rt) +ENDIF() + +# enable -fPIC so we can link static libs with the editor +SET_TARGET_PROPERTIES(PxFoundation PROPERTIES POSITION_INDEPENDENT_CODE TRUE) diff --git a/PxShared/src/compiler/cmake/Linux/PxPvdSDK.cmake 
b/PxShared/src/compiler/cmake/Linux/PxPvdSDK.cmake new file mode 100644 index 0000000..81692a0 --- /dev/null +++ b/PxShared/src/compiler/cmake/Linux/PxPvdSDK.cmake @@ -0,0 +1,53 @@ +# +# Build PxPvdSDK +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/pvd) + +IF(DEFINED PX_STATIC_LIBRARIES) + SET(PXPVDSDK_LIBTYPE STATIC) +ELSE() + SET(PXPVDSDK_LIBTYPE SHARED) + SET(PXPVDSDK_SHARED_LIBRARY_DEFS PX_PVDSDK_DLL=1;PX_FOUNDATION_DLL=1;) +ENDIF() + +# Set config specific preprocessor definitions based on CMAKE_BUILD_TYPE +SET(PXPVDSDK_COMPILE_DEFS + # Common to all configurations + ${PXSHARED_LINUX_COMPILE_DEFS} +) + +if(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + LIST(APPEND PXPVDSDK_COMPILE_DEFS + ${PXSHARED_LINUX_DEBUG_COMPILE_DEFS};${PXPVDSDK_SHARED_LIBRARY_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "checked") + LIST(APPEND PXPVDSDK_COMPILE_DEFS + ${PXSHARED_LINUX_CHECKED_COMPILE_DEFS};${PXPVDSDK_SHARED_LIBRARY_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "profile") + LIST(APPEND PXPVDSDK_COMPILE_DEFS + ${PXSHARED_LINUX_PROFILE_COMPILE_DEFS};${PXPVDSDK_SHARED_LIBRARY_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL release) + LIST(APPEND PXPVDSDK_COMPILE_DEFS + ${PXSHARED_LINUX_RELEASE_COMPILE_DEFS};${PXPVDSDK_SHARED_LIBRARY_DEFS} + ) +else(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + MESSAGE(FATAL_ERROR "Unknown configuration ${CMAKE_BUILD_TYPE}") +endif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + +# include PxPvdSDK common +INCLUDE(../common/PxPvdSDK.cmake) + +# Add linked libraries +IF(DEFINED PX_STATIC_LIBRARIES) +TARGET_LINK_LIBRARIES(PxPvdSDK PRIVATE PxFoundation ) +ELSE() +TARGET_LINK_LIBRARIES(PxPvdSDK PRIVATE PxFoundation rt) +ENDIF() + +# enable -fPIC so we can link static libs with the editor +SET_TARGET_PROPERTIES(PxPvdSDK PROPERTIES POSITION_INDEPENDENT_CODE TRUE) diff --git a/PxShared/src/compiler/cmake/Linux/PxTask.cmake 
b/PxShared/src/compiler/cmake/Linux/PxTask.cmake new file mode 100644 index 0000000..86689c4 --- /dev/null +++ b/PxShared/src/compiler/cmake/Linux/PxTask.cmake @@ -0,0 +1,43 @@ +# +# Build PxTask +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/task) + +SET(PXTASK_COMPILE_DEFS + ${PXSHARED_LINUX_COMPILE_DEFS}; +) + +if(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + LIST(APPEND PXTASK_COMPILE_DEFS + ${PXSHARED_LINUX_DEBUG_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "checked") + LIST(APPEND PXTASK_COMPILE_DEFS + ${PXSHARED_LINUX_CHECKED_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "profile") + LIST(APPEND PXTASK_COMPILE_DEFS + ${PXSHARED_LINUX_PROFILE_COMPILE_DEFS} + ) +elseif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL release) + LIST(APPEND PXTASK_COMPILE_DEFS + ${PXSHARED_LINUX_RELEASE_COMPILE_DEFS} + ) +else(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + MESSAGE(FATAL_ERROR "Unknown configuration ${CMAKE_BUILD_TYPE}") +endif(${CMAKE_BUILD_TYPE_LOWERCASE} STREQUAL "debug") + +IF(DEFINED PX_STATIC_LIBRARIES) + SET(PXTASK_LIBTYPE OBJECT) +ELSE() + SET(PXTASK_LIBTYPE STATIC) +ENDIF() + +# include PxTask common +INCLUDE(../common/PxTask.cmake) + +# enable -fPIC so we can link static libs with the editor +SET_TARGET_PROPERTIES(PxTask PROPERTIES POSITION_INDEPENDENT_CODE TRUE) diff --git a/PxShared/src/compiler/cmake/Mac/CMakeLists.txt b/PxShared/src/compiler/cmake/Mac/CMakeLists.txt new file mode 100644 index 0000000..beb06bc --- /dev/null +++ b/PxShared/src/compiler/cmake/Mac/CMakeLists.txt @@ -0,0 +1,48 @@ +cmake_minimum_required(VERSION 3.3) +include(../common/CMakeLists.txt) + + +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Mac + SET(TARGET_BUILD_PLATFORM "Mac") +ENDIF() + +SET(PLATFORM_LIST Mac) + +IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) + MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) +ENDIF() + 
+SET(CMAKE_CXX_FLAGS "-msse2 -std=c++11 -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra -fstrict-aliasing -Wstrict-aliasing=2 -Weverything -Wno-documentation-deprecated-sync -Wno-documentation-unknown-command -Wno-float-equal -Wno-padded -Wno-weak-vtables -Wno-cast-align -Wno-conversion -Wno-missing-noreturn -Wno-missing-variable-declarations -Wno-shift-sign-overflow -Wno-covered-switch-default -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-prototypes -Wno-unreachable-code -Wno-unused-macros -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-weak-template-vtables -Wno-deprecated -Wno-non-virtual-dtor -Wno-invalid-noreturn -Wno-return-type-c-linkage -Wno-reserved-id-macro -Wno-c++98-compat-pedantic -Wno-unused-local-typedef -Wno-old-style-cast -Wno-newline-eof -Wno-unused-private-field -Wno-undefined-reinterpret-cast -Wno-invalid-offsetof -gdwarf-2") + +IF (DEFINED PX_32BIT) +SET(CMAKE_CXX_FLAGS "-arch i386 ${CMAKE_CXX_FLAGS}") +ENDIF() +IF (DEFINED PX_64BIT) +SET(CMAKE_CXX_FLAGS "-arch x86_64 ${CMAKE_CXX_FLAGS}") +ENDIF() + +SET(CMAKE_SHARED_LINKER_FLAGS "") + +SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g") +SET(CMAKE_CXX_FLAGS_CHECKED "-O3 -g") +SET(CMAKE_CXX_FLAGS_PROFILE "-O3 -g") +SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g") + +# Build libs compatible with OS X 10.9 +SET(CMAKE_OSX_DEPLOYMENT_TARGET "10.9") + +#set(CMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym") + +SET(PXSHARED_MAC_COMPILE_DEFS _LIB;DISABLE_CUDA_PHYSX;DISABLE_COMPUTE_PHYSX) +SET(PXSHARED_MAC_DEBUG_COMPILE_DEFS _DEBUG;PX_DEBUG=1;PX_CHECKED=1) +SET(PXSHARED_MAC_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1) +SET(PXSHARED_MAC_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1) +SET(PXSHARED_MAC_RELEASE_COMPILE_DEFS NDEBUG) + +# NOTE: PxCudaContextManager excluded on this platform + +# Include project cmake files here +INCLUDE(PxFoundation.cmake) +INCLUDE(PsFastXml.cmake) +INCLUDE(PxPvdSDK.cmake) +INCLUDE(PxTask.cmake) diff 
--git a/PxShared/src/compiler/cmake/Mac/PsFastXml.cmake b/PxShared/src/compiler/cmake/Mac/PsFastXml.cmake new file mode 100644 index 0000000..7f140e0 --- /dev/null +++ b/PxShared/src/compiler/cmake/Mac/PsFastXml.cmake @@ -0,0 +1,22 @@ +# +# Build PsFastXml +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/fastxml) + +# Use generator expressions to set config specific preprocessor definitions +SET(PSFASTXML_COMPILE_DEFS + + # Common to all configurations + ${PXSHARED_MAC_COMPILE_DEFS};PX_FOUNDATION_DLL=0; + + $<$<CONFIG:debug>:${PXSHARED_MAC_DEBUG_COMPILE_DEFS};> + $<$<CONFIG:checked>:${PXSHARED_MAC_CHECKED_COMPILE_DEFS};> + $<$<CONFIG:profile>:${PXSHARED_MAC_PROFILE_COMPILE_DEFS};> + $<$<CONFIG:release>:${PXSHARED_MAC_RELEASE_COMPILE_DEFS};> +) + +# include PsFastXml common +INCLUDE(../common/PsFastXml.cmake)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/Mac/PxFoundation.cmake b/PxShared/src/compiler/cmake/Mac/PxFoundation.cmake new file mode 100644 index 0000000..2a21910 --- /dev/null +++ b/PxShared/src/compiler/cmake/Mac/PxFoundation.cmake @@ -0,0 +1,40 @@ +# +# Build PxFoundation +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/foundation) + +SET(PXFOUNDATION_LIBTYPE SHARED) + +SET(PXFOUNDATION_PLATFORM_FILES + ${LL_SOURCE_DIR}/src/unix/PsUnixAtomic.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixCpu.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixFPU.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixMutex.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixPrintString.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSList.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSocket.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSync.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixThread.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixTime.cpp +) + +SET(PXFOUNDATION_PLATFORM_INCLUDES + ${LL_SOURCE_DIR}/include/mac +) + +SET(PXFOUNDATION_COMPILE_DEFS + + # Common to all configurations + ${PXSHARED_MAC_COMPILE_DEFS} + + $<$<CONFIG:debug>:${PXSHARED_MAC_DEBUG_COMPILE_DEFS}> + $<$<CONFIG:checked>:${PXSHARED_MAC_CHECKED_COMPILE_DEFS}> + $<$<CONFIG:profile>:${PXSHARED_MAC_PROFILE_COMPILE_DEFS}> + $<$<CONFIG:release>:${PXSHARED_MAC_RELEASE_COMPILE_DEFS}> +) + +# include PxFoundation common +INCLUDE(../common/PxFoundation.cmake)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/Mac/PxPvdSDK.cmake b/PxShared/src/compiler/cmake/Mac/PxPvdSDK.cmake new file mode 100644 index 0000000..c236882 --- /dev/null +++ b/PxShared/src/compiler/cmake/Mac/PxPvdSDK.cmake @@ -0,0 +1,28 @@ +# +# Build PxPvdSDK +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/pvd) + +SET(PXPVDSDK_LIBTYPE SHARED) + +# Use generator expressions to set config specific preprocessor definitions +SET(PXPVDSDK_COMPILE_DEFS + + # Common to all configurations + ${PXSHARED_MAC_COMPILE_DEFS} + + $<$<CONFIG:debug>:${PXSHARED_MAC_DEBUG_COMPILE_DEFS}> + $<$<CONFIG:checked>:${PXSHARED_MAC_CHECKED_COMPILE_DEFS}> + $<$<CONFIG:profile>:${PXSHARED_MAC_PROFILE_COMPILE_DEFS}> + $<$<CONFIG:release>:${PXSHARED_MAC_RELEASE_COMPILE_DEFS}> +) + +# include PxPvdSDK common +INCLUDE(../common/PxPvdSDK.cmake) + +# Add linked libraries +TARGET_LINK_LIBRARIES(PxPvdSDK PRIVATE PxFoundation) + diff --git a/PxShared/src/compiler/cmake/Mac/PxTask.cmake b/PxShared/src/compiler/cmake/Mac/PxTask.cmake new file mode 100644 index 0000000..2326a1f --- /dev/null +++ b/PxShared/src/compiler/cmake/Mac/PxTask.cmake @@ -0,0 +1,18 @@ +# +# Build PxTask +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/task) + +SET(PXTASK_COMPILE_DEFS + ${PXSHARED_MAC_COMPILE_DEFS};PX_FOUNDATION_DLL=0; + $<$<CONFIG:debug>:${PXSHARED_MAC_DEBUG_COMPILE_DEFS};> + $<$<CONFIG:checked>:${PXSHARED_MAC_CHECKED_COMPILE_DEFS};> + $<$<CONFIG:profile>:${PXSHARED_MAC_PROFILE_COMPILE_DEFS};> + $<$<CONFIG:release>:${PXSHARED_MAC_RELEASE_COMPILE_DEFS};> +) + +# include PxTask common +INCLUDE(../common/PxTask.cmake)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/Modules/FindPxShared.cmake b/PxShared/src/compiler/cmake/Modules/FindPxShared.cmake new file mode 100644 index 0000000..ea90ab6 --- /dev/null +++ b/PxShared/src/compiler/cmake/Modules/FindPxShared.cmake @@ -0,0 +1,18 @@ +# - Try to find PxShared +# Once done this will define +# PXSHARED_FOUND - System has PxShared +# PXSHARED_INCLUDE_DIRS - The PxShared include directories + +# NOTE: We're including a version in this, but the first hint is without one - we should use that! +FIND_PATH( PXSHARED_INCLUDE_DIRS include/cudamanager/PxGpuCopyDesc.h + HINTS + ${GW_DEPS_ROOT}/PxShared + ${GW_DEPS_ROOT}/sw/physx/PxShared/1.0/trunk/ + ) + +MESSAGE(${PXSHARED_INCLUDE_DIRS}) + +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(PxShared DEFAULT_MSG PXSHARED_INCLUDE_DIRS) + +mark_as_advanced(PXSHARED_INCLUDE_DIRS) diff --git a/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake b/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake new file mode 100644 index 0000000..c1675ae --- /dev/null +++ b/PxShared/src/compiler/cmake/Modules/FindnvToolsExt.cmake @@ -0,0 +1,38 @@ +# - Try to find nvToolsExt +# Once done this will define +# NVTOOLSEXT_FOUND - System has nvToolsExt +# NVTOOLSEXT_INCLUDE_DIRS - The nvToolsExt include directories +# NVTOOLSEXT_LIBRARIES - The libraries needed to use nvToolsExt +# NVTOOLSEXT_DEFINITIONS - Compiler switches required for using nvToolsExt + +FIND_PATH( NVTOOLSEXT_INCLUDE_DIRS nvToolsExt.h + HINTS + ${GW_DEPS_ROOT}/PhysX_3.4/externals/nvToolsExt + ${GW_DEPS_ROOT}/sw/physx/externals/nvToolsExt/1 + PATH_SUFFIXES include) + +INCLUDE(FindPackageHandleStandardArgs) + +IF(TARGET_BUILD_PLATFORM STREQUAL "Windows") + # NOTE: Doesn't make sense for all platforms - ARM + IF(CMAKE_CL_64) + SET(NVTOOLSEXT_LIBNAME "nvToolsExt64_1") + SET(NVTOOLSEXT_LIBPATH_SUFFIX "x64") + ELSE(CMAKE_CL_64) + SET(NVTOOLSEXT_LIBNAME nvToolsExt32_1) + SET(NVTOOLSEXT_LIBPATH_SUFFIX 
"Win32") + ENDIF(CMAKE_CL_64) + + + FIND_LIBRARY( NVTOOLSEXT_LIBRARIES ${NVTOOLSEXT_LIBNAME} + ${GW_DEPS_ROOT}/PhysX_3.4/externals/nvToolsExt/lib/${NVTOOLSEXT_LIBPATH_SUFFIX} + ${GW_DEPS_ROOT}/sw/physx/externals/nvToolsExt/1/lib/${NVTOOLSEXT_LIBPATH_SUFFIX} + ) + + FIND_PACKAGE_HANDLE_STANDARD_ARGS(nvToolsExt DEFAULT_MSG NVTOOLSEXT_LIBRARIES NVTOOLSEXT_INCLUDE_DIRS) +ELSE() + # Exclude the libraries for non-windows platforms + FIND_PACKAGE_HANDLE_STANDARD_ARGS(nvToolsExt DEFAULT_MSG NVTOOLSEXT_INCLUDE_DIRS) +ENDIF() + +mark_as_advanced(NVTOOLSEXT_INCLUDE_DIRS NVTOOLSEXT_LIBRARIES)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/RegenProjects.bat b/PxShared/src/compiler/cmake/RegenProjects.bat new file mode 100644 index 0000000..bdde57f --- /dev/null +++ b/PxShared/src/compiler/cmake/RegenProjects.bat @@ -0,0 +1,4 @@ +rmdir CMakeFiles /s /q +del CMakeCache.txt +cmake ../../.. -A x64 + diff --git a/PxShared/src/compiler/cmake/common/CMakeLists.txt b/PxShared/src/compiler/cmake/common/CMakeLists.txt new file mode 100644 index 0000000..22d2097 --- /dev/null +++ b/PxShared/src/compiler/cmake/common/CMakeLists.txt @@ -0,0 +1,84 @@ +cmake_minimum_required(VERSION 3.3) + +PROJECT(PxShared CXX) + +CMAKE_POLICY(SET CMP0057 NEW) # Enable IN_LIST + +IF(DEFINED ENV{GW_DEPS_ROOT}) + SET(GW_DEPS_ROOT $ENV{GW_DEPS_ROOT}) + + SET(CMAKE_MODULE_PATH $ENV{GW_DEPS_ROOT}/sw/physx/tools/CMakeModules) + + IF(EXISTS $ENV{GW_DEPS_ROOT}/Externals/CMakeModules) + SET(CMAKE_MODULE_PATH $ENV{GW_DEPS_ROOT}/Externals/CMakeModules) + ENDIF() +ENDIF() + +# Add find modules to the path +IF(NOT EXISTS ${CMAKE_MODULE_PATH}) + MESSAGE(FATAL_ERROR "Could not find CMakeModules at ${CMAKE_MODULE_PATH}") +ENDIF() + +MESSAGE("PxShared Build Platform: " ${TARGET_BUILD_PLATFORM}) +MESSAGE("Using CXX Compiler: " ${CMAKE_CXX_COMPILER}) + +# TODO: Fail if we didn't find deps + +# Tell MSVC to stop doing MBCS +#ADD_DEFINITIONS(-D_UNICODE -DUNICODE) + +IF(CMAKE_CONFIGURATION_TYPES) + SET(CMAKE_CONFIGURATION_TYPES debug checked profile release) + SET(CMAKE_CONFIGURATION_TYPES "${CMAKE_CONFIGURATION_TYPES}" CACHE STRING + "Reset config to what we need" + FORCE) + + SET(CMAKE_SHARED_LINKER_FLAGS_CHECKED "") + SET(CMAKE_SHARED_LINKER_FLAGS_PROFILE "") + + # Build PDBs for all configurations + SET(CMAKE_SHARED_LINKER_FLAGS "/DEBUG") + +ENDIF() + +# Default to appending "DEBUG", "PROFILE", etc to produced artifacts +IF(NOT DEFINED APPEND_CONFIG_NAME) + SET(APPEND_CONFIG_NAME ON) +ENDIF() + +IF (APPEND_CONFIG_NAME) + MESSAGE("Appending config to output names") + + 
SET(CMAKE_DEBUG_POSTFIX "DEBUG") + SET(CMAKE_PROFILE_POSTFIX "PROFILE") + SET(CMAKE_CHECKED_POSTFIX "CHECKED") + SET(CMAKE_RELEASE_POSTFIX "") +ENDIF() + +SET(PROJECT_ROOT_DIR ${PROJECT_SOURCE_DIR}/../../../../) + +INCLUDE(SetOutputPaths) + +IF(DEFINED PX_OUTPUT_EXE_DIR) + SetExeOutputPath(${PX_OUTPUT_EXE_DIR}) +ENDIF() +IF(DEFINED PX_OUTPUT_DLL_DIR) + SetDllOutputPath(${PX_OUTPUT_DLL_DIR}) +ENDIF() +IF(DEFINED PX_OUTPUT_LIB_DIR) + SetLibOutputPath(${PX_OUTPUT_LIB_DIR}) +ENDIF() +# All EXE/DLL/LIB output will be overwritten if PX_OUTPUT_ALL_DIR is defined +IF(DEFINED PX_OUTPUT_ALL_DIR) + SetSingleOutputPath(${PX_OUTPUT_ALL_DIR}) +ENDIF() + +# Prevent failure due to command line limitations +IF(USE_RESPONSE_FILES) + SET(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1) + SET(CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1) + SET(CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1) + SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1) + SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1) + SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1) +ENDIF() diff --git a/PxShared/src/compiler/cmake/common/PsFastXml.cmake b/PxShared/src/compiler/cmake/common/PsFastXml.cmake new file mode 100644 index 0000000..06c1282 --- /dev/null +++ b/PxShared/src/compiler/cmake/common/PsFastXml.cmake @@ -0,0 +1,37 @@ +# +# Build PsFastXml common +# + +SET(PSFASTXML_HEADERS + ${LL_SOURCE_DIR}/include/PsFastXml.h +) +SOURCE_GROUP(include FILES ${PSFASTXML_HEADERS}) + +SET(PSFASTXML_SOURCE + ${LL_SOURCE_DIR}/src/PsFastXml.cpp +) +SOURCE_GROUP(src FILES ${PSFASTXML_SOURCE}) + +ADD_LIBRARY(PsFastXml STATIC + ${PSFASTXML_HEADERS} + ${PSFASTXML_SOURCE} +) + +TARGET_INCLUDE_DIRECTORIES(PsFastXml + PRIVATE ${PXSHARED_SOURCE_DIR}/../include + PRIVATE ${PXSHARED_SOURCE_DIR}/foundation/include + PRIVATE ${LL_SOURCE_DIR}/include + + PRIVATE ${PLATFORM_INCLUDES} +) + +TARGET_COMPILE_DEFINITIONS(PsFastXml + PRIVATE ${PSFASTXML_COMPILE_DEFS} +) + +SET_TARGET_PROPERTIES(PsFastXml PROPERTIES + COMPILE_PDB_NAME_DEBUG 
"PsFastXml${CMAKE_DEBUG_POSTFIX}" + COMPILE_PDB_NAME_CHECKED "PsFastXml${CMAKE_CHECKED_POSTFIX}" + COMPILE_PDB_NAME_PROFILE "PsFastXml${CMAKE_PROFILE_POSTFIX}" + COMPILE_PDB_NAME_RELEASE "PsFastXml${CMAKE_RELEASE_POSTFIX}" +)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/common/PxCudaContextManager.cmake b/PxShared/src/compiler/cmake/common/PxCudaContextManager.cmake new file mode 100644 index 0000000..ab76997 --- /dev/null +++ b/PxShared/src/compiler/cmake/common/PxCudaContextManager.cmake @@ -0,0 +1,77 @@ +# +# Build PxCudaContextManager common +# + + +# CUDA! +SET(CUDA_NVCC_FLAGS "-lineinfo -use_fast_math -ftz=true -prec-div=false -prec-sqrt=false -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_50,code=compute_50 -D_CONSOLE -D_WIN32_WINNT=0x0501") + +CUDA_INCLUDE_DIRECTORIES( + ${PXSHARED_SOURCE_DIR}/../include + ${PXSHARED_SOURCE_DIR}/foundation/include + ${PXSHARED_SOURCE_DIR}/cudamanager/include +) + +SET(CUDA_PROPAGATE_HOST_FLAGS OFF) + +# Compile the CuKernelSolver - it has different options than the other CUDA files +SET(CUDA_NVCC_FLAGS_DEBUG "-D_DEBUG -D_CONSOLE -D_WIN32_WINNT=0x0501 ${CUDA_COMPILER_OPTION_DEBUG}") +SET(CUDA_NVCC_FLAGS_CHECKED "-DNDEBUG -D_CONSOLE -D_WIN32_WINNT=0x0501 ${CUDA_COMPILER_OPTION_CHECKED}") +SET(CUDA_NVCC_FLAGS_PROFILE "-DNDEBUG -D_CONSOLE -D_WIN32_WINNT=0x0501 ${CUDA_COMPILER_OPTION_PROFILE}") +SET(CUDA_NVCC_FLAGS_RELEASE "-DNDEBUG -D_CONSOLE -D_WIN32_WINNT=0x0501 ${CUDA_COMPILER_OPTION_RELEASE}") + +SET(CUDACONTEXTMANAGER_HEADERS + ${PXSHARED_SOURCE_DIR}/../include/cudamanager/PxCudaContextManager.h + ${PXSHARED_SOURCE_DIR}/../include/cudamanager/PxCudaMemoryManager.h + ${PXSHARED_SOURCE_DIR}/../include/cudamanager/PxGpuCopyDesc.h + ${PXSHARED_SOURCE_DIR}/../include/cudamanager/PxGpuCopyDescQueue.h +) +SOURCE_GROUP(include FILES ${CUDACONTEXTMANAGER_HEADERS}) + +SET(CUDACONTEXTMANAGER_KERNELS + ${LL_SOURCE_DIR}/src/CUDA/UtilKernels.cu +) +SOURCE_GROUP("src kernels" FILES ${CUDACONTEXTMANAGER_KERNELS}) + +SET(CUDACONTEXTMANAGER_SOURCE + ${LL_SOURCE_DIR}/src/CudaContextManager.cpp + ${LL_SOURCE_DIR}/src/CudaKernelWrangler.cpp + 
${LL_SOURCE_DIR}/src/CudaMemoryManager.cpp + ${LL_SOURCE_DIR}/src/HeapManagerRef.cpp + ${LL_SOURCE_DIR}/src/GpuDispatcher.cpp + ${LL_SOURCE_DIR}/src/BlockingWait.cpp + ${LL_SOURCE_DIR}/src/PhysXDeviceSettings.cpp +) +SOURCE_GROUP(src\\src FILES ${CUDACONTEXTMANAGER_SOURCE}) + +SET(CUDACONTEXTMANAGER_SOURCE_HEADERS + ${LL_SOURCE_DIR}/include/CudaContextManager.h + ${LL_SOURCE_DIR}/include/CudaKernelWrangler.h + ${LL_SOURCE_DIR}/include/GpuDispatcher.h + ${LL_SOURCE_DIR}/include/PhysXDeviceSettings.h +) +SOURCE_GROUP(src\\src FILES ${CUDACONTEXTMANAGER_SOURCE_HEADERS}) + +CUDA_ADD_LIBRARY(PxCudaContextManager STATIC + ${CUDACONTEXTMANAGER_HEADERS} + ${CUDACONTEXTMANAGER_SOURCE} + ${CUDACONTEXTMANAGER_SOURCE_HEADERS} + + ${CUDACONTEXTMANAGER_KERNELS} +) + +# Target specific compile options + + +TARGET_INCLUDE_DIRECTORIES(PxCudaContextManager + PRIVATE ${PXSHARED_SOURCE_DIR}/../include + PRIVATE ${PXSHARED_SOURCE_DIR}/foundation/include + PRIVATE ${PXSHARED_SOURCE_DIR}/task/include + PRIVATE ${PXSHARED_SOURCE_DIR}/cudamanager/include + PRIVATE ${LL_SOURCE_DIR}/include + PRIVATE ${CUDA_INCLUDE_DIRS} + +) + + + diff --git a/PxShared/src/compiler/cmake/common/PxFoundation.cmake b/PxShared/src/compiler/cmake/common/PxFoundation.cmake new file mode 100644 index 0000000..99905e2 --- /dev/null +++ b/PxShared/src/compiler/cmake/common/PxFoundation.cmake @@ -0,0 +1,118 @@ +# +# Build PxFoundation common +# + +SET(PXFOUNDATION_HEADERS + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/Px.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxAllocatorCallback.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxAssert.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxBitAndData.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxBounds3.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxErrorCallback.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxErrors.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxFlags.h + 
${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxFoundation.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxFoundationVersion.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxIntrinsics.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxIO.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxMat33.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxMat44.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxMath.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxMathUtils.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxMemory.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxPlane.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxPreprocessor.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxProfiler.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxQuat.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxSimpleTypes.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxStrideIterator.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxTransform.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxUnionCast.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxVec2.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxVec3.h + ${PROJECT_SOURCE_DIR}/../../../../include/foundation/PxVec4.h +) +SOURCE_GROUP(include FILES ${PXFOUNDATION_HEADERS}) + +SET(PXFOUNDATION_SOURCE + ${LL_SOURCE_DIR}/src/PsAllocator.cpp + ${LL_SOURCE_DIR}/src/PsAssert.cpp + ${LL_SOURCE_DIR}/src/PsFoundation.cpp + ${LL_SOURCE_DIR}/src/PsMathUtils.cpp + ${LL_SOURCE_DIR}/src/PsString.cpp + ${LL_SOURCE_DIR}/src/PsTempAllocator.cpp + ${LL_SOURCE_DIR}/src/PsUtilities.cpp +) +SOURCE_GROUP(src\\src FILES ${PXFOUNDATION_SOURCE}) + +SET(PXFOUNDATION_SOURCE_HEADERS + ${LL_SOURCE_DIR}/include/Ps.h + ${LL_SOURCE_DIR}/include/PsAlignedMalloc.h + ${LL_SOURCE_DIR}/include/PsAlloca.h + ${LL_SOURCE_DIR}/include/PsAllocator.h + ${LL_SOURCE_DIR}/include/PsAoS.h + 
${LL_SOURCE_DIR}/include/PsArray.h + ${LL_SOURCE_DIR}/include/PsAtomic.h + ${LL_SOURCE_DIR}/include/PsBasicTemplates.h + ${LL_SOURCE_DIR}/include/PsBitUtils.h + ${LL_SOURCE_DIR}/include/PsBroadcast.h + ${LL_SOURCE_DIR}/include/PsCpu.h + ${LL_SOURCE_DIR}/include/PsFoundation.h + ${LL_SOURCE_DIR}/include/PsFPU.h + ${LL_SOURCE_DIR}/include/PsHash.h + ${LL_SOURCE_DIR}/include/PsHashInternals.h + ${LL_SOURCE_DIR}/include/PsHashMap.h + ${LL_SOURCE_DIR}/include/PsHashSet.h + ${LL_SOURCE_DIR}/include/PsInlineAllocator.h + ${LL_SOURCE_DIR}/include/PsInlineAoS.h + ${LL_SOURCE_DIR}/include/PsInlineArray.h + ${LL_SOURCE_DIR}/include/PsIntrinsics.h + ${LL_SOURCE_DIR}/include/PsMathUtils.h + ${LL_SOURCE_DIR}/include/PsMutex.h + ${LL_SOURCE_DIR}/include/PsPool.h + ${LL_SOURCE_DIR}/include/PsSList.h + ${LL_SOURCE_DIR}/include/PsSocket.h + ${LL_SOURCE_DIR}/include/PsSort.h + ${LL_SOURCE_DIR}/include/PsSortInternals.h + ${LL_SOURCE_DIR}/include/PsString.h + ${LL_SOURCE_DIR}/include/PsSync.h + ${LL_SOURCE_DIR}/include/PsTempAllocator.h + ${LL_SOURCE_DIR}/include/PsThread.h + ${LL_SOURCE_DIR}/include/PsTime.h + ${LL_SOURCE_DIR}/include/PsUserAllocated.h + ${LL_SOURCE_DIR}/include/PsUtilities.h + ${LL_SOURCE_DIR}/include/PsVecMath.h + ${LL_SOURCE_DIR}/include/PsVecMathAoSScalar.h + ${LL_SOURCE_DIR}/include/PsVecMathAoSScalarInline.h + ${LL_SOURCE_DIR}/include/PsVecMathSSE.h + ${LL_SOURCE_DIR}/include/PsVecMathUtilities.h + ${LL_SOURCE_DIR}/include/PsVecQuat.h + ${LL_SOURCE_DIR}/include/PsVecTransform.h +) +SOURCE_GROUP(src\\include FILES ${PXFOUNDATION_SOURCE_HEADERS}) + +ADD_LIBRARY(PxFoundation ${PXFOUNDATION_LIBTYPE} + ${PXFOUNDATION_SOURCE} + ${PXFOUNDATION_SOURCE_HEADERS} + ${PXFOUNDATION_HEADERS} + + ${PXFOUNDATION_PLATFORM_FILES} +) + +TARGET_INCLUDE_DIRECTORIES(PxFoundation + PRIVATE ${PXSHARED_SOURCE_DIR}/../include + PRIVATE ${LL_SOURCE_DIR}/include + + PRIVATE ${PXFOUNDATION_PLATFORM_INCLUDES} +) + +TARGET_COMPILE_DEFINITIONS(PxFoundation + PRIVATE 
${PXFOUNDATION_COMPILE_DEFS} +) + +SET_TARGET_PROPERTIES(PxFoundation PROPERTIES + COMPILE_PDB_NAME_DEBUG "PxFoundation${CMAKE_DEBUG_POSTFIX}" + COMPILE_PDB_NAME_CHECKED "PxFoundation${CMAKE_CHECKED_POSTFIX}" + COMPILE_PDB_NAME_PROFILE "PxFoundation${CMAKE_PROFILE_POSTFIX}" + COMPILE_PDB_NAME_RELEASE "PxFoundation${CMAKE_RELEASE_POSTFIX}" +)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/common/PxPvdSDK.cmake b/PxShared/src/compiler/cmake/common/PxPvdSDK.cmake new file mode 100644 index 0000000..7014209 --- /dev/null +++ b/PxShared/src/compiler/cmake/common/PxPvdSDK.cmake @@ -0,0 +1,123 @@ +# +# Build PxPvdSDK common +# + +SET(PXPVDSDK_HEADERS + ${PROJECT_SOURCE_DIR}/../../../../include/pvd/PxPvd.h + ${PROJECT_SOURCE_DIR}/../../../../include/pvd/PxPvdTransport.h +) +SOURCE_GROUP(include FILES ${PXPVDSDK_HEADERS}) + +SET(PXPVDSDK_SOURCE + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileBase.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileCompileTimeEventFilter.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileContextProvider.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileContextProviderImpl.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileDataBuffer.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileDataParsing.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventBuffer.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventBufferAtomic.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventBufferClient.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventBufferClientManager.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventFilter.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventHandler.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventId.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventImpl.cpp + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventMutex.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventNames.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventParser.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEvents.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventSender.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventSerialization.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileEventSystem.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileMemory.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileMemoryBuffer.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileMemoryEventBuffer.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileMemoryEventParser.h + 
${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileMemoryEventRecorder.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileMemoryEventReflexiveWriter.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileMemoryEvents.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileMemoryEventSummarizer.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileMemoryEventTypes.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileScopedEvent.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileScopedMutexLock.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileZone.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileZoneImpl.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileZoneManager.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxProfileZoneManagerImpl.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvd.cpp + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdBits.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdByteStreams.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdCommStreamEvents.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdCommStreamEventSink.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdCommStreamSDKEventTypes.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdCommStreamTypes.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdDataStream.cpp + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdDefaultFileTransport.cpp + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdDefaultFileTransport.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdDefaultSocketTransport.cpp + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdDefaultSocketTransport.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdFoundation.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdImpl.cpp + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdImpl.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdInternalByteStreams.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdMarshalling.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdMemClient.cpp + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdMemClient.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdObjectModel.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdObjectModelInternalTypeDefs.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdObjectModelInternalTypes.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdObjectModelMetaData.cpp + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdObjectModelMetaData.h + 
${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdObjectRegistrar.cpp + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdObjectRegistrar.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdProfileZoneClient.cpp + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdProfileZoneClient.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdUserRenderer.cpp + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdUserRenderImpl.h + ${PXSHARED_SOURCE_DIR}/pvd/src/PxPvdUserRenderTypes.h +) +SOURCE_GROUP(src\\src FILES ${PXPVDSDK_SOURCE}) + +SET(PXPVDSDK_INTERNAL_HEADERS + ${PXSHARED_SOURCE_DIR}/pvd/include/PsPvd.h + ${PXSHARED_SOURCE_DIR}/pvd/include/PxProfileAllocatorWrapper.h + ${PXSHARED_SOURCE_DIR}/pvd/include/PxPvdClient.h + ${PXSHARED_SOURCE_DIR}/pvd/include/PxPvdDataStream.h + ${PXSHARED_SOURCE_DIR}/pvd/include/PxPvdDataStreamHelpers.h + ${PXSHARED_SOURCE_DIR}/pvd/include/PxPvdErrorCodes.h + ${PXSHARED_SOURCE_DIR}/pvd/include/PxPvdObjectModelBaseTypes.h + ${PXSHARED_SOURCE_DIR}/pvd/include/PxPvdRenderBuffer.h + ${PXSHARED_SOURCE_DIR}/pvd/include/PxPvdUserRenderer.h +) +SOURCE_GROUP(src\\include FILES ${PXPVDSDK_INTERNAL_HEADERS}) + +ADD_LIBRARY(PxPvdSDK ${PXPVDSDK_LIBTYPE} + ${PXPVDSDK_HEADERS} + + ${PXPVDSDK_INTERNAL_HEADERS} + ${PXPVDSDK_SOURCE} + + ${PXPVDSDK_PLATFORM_FILES} +) + +TARGET_INCLUDE_DIRECTORIES(PxPvdSDK + PRIVATE ${PXSHARED_SOURCE_DIR}/../include + PRIVATE ${PXSHARED_SOURCE_DIR}/foundation/include + PRIVATE ${PXSHARED_SOURCE_DIR}/pvd/include + PRIVATE ${PXSHARED_SOURCE_DIR}/filebuf/include + + PRIVATE ${PXPVDSDK_PLATFORM_INCLUDES} + +) + +TARGET_COMPILE_DEFINITIONS(PxPvdSDK + PRIVATE ${PXPVDSDK_COMPILE_DEFS} +) + +SET_TARGET_PROPERTIES(PxPvdSDK PROPERTIES + COMPILE_PDB_NAME_DEBUG "PxPvdSDK${CMAKE_DEBUG_POSTFIX}" + COMPILE_PDB_NAME_CHECKED "PxPvdSDK${CMAKE_CHECKED_POSTFIX}" + COMPILE_PDB_NAME_PROFILE "PxPvdSDK${CMAKE_PROFILE_POSTFIX}" + COMPILE_PDB_NAME_RELEASE "PxPvdSDK${CMAKE_RELEASE_POSTFIX}" +)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/common/PxTask.cmake b/PxShared/src/compiler/cmake/common/PxTask.cmake new file mode 100644 index 0000000..0d1cd1f --- /dev/null +++ b/PxShared/src/compiler/cmake/common/PxTask.cmake @@ -0,0 +1,44 @@ +# +# Build PxTask common +# + +SET(PXTASK_HEADERS + ${PROJECT_SOURCE_DIR}/../../../../include/task/PxCpuDispatcher.h + ${PROJECT_SOURCE_DIR}/../../../../include/task/PxGpuDispatcher.h + ${PROJECT_SOURCE_DIR}/../../../../include/task/PxGpuTask.h + ${PROJECT_SOURCE_DIR}/../../../../include/task/PxTask.h + ${PROJECT_SOURCE_DIR}/../../../../include/task/PxTaskDefine.h + ${PROJECT_SOURCE_DIR}/../../../../include/task/PxTaskManager.h +) +SOURCE_GROUP(include FILES ${PXTASK_HEADERS}) + +SET(PXTASK_SOURCE + ${PXSHARED_SOURCE_DIR}/task/src/TaskManager.cpp +) +SOURCE_GROUP(src FILES ${PXTASK_SOURCE}) + +ADD_LIBRARY(PxTask ${PXTASK_LIBTYPE} + ${PXTASK_HEADERS} + ${PXTASK_SOURCE} +) + +TARGET_INCLUDE_DIRECTORIES(PxTask + PRIVATE ${PXSHARED_SOURCE_DIR}/../include + PRIVATE ${PXSHARED_SOURCE_DIR}/cudamanager/include + PRIVATE ${PXSHARED_SOURCE_DIR}/foundation/include + + PRIVATE ${PXTASK_PLATFORM_INCLUDES} +) + +TARGET_COMPILE_DEFINITIONS(PxTask + PRIVATE ${PXTASK_COMPILE_DEFS} +) + +IF(NOT ${PXTASK_LIBTYPE} STREQUAL "OBJECT") + SET_TARGET_PROPERTIES(PxTask PROPERTIES + COMPILE_PDB_NAME_DEBUG "PxTask${CMAKE_DEBUG_POSTFIX}" + COMPILE_PDB_NAME_CHECKED "PxTask${CMAKE_CHECKED_POSTFIX}" + COMPILE_PDB_NAME_PROFILE "PxTask${CMAKE_PROFILE_POSTFIX}" + COMPILE_PDB_NAME_RELEASE "PxTask${CMAKE_RELEASE_POSTFIX}" + ) +ENDIF()
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/findfileswithspec.py b/PxShared/src/compiler/cmake/findfileswithspec.py new file mode 100644 index 0000000..484ff4f --- /dev/null +++ b/PxShared/src/compiler/cmake/findfileswithspec.py @@ -0,0 +1,23 @@ +import os,argparse,sys,string + +# +# Simple helper program - give it a path and it will list all of the files of the specified extension in relative format, using the +# pathroot variable as a substitution. This greatly simplifies one part of the process of creating a CMake file for a project +# +parser = argparse.ArgumentParser() +parser.add_argument("dir", help="Path to find files in") +parser.add_argument("extension", help="Spec to find (ie .cpp)") +parser.add_argument("--pathroot", help="Path variable to prepend to each line, example: ${PX_ROOT}", default="${DUDER}") + +args = parser.parse_args() + +if not os.path.exists(args.dir): + print("Unable to find path {}".format(args.dir)) + exit(1) + +for root, dirs, files in os.walk(args.dir): + for file in files: + if file.endswith(args.extension): + result = os.path.join(root, file) + + print(result.replace(args.dir, args.pathroot)) diff --git a/PxShared/src/compiler/cmake/html5/CMakeLists.txt b/PxShared/src/compiler/cmake/html5/CMakeLists.txt new file mode 100644 index 0000000..8b9587a --- /dev/null +++ b/PxShared/src/compiler/cmake/html5/CMakeLists.txt @@ -0,0 +1,36 @@ +cmake_minimum_required(VERSION 3.3) +#set(CMAKE_VERBOSE_MAKEFILE ON) +include(../common/CMakeLists.txt) + + +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to HTML5 + SET(TARGET_BUILD_PLATFORM "HTML5") +ENDIF() + +SET(PLATFORM_LIST HTML5) + +IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) + MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) +ENDIF() + +SET(CMAKE_CXX_FLAGS "${EPIC_BUILD_FLAGS} -fdiagnostics-show-option -fno-rtti -fno-exceptions -ffast-math -ffunction-sections -fdata-sections -Werror -ferror-limit=0 -Wall -Wextra 
-fstrict-aliasing -Wstrict-aliasing=2 -pedantic -Weverything -Wno-c++11-long-long -Wno-padded -Wno-reserved-id-macro -Wno-float-equal -Wno-sign-conversion -Wno-covered-switch-default -Wno-documentation-unknown-command -Wno-weak-vtables -Wno-missing-prototypes -Wno-unused-local-typedef -Wno-float-conversion -Wno-global-constructors -Wno-missing-variable-declarations -Wno-exit-time-destructors -Wno-unused-macros -Wno-undef -Wno-c++11-extra-semi -Wno-c++11-extensions -Wno-non-virtual-dtor -Wno-unknown-pragmas -Wno-old-style-cast -Wno-extra-semi -Wno-cast-align -Wno-documentation -Wno-shadow -Wno-conversion -Wno-newline-eof -Wno-header-hygiene -Wno-switch-enum -Wno-undefined-reinterpret-cast -Wno-variadic-macros -Wno-gnu-zero-variadic-macro-arguments -Wno-overloaded-virtual -Wno-dynamic-class-memaccess -Wno-nested-anon-types -Wno-invalid-offsetof -Wno-reorder -Wno-local-type-template-args -Wno-unreachable-code -Wno-unreachable-code-return -Wno-format-pedantic -Wno-unused-private-field -Wno-unused-parameter -Wno-unused-member-function -Wno-used-but-marked-unused -Wno-unused-variable -Wno-format-nonliteral -Wno-shift-sign-overflow -Wno-comma -Wno-expansion-to-defined -Wno-undefined-func-template -Wno-weak-template-vtables -Wno-double-promotion -Wno-nonportable-include-path -Wno-disabled-macro-expansion -Wno-missing-noreturn") +SET(CMAKE_STATIC_LIBRARY_PREFIX "") + +SET(PXSHARED_HTML5_COMPILE_DEFS _LIB;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE) +SET(PXSHARED_HTML5_DEBUG_COMPILE_DEFS _DEBUG;PX_DEBUG=1;PX_CHECKED=1) +SET(PXSHARED_HTML5_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1) +SET(PXSHARED_HTML5_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1) +SET(PXSHARED_HTML5_RELEASE_COMPILE_DEFS NDEBUG) + +SET(CMAKE_DEBUG_POSTFIX $ENV{LIB_SUFFIX}) +SET(CMAKE_PROFILE_POSTFIX $ENV{LIB_SUFFIX}) +SET(CMAKE_CHECKED_POSTFIX $ENV{LIB_SUFFIX}) +SET(CMAKE_RELEASE_POSTFIX $ENV{LIB_SUFFIX}) + +# Include project cmake files here +INCLUDE(PxFoundation.cmake) +INCLUDE(PsFastXml.cmake) 
+INCLUDE(PxPvdSDK.cmake) +INCLUDE(PxTask.cmake) +# INCLUDE(PxCudaContextManager.cmake) + diff --git a/PxShared/src/compiler/cmake/html5/PsFastXml.cmake b/PxShared/src/compiler/cmake/html5/PsFastXml.cmake new file mode 100644 index 0000000..3279134 --- /dev/null +++ b/PxShared/src/compiler/cmake/html5/PsFastXml.cmake @@ -0,0 +1,26 @@ +# +# Build PsFastXml +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/fastxml) + +SET(PLATFORM_INCLUDES + $ENV{EMSCRIPTEN}/system/include +) + +# Use generator expressions to set config specific preprocessor definitions +SET(PSFASTXML_COMPILE_DEFS + + # Common to all configurations + ${PXSHARED_HTML5_COMPILE_DEFS};PX_FOUNDATION_DLL=0; + + $<$<CONFIG:debug>:${PXSHARED_HTML5_DEBUG_COMPILE_DEFS};> + $<$<CONFIG:checked>:${PXSHARED_HTML5_CHECKED_COMPILE_DEFS};> + $<$<CONFIG:profile>:${PXSHARED_HTML5_PROFILE_COMPILE_DEFS};> + $<$<CONFIG:release>:${PXSHARED_HTML5_RELEASE_COMPILE_DEFS};> +) + +# include PsFastXml common +INCLUDE(../common/PsFastXml.cmake)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/html5/PxFoundation.cmake b/PxShared/src/compiler/cmake/html5/PxFoundation.cmake new file mode 100644 index 0000000..a78e4e6 --- /dev/null +++ b/PxShared/src/compiler/cmake/html5/PxFoundation.cmake @@ -0,0 +1,41 @@ +# +# Build PxFoundation +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/foundation) + +SET(PXFOUNDATION_LIBTYPE STATIC) + +SET(PXFOUNDATION_PLATFORM_FILES + ${LL_SOURCE_DIR}/src/unix/PsUnixAtomic.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixCpu.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixFPU.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixMutex.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixPrintString.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSList.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSocket.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixSync.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixThread.cpp + ${LL_SOURCE_DIR}/src/unix/PsUnixTime.cpp +) + +SET(PXFOUNDATION_PLATFORM_INCLUDES + $ENV{EMSCRIPTEN}/system/include + ${LL_SOURCE_DIR}/include/unix +) + +SET(PXFOUNDATION_COMPILE_DEFS + + # Common to all configurations + ${PXSHARED_HTML5_COMPILE_DEFS} + + $<$<CONFIG:debug>:${PXSHARED_HTML5_DEBUG_COMPILE_DEFS}> + $<$<CONFIG:checked>:${PXSHARED_HTML5_CHECKED_COMPILE_DEFS}> + $<$<CONFIG:profile>:${PXSHARED_HTML5_PROFILE_COMPILE_DEFS}> + $<$<CONFIG:release>:${PXSHARED_HTML5_RELEASE_COMPILE_DEFS}> +) + +# include PxFoundation common +INCLUDE(../common/PxFoundation.cmake)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/html5/PxPvdSDK.cmake b/PxShared/src/compiler/cmake/html5/PxPvdSDK.cmake new file mode 100644 index 0000000..36a465f --- /dev/null +++ b/PxShared/src/compiler/cmake/html5/PxPvdSDK.cmake @@ -0,0 +1,31 @@ +# +# Build PxPvdSDK +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/pvd) + + SET(PXPVDSDK_LIBTYPE STATIC) + + SET(PXPVDSDK_PLATFORM_INCLUDES + $ENV{EMSCRIPTEN}/system/include + ) + + # Use generator expressions to set config specific preprocessor definitions + SET(PXPVDSDK_COMPILE_DEFS + ${PXSHARED_HTML5_COMPILE_DEFS} + + $<$<CONFIG:debug>:${PXSHARED_HTML5_DEBUG_COMPILE_DEFS}> + $<$<CONFIG:checked>:${PXSHARED_HTML5_CHECKED_COMPILE_DEFS}> + $<$<CONFIG:profile>:${PXSHARED_HTML5_PROFILE_COMPILE_DEFS}> + $<$<CONFIG:release>:${PXSHARED_HTML5_RELEASE_COMPILE_DEFS}> + ) + +# include PxPvdSDK common +INCLUDE(../common/PxPvdSDK.cmake) + +# Add linked libraries +TARGET_LINK_LIBRARIES(PxPvdSDK PRIVATE PxFoundation) + + diff --git a/PxShared/src/compiler/cmake/html5/PxTask.cmake b/PxShared/src/compiler/cmake/html5/PxTask.cmake new file mode 100644 index 0000000..5c00c13 --- /dev/null +++ b/PxShared/src/compiler/cmake/html5/PxTask.cmake @@ -0,0 +1,22 @@ +# +# Build PxTask +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/task) + +SET(PXTASK_PLATFORM_INCLUDES + $ENV{EMSCRIPTEN}/system/include +) + +SET(PXTASK_COMPILE_DEFS + ${PXSHARED_HTML5_COMPILE_DEFS};PX_FOUNDATION_DLL=0; + $<$<CONFIG:debug>:${PXSHARED_HTML5_DEBUG_COMPILE_DEFS};> + $<$<CONFIG:checked>:${PXSHARED_HTML5_CHECKED_COMPILE_DEFS};> + $<$<CONFIG:profile>:${PXSHARED_HTML5_PROFILE_COMPILE_DEFS};> + $<$<CONFIG:release>:${PXSHARED_HTML5_RELEASE_COMPILE_DEFS};> +) + +# include PxTask common +INCLUDE(../common/PxTask.cmake) diff --git a/PxShared/src/compiler/cmake/windows/CMakeLists.txt 
b/PxShared/src/compiler/cmake/windows/CMakeLists.txt new file mode 100644 index 0000000..39b7dfc --- /dev/null +++ b/PxShared/src/compiler/cmake/windows/CMakeLists.txt @@ -0,0 +1,90 @@ +cmake_minimum_required(VERSION 3.3) +include(../common/CMakeLists.txt) + + +IF(NOT DEFINED TARGET_BUILD_PLATFORM) # Not defined, default to Windows + SET(TARGET_BUILD_PLATFORM "Windows") +ENDIF() + +SET(PLATFORM_LIST Windows) + +IF (NOT ${TARGET_BUILD_PLATFORM} IN_LIST PLATFORM_LIST) + MESSAGE(FATAL_ERROR "Invalid platform:" ${TARGET_BUILD_PLATFORM}) +ENDIF() + +SET(CMAKE_CXX_FLAGS "/Wall /wd4514 /wd4820 /wd4127 /wd4710 /wd4711 /wd4577 /d2Zi+ /WX /W4 /GF /GS- /GR- /Gd /fp:fast") + +IF(DEFINED STATIC_WINCRT) + SET(WINCRT_NDEBUG "/MT") + SET(WINCRT_DEBUG "/MTd") +ELSE() + SET(WINCRT_NDEBUG "/MD") + SET(WINCRT_DEBUG "/MDd") +ENDIF() + +SET(CMAKE_CXX_FLAGS_DEBUG "/Od ${WINCRT_DEBUG} /RTCu /Zi") +SET(CMAKE_CXX_FLAGS_CHECKED "/Ox ${WINCRT_NDEBUG} /Zi") +SET(CMAKE_CXX_FLAGS_PROFILE "/Ox ${WINCRT_NDEBUG} /Zi") +SET(CMAKE_CXX_FLAGS_RELEASE "/Ox ${WINCRT_NDEBUG} /Zi") + +# Build PDBs for all configurations +SET(CMAKE_SHARED_LINKER_FLAGS "/DEBUG") + +# Controls PX_NVTX for all projects on windows +SET(PXSHARED_WINDOWS_ENABLE_NVTX 0) + +IF(DEFINED PX_GENERATE_GPU_PROJECTS) +SET(PXSHARED_WINDOWS_COMPILE_DEFS WIN32;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_WINSOCK_DEPRECATED_NO_WARNINGS;) +ELSE() +# Disable cuda and dx for all projects on windows +SET(PXSHARED_WINDOWS_COMPILE_DEFS WIN32;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_WINSOCK_DEPRECATED_NO_WARNINGS;DISABLE_CUDA_PHYSX;) +ENDIF() +SET(PXSHARED_WINDOWS_DEBUG_COMPILE_DEFS _DEBUG;PX_DEBUG=1;PX_CHECKED=1;PX_NVTX=${PXSHARED_WINDOWS_ENABLE_NVTX}) +SET(PXSHARED_WINDOWS_CHECKED_COMPILE_DEFS NDEBUG;PX_CHECKED=1;PX_NVTX=${PXSHARED_WINDOWS_ENABLE_NVTX}) +SET(PXSHARED_WINDOWS_PROFILE_COMPILE_DEFS NDEBUG;PX_PROFILE=1;PX_NVTX=${PXSHARED_WINDOWS_ENABLE_NVTX}) +SET(PXSHARED_WINDOWS_RELEASE_COMPILE_DEFS NDEBUG) + +IF(CMAKE_CL_64) + 
ADD_DEFINITIONS(-DWIN64) +ENDIF(CMAKE_CL_64) + +IF(CMAKE_CL_64) + SET(LIBPATH_SUFFIX "x64") +ELSE(CMAKE_CL_64) + SET(LIBPATH_SUFFIX "x86") +ENDIF(CMAKE_CL_64) + +SET(CMAKE_DEBUG_POSTFIX "${CMAKE_DEBUG_POSTFIX}_${LIBPATH_SUFFIX}") +SET(CMAKE_PROFILE_POSTFIX "${CMAKE_PROFILE_POSTFIX}_${LIBPATH_SUFFIX}") +SET(CMAKE_CHECKED_POSTFIX "${CMAKE_CHECKED_POSTFIX}_${LIBPATH_SUFFIX}") +SET(CMAKE_RELEASE_POSTFIX "${CMAKE_RELEASE_POSTFIX}_${LIBPATH_SUFFIX}") + +# Include project cmake files here +IF(DEFINED PX_SELECT_COMPONENTS) + if ("PxFoundation" IN_LIST PX_SELECT_COMPONENTS) + INCLUDE(PxFoundation.cmake) + endif() + if ("PsFastXml" IN_LIST PX_SELECT_COMPONENTS) + INCLUDE(PsFastXml.cmake) + endif() + if ("PxPvdSDK" IN_LIST PX_SELECT_COMPONENTS) + INCLUDE(PxPvdSDK.cmake) + endif() + if ("PxTask" IN_LIST PX_SELECT_COMPONENTS) + INCLUDE(PxTask.cmake) + endif() + if ("PxCudaContextManager" IN_LIST PX_SELECT_COMPONENTS) + IF(DEFINED PX_GENERATE_GPU_PROJECTS) + INCLUDE(PxCudaContextManager.cmake) + ENDIF() + endif() +ELSE() +INCLUDE(PxFoundation.cmake) +INCLUDE(PsFastXml.cmake) +INCLUDE(PxPvdSDK.cmake) +INCLUDE(PxTask.cmake) +IF(DEFINED PX_GENERATE_GPU_PROJECTS) + INCLUDE(PxCudaContextManager.cmake) +ENDIF() +ENDIF() + diff --git a/PxShared/src/compiler/cmake/windows/PsFastXml.cmake b/PxShared/src/compiler/cmake/windows/PsFastXml.cmake new file mode 100644 index 0000000..862b06e --- /dev/null +++ b/PxShared/src/compiler/cmake/windows/PsFastXml.cmake @@ -0,0 +1,21 @@ +# +# Build PsFastXml +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/fastxml) + +# Use generator expressions to set config specific preprocessor definitions +SET(PSFASTXML_COMPILE_DEFS + # Common to all configurations + ${PXSHARED_WINDOWS_COMPILE_DEFS};PX_FOUNDATION_DLL=0; + + $<$<CONFIG:debug>:${PXSHARED_WINDOWS_DEBUG_COMPILE_DEFS};> + $<$<CONFIG:checked>:${PXSHARED_WINDOWS_CHECKED_COMPILE_DEFS};> + 
$<$<CONFIG:profile>:${PXSHARED_WINDOWS_PROFILE_COMPILE_DEFS};> + $<$<CONFIG:release>:${PXSHARED_WINDOWS_RELEASE_COMPILE_DEFS};> +) + +# include PsFastXml common +INCLUDE(../common/PsFastXml.cmake)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/windows/PxCudaContextManager.cmake b/PxShared/src/compiler/cmake/windows/PxCudaContextManager.cmake new file mode 100644 index 0000000..6b59d6e --- /dev/null +++ b/PxShared/src/compiler/cmake/windows/PxCudaContextManager.cmake @@ -0,0 +1,32 @@ +# +# Build PxCudaContextManager +# +FIND_PACKAGE(CUDA REQUIRED) + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/cudamanager) + +SET(CUDA_COMPILER_OPTION_DEBUG "--compiler-options=/W3,/nologo,/Ot,/Ox,/Zi,${WINCRT_DEBUG}") +SET(CUDA_COMPILER_OPTION_CHECKED "--compiler-options=/W3,/nologo,/Ot,/Ox,/Zi,${WINCRT_NDEBUG}") +SET(CUDA_COMPILER_OPTION_PROFILE "--compiler-options=/W3,/nologo,/Ot,/Ox,/Zi,${WINCRT_NDEBUG}") +SET(CUDA_COMPILER_OPTION_RELEASE "--compiler-options=/W3,/nologo,/Ot,/Ox,/Zi,${WINCRT_NDEBUG}") + +# include PxCudaContextManager common +INCLUDE(../common/PxCudaContextManager.cmake) + +# No linked libraries + +# Use generator expressions to set config specific preprocessor definitions +TARGET_COMPILE_DEFINITIONS(PxCudaContextManager + + # Common to all configurations + PRIVATE ${PXSHARED_WINDOWS_COMPILE_DEFS}; + + PRIVATE $<$<CONFIG:debug>:${PXSHARED_WINDOWS_DEBUG_COMPILE_DEFS};> + PRIVATE $<$<CONFIG:checked>:${PXSHARED_WINDOWS_CHECKED_COMPILE_DEFS};> + PRIVATE $<$<CONFIG:profile>:${PXSHARED_WINDOWS_PROFILE_COMPILE_DEFS};> + PRIVATE $<$<CONFIG:release>:${PXSHARED_WINDOWS_RELEASE_COMPILE_DEFS};> +) + +#TODO: Link flags diff --git a/PxShared/src/compiler/cmake/windows/PxFoundation.cmake b/PxShared/src/compiler/cmake/windows/PxFoundation.cmake new file mode 100644 index 0000000..31de53b --- /dev/null +++ b/PxShared/src/compiler/cmake/windows/PxFoundation.cmake @@ -0,0 +1,70 @@ +# +# Build PxFoundation +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/foundation) + +SET(PXFOUNDATION_LIBTYPE SHARED) + 
+SET(PXFOUNDATION_RESOURCE_FILE + ${PXSHARED_SOURCE_DIR}/compiler/resource_${LIBPATH_SUFFIX}/PxFoundation.rc +) +SOURCE_GROUP(resource FILES ${PXFOUNDATION_RESOURCE_FILE}) + +SET(PXFOUNDATION_PLATFORM_HEADERS + ${PXSHARED_SOURCE_DIR}/../include/foundation/windows/PxWindowsIntrinsics.h + ${PXSHARED_SOURCE_DIR}/../include/foundation/windows/PxWindowsFoundationDelayLoadHook.h +) +SOURCE_GROUP(include\\windows FILES ${PXFOUNDATION_PLATFORM_HEADERS}) + +SET(PXFOUNDATION_PLATFORM_SOURCE + ${LL_SOURCE_DIR}/src/windows/PsWindowsAtomic.cpp + ${LL_SOURCE_DIR}/src/windows/PsWindowsCpu.cpp + ${LL_SOURCE_DIR}/src/windows/PsWindowsFPU.cpp + ${LL_SOURCE_DIR}/src/windows/PsWindowsMutex.cpp + ${LL_SOURCE_DIR}/src/windows/PsWindowsPrintString.cpp + ${LL_SOURCE_DIR}/src/windows/PsWindowsSList.cpp + ${LL_SOURCE_DIR}/src/windows/PsWindowsSocket.cpp + ${LL_SOURCE_DIR}/src/windows/PsWindowsSync.cpp + ${LL_SOURCE_DIR}/src/windows/PsWindowsThread.cpp + ${LL_SOURCE_DIR}/src/windows/PsWindowsTime.cpp +) +SOURCE_GROUP(src\\src\\windows FILES ${PXFOUNDATION_PLATFORM_SOURCE}) + +SET(PXFOUNDATION_PLATFORM_SOURCE_HEADERS + ${LL_SOURCE_DIR}/include/windows/PsWindowsAoS.h + ${LL_SOURCE_DIR}/include/windows/PsWindowsFPU.h + ${LL_SOURCE_DIR}/include/windows/PsWindowsInclude.h + ${LL_SOURCE_DIR}/include/windows/PsWindowsInlineAoS.h + ${LL_SOURCE_DIR}/include/windows/PsWindowsIntrinsics.h + ${LL_SOURCE_DIR}/include/windows/PsWindowsLoadLibrary.h + ${LL_SOURCE_DIR}/include/windows/PsWindowsTrigConstants.h +) +SOURCE_GROUP(src\\include\\windows FILES ${PXFOUNDATION_PLATFORM_SOURCE_HEADERS}) + + +SET(PXFOUNDATION_PLATFORM_FILES + ${PXFOUNDATION_PLATFORM_SOURCE} + ${PXFOUNDATION_PLATFORM_SOURCE_HEADERS} + ${PXFOUNDATION_PLATFORM_HEADERS} + ${PXFOUNDATION_RESOURCE_FILE} +) + +SET(PXFOUNDATION_PLATFORM_INCLUDES + ${LL_SOURCE_DIR}/include/windows +) + +SET(PXFOUNDATION_COMPILE_DEFS + # Common to all configurations + ${PXSHARED_WINDOWS_COMPILE_DEFS};PX_FOUNDATION_DLL=1; + + 
$<$<CONFIG:debug>:${PXSHARED_WINDOWS_DEBUG_COMPILE_DEFS};> + $<$<CONFIG:checked>:${PXSHARED_WINDOWS_CHECKED_COMPILE_DEFS};> + $<$<CONFIG:profile>:${PXSHARED_WINDOWS_PROFILE_COMPILE_DEFS};> + $<$<CONFIG:release>:${PXSHARED_WINDOWS_RELEASE_COMPILE_DEFS};> +) + +# include PxFoundation common +INCLUDE(../common/PxFoundation.cmake)
\ No newline at end of file diff --git a/PxShared/src/compiler/cmake/windows/PxPvdSDK.cmake b/PxShared/src/compiler/cmake/windows/PxPvdSDK.cmake new file mode 100644 index 0000000..287ec1c --- /dev/null +++ b/PxShared/src/compiler/cmake/windows/PxPvdSDK.cmake @@ -0,0 +1,55 @@ +# +# Build PxPvdSDK +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(LL_SOURCE_DIR ${PXSHARED_SOURCE_DIR}/pvd) + +FIND_PACKAGE(nvToolsExt REQUIRED) + +SET(PXPVDSDK_LIBTYPE SHARED) + +SET(PXPVDSDK_RESOURCE_FILE + ${PXSHARED_SOURCE_DIR}/compiler/resource_${LIBPATH_SUFFIX}/PxPvdSDK.rc +) +SOURCE_GROUP(resource FILES ${PXPVDSDK_RESOURCE_FILE}) + +SET(PXPVDSDK_PLATFORM_HEADERS + ${PXSHARED_SOURCE_DIR}/../include/pvd/windows/PxWindowsPvdDelayLoadHook.h +) +SOURCE_GROUP(include\\windows FILES ${PXPVDSDK_PLATFORM_HEADERS}) + +SET(PXPVDSDK_PLATFORM_SOURCE + ${PXSHARED_SOURCE_DIR}/pvd/src/windows/PxWindowsPvdDelayLoadHook.cpp +) +SOURCE_GROUP(src\\src\\windows FILES ${PXPVDSDK_PLATFORM_SOURCE}) + +SET(PXPVDSDK_PLATFORM_FILES + ${PXPVDSDK_RESOURCE_FILE} + ${PXPVDSDK_PLATFORM_HEADERS} + ${PXPVDSDK_PLATFORM_SOURCE} +) + +SET(PXPVDSDK_PLATFORM_INCLUDES + ${NVTOOLSEXT_INCLUDE_DIRS} +) + +# Use generator expressions to set config specific preprocessor definitions +SET(PXPVDSDK_COMPILE_DEFS + # Common to all configurations + ${PXSHARED_WINDOWS_COMPILE_DEFS};PX_PVDSDK_DLL=1;PX_FOUNDATION_DLL=1; + + $<$<CONFIG:debug>:${PXSHARED_WINDOWS_DEBUG_COMPILE_DEFS};> + $<$<CONFIG:checked>:${PXSHARED_WINDOWS_CHECKED_COMPILE_DEFS};> + $<$<CONFIG:profile>:${PXSHARED_WINDOWS_PROFILE_COMPILE_DEFS};> + $<$<CONFIG:release>:${PXSHARED_WINDOWS_RELEASE_COMPILE_DEFS};> +) + +# include PxPvdSDK common +INCLUDE(../common/PxPvdSDK.cmake) + +# Add linked libraries +TARGET_LINK_LIBRARIES(PxPvdSDK PUBLIC ${NVTOOLSEXT_LIBRARIES} PxFoundation) + + diff --git a/PxShared/src/compiler/cmake/windows/PxTask.cmake b/PxShared/src/compiler/cmake/windows/PxTask.cmake new file mode 100644 index 0000000..32d4b39 --- 
/dev/null +++ b/PxShared/src/compiler/cmake/windows/PxTask.cmake @@ -0,0 +1,19 @@ +# +# Build PxTask +# + +SET(PXSHARED_SOURCE_DIR ${PROJECT_SOURCE_DIR}/../../../../src) + +SET(PXTASK_COMPILE_DEFS + ${PXSHARED_WINDOWS_COMPILE_DEFS};_LIB + + $<$<CONFIG:debug>:${PXSHARED_WINDOWS_DEBUG_COMPILE_DEFS};> + $<$<CONFIG:checked>:${PXSHARED_WINDOWS_CHECKED_COMPILE_DEFS};> + $<$<CONFIG:profile>:${PXSHARED_WINDOWS_PROFILE_COMPILE_DEFS};> + $<$<CONFIG:release>:${PXSHARED_WINDOWS_RELEASE_COMPILE_DEFS};> +) + +SET(PXTASK_LIBTYPE STATIC) + +# include PxTask common +INCLUDE(../common/PxTask.cmake) diff --git a/PxShared/src/compiler/resource_x64/PxFoundation.rc b/PxShared/src/compiler/resource_x64/PxFoundation.rc Binary files differnew file mode 100644 index 0000000..dfcfde2 --- /dev/null +++ b/PxShared/src/compiler/resource_x64/PxFoundation.rc diff --git a/PxShared/src/compiler/resource_x64/PxPvdSDK.rc b/PxShared/src/compiler/resource_x64/PxPvdSDK.rc Binary files differnew file mode 100644 index 0000000..37308cf --- /dev/null +++ b/PxShared/src/compiler/resource_x64/PxPvdSDK.rc diff --git a/PxShared/src/compiler/resource_x64/resource.h b/PxShared/src/compiler/resource_x64/resource.h new file mode 100644 index 0000000..b421bea --- /dev/null +++ b/PxShared/src/compiler/resource_x64/resource.h @@ -0,0 +1,44 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. 
+// Used by PxFoundation.rc +// + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 101 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1000 +#define _APS_NEXT_SYMED_VALUE 101 +#endif +#endif diff --git a/PxShared/src/compiler/resource_x86/PxFoundation.rc b/PxShared/src/compiler/resource_x86/PxFoundation.rc Binary files differnew file mode 100644 index 0000000..8b1085d --- /dev/null +++ b/PxShared/src/compiler/resource_x86/PxFoundation.rc diff --git a/PxShared/src/compiler/resource_x86/PxPvdSDK.rc b/PxShared/src/compiler/resource_x86/PxPvdSDK.rc Binary files differnew file mode 100644 index 0000000..90636c2 --- /dev/null +++ b/PxShared/src/compiler/resource_x86/PxPvdSDK.rc diff --git a/PxShared/src/compiler/resource_x86/resource.h b/PxShared/src/compiler/resource_x86/resource.h new file mode 100644 index 0000000..b421bea --- /dev/null +++ b/PxShared/src/compiler/resource_x86/resource.h @@ -0,0 +1,44 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. +// Used by PxFoundation.rc +// + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 101 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1000 +#define _APS_NEXT_SYMED_VALUE 101 +#endif +#endif diff --git a/PxShared/src/cudamanager/include/CudaContextManager.h b/PxShared/src/cudamanager/include/CudaContextManager.h new file mode 100644 index 0000000..3d68f82 --- /dev/null +++ b/PxShared/src/cudamanager/include/CudaContextManager.h @@ -0,0 +1,51 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H +#define PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H + +#include "task/PxTaskDefine.h" + +#if PX_SUPPORT_GPU_PHYSX + +namespace physx +{ + +class PxCudaContextManager; +class PxCudaContextManagerDesc; +class PxErrorCallback; + +/** +Creates cuda context manager for PhysX and APEX. 
+*/ +PxCudaContextManager* createCudaContextManager(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback); + +} + +#endif + +#endif // PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H diff --git a/PxShared/src/cudamanager/include/CudaKernelWrangler.h b/PxShared/src/cudamanager/include/CudaKernelWrangler.h new file mode 100644 index 0000000..36a2cc8 --- /dev/null +++ b/PxShared/src/cudamanager/include/CudaKernelWrangler.h @@ -0,0 +1,331 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef __CUDA_KERNEL_WRANGLER__ +#define __CUDA_KERNEL_WRANGLER__ + +// Make this header safe for inclusion in headers that are shared with device code. +#if !defined(__CUDACC__) + +#include "task/PxTaskDefine.h" +#include "task/PxGpuDispatcher.h" + +#include "PsUserAllocated.h" +#include "PsArray.h" + +#include <cuda.h> + +namespace physx +{ + +class KernelWrangler : public shdfnd::UserAllocated +{ + PX_NOCOPY(KernelWrangler) +public: + KernelWrangler(PxGpuDispatcher& gd, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs); + ~KernelWrangler(); + + CUfunction getCuFunction(uint16_t funcIndex) const + { + return mCuFunctions[ funcIndex ]; + } + + CUmodule getCuModule(uint16_t funcIndex) const + { + uint16_t modIndex = mCuFuncModIndex[ funcIndex ]; + return mCuModules[ modIndex ]; + } + + static void const* const* getImages(); + static int getNumImages(); + + bool hadError() const { return mError; } + +protected: + bool mError; + shdfnd::Array<CUfunction> mCuFunctions; + shdfnd::Array<uint16_t> mCuFuncModIndex; + shdfnd::Array<CUmodule> mCuModules; + PxGpuDispatcher& mGpuDispatcher; + PxErrorCallback& mErrorCallback; +}; + +/* SJB - These were "borrowed" from an Ignacio Llamas email to devtech-compute. 
+ * If we feel this is too clumsy, we can steal the boost based bits from APEX + */ + +class ExplicitCudaFlush +{ +public: + ExplicitCudaFlush(int cudaFlushCount) : mCudaFlushCount(cudaFlushCount), mDefaultCudaFlushCount(mCudaFlushCount) {} + ~ExplicitCudaFlush() {} + + void setCudaFlushCount(int value) { mCudaFlushCount = mDefaultCudaFlushCount = value; } + unsigned int getCudaFlushCount() const { return (unsigned int)mCudaFlushCount; } + void resetCudaFlushCount() { mCudaFlushCount = mDefaultCudaFlushCount; } + + void decrementFlushCount() + { + if (mCudaFlushCount == 0) return; + + if (--mCudaFlushCount == 0) + { + CUresult ret = cuStreamQuery(0); // flushes current push buffer + PX_UNUSED(ret); + PX_ASSERT(ret == CUDA_SUCCESS || ret == CUDA_ERROR_NOT_READY); + + // For current implementation, disable resetting of cuda flush count + // reset cuda flush count + // mCudaFlushCount = mDefaultCudaFlushCount; + } + } + +private: + int mCudaFlushCount; + int mDefaultCudaFlushCount; +}; + +} + +template <typename T0> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0) +{ + void* kernelParams[] = + { + &v0, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1) +{ + void* kernelParams[] = + { + &v0, &v1, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, + }; + return cuLaunchKernel(func, numBlocks, 
1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> +PX_NOINLINE CUresult launchKernel(CUfunction 
func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, + typename T8> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, 
kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, typename T12> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, typename T12, typename T13> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, 
typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, 
&v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17> +PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, + T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, + T13 v13, T14 v14, T15 v15, T16 v16, T17 v17) +{ + void* kernelParams[] = + { + &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16, &v17, + }; + return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); +} + +#endif + +#endif diff --git a/PxShared/src/cudamanager/include/GpuDispatcher.h b/PxShared/src/cudamanager/include/GpuDispatcher.h new file mode 100644 index 0000000..aedb345 --- /dev/null +++ b/PxShared/src/cudamanager/include/GpuDispatcher.h @@ -0,0 +1,334 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#ifndef PXTASK_GPUDISPATCHER_H +#define PXTASK_GPUDISPATCHER_H + +#include "task/PxTask.h" +#include "task/PxTaskDefine.h" +#include "task/PxGpuTask.h" +#include "task/PxTaskManager.h" +#include "task/PxGpuDispatcher.h" +#include "foundation/PxProfiler.h" + +#include "PsUserAllocated.h" +#include "PsThread.h" +#include "PsAtomic.h" +#include "PsMutex.h" +#include "PsSync.h" +#include "PsArray.h" + +#include <cuda.h> + +namespace physx { + +typedef uint16_t EventID; + +void releaseGpuDispatcher(PxGpuDispatcher&); + +class KernelWrangler; +class BlockingWaitThread; +class FanoutTask; +class LaunchTask; +class BlockTask; +class PxGpuWorkerThread; + +class GpuDispatcherImpl : public PxGpuDispatcher, public shdfnd::UserAllocated +{ +public: + GpuDispatcherImpl(PxErrorCallback& errorCallback, PxCudaContextManager& ctx); + virtual ~GpuDispatcherImpl(); + + void start(); + void startSimulation(); + void startGroup(); + void submitTask(PxTask& task); + void finishGroup(); + void addCompletionPrereq(PxBaseTask& task); + bool failureDetected() const; + void forceFailureMode(); + void stopSimulation(); + void launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream); + + PxBaseTask& getPreLaunchTask(); + void addPreLaunchDependent(PxBaseTask& dependent); + + PxBaseTask& getPostLaunchTask(); + void addPostLaunchDependent(PxBaseTask& dependent); + + PxCudaContextManager* getCudaContextManager(); + + PxGpuWorkerThread* mDispatcher; + BlockingWaitThread* mBlockingThread; + LaunchTask* mLaunchTask; // predecessor of tasks launching kernels + BlockTask* mBlockTask; // continuation of tasks launching kernels + FanoutTask* mSyncTask; // predecessor of tasks waiting for cuda context synchronize +}; + +class JobQueue +{ + PX_NOCOPY(JobQueue) +public: + JobQueue() : taskarray(PX_DEBUG_EXP("PxTask*")) {} + void push(PxTask* t) + { + access.lock(); + taskarray.pushBack(t); + access.unlock(); + } + PxTask* popBack() + { + access.lock(); + PxTask* t = NULL; + if 
(taskarray.size()) + { + t = taskarray.popBack(); + } + access.unlock(); + return t; + } + uint32_t size() + { + return taskarray.size(); + } + bool empty() + { + return taskarray.size() == 0; + } + +private: + shdfnd::Array<PxTask*> taskarray; + shdfnd::Mutex access; +}; + +class EventPool +{ + PX_NOCOPY(EventPool) +public: + EventPool(uint32_t inflags) : flags(inflags), evarray(PX_DEBUG_EXP("CUevent")) {} + void add(CUevent ev) + { + access.lock(); + evarray.pushBack(ev); + access.unlock(); + } + CUevent get() + { + access.lock(); + CUevent ev; + if (evarray.size()) + { + ev = evarray.popBack(); + } + else + { + cuEventCreate(&ev, flags); + } + access.unlock(); + return ev; + } + bool empty() const + { + return evarray.size() == 0; + } + void clear() + { + access.lock(); + for (uint32_t i = 0; i < evarray.size(); i++) + { + cuEventDestroy(evarray[i]); + } + access.unlock(); + } + +private: + uint32_t flags; + shdfnd::Array<CUevent> evarray; + shdfnd::Mutex access; +}; + +class StreamCache +{ +public: + StreamCache() : sarray(PX_DEBUG_EXP("CUstream")), freeIndices(PX_DEBUG_EXP("freeIndices")) + { + } + CUstream get(uint32_t s) + { + PX_ASSERT(s); + return sarray[ s - 1 ]; + } + void push(uint32_t s) + { + freeIndices.pushBack(s); + } + uint32_t popBack() + { + if (freeIndices.size()) + { + return freeIndices.popBack(); + } + else + { + CUstream s; + cuStreamCreate(&s, 0); + sarray.pushBack(s); + return sarray.size(); + } + } + void reset() + { + freeIndices.resize(sarray.size()); + for (uint32_t i = 0 ; i < sarray.size() ; i++) + { + freeIndices[i] = i + 1; + } + } + bool empty() + { + return freeIndices.size() == 0; + } + +private: + shdfnd::Array<CUstream> sarray; + shdfnd::Array<uint32_t> freeIndices; +}; + +class KernelBar +{ +public: + KernelBar() + { + reset(); + } + void reset() + { + start = 0xffffffff; + stop = 0; + } + + uint32_t start; + uint32_t stop; +}; + +const int SIZE_COMPLETION_RING = 1024; + +struct CudaBatch +{ + CUevent blockingEvent; + 
CUstream blockingStream; // sync on stream instead of event if lsb is zero (faster) + PxBaseTask* continuationTask; +}; + +struct ReadyTask +{ + PxGpuTask* task; + uint32_t iteration; +}; + +class PxGpuWorkerThread : public shdfnd::Thread +{ + PX_NOCOPY(PxGpuWorkerThread) +public: + PxGpuWorkerThread(); + ~PxGpuWorkerThread(); + + void setCudaContext(PxCudaContextManager& ctx); + void emitStartEvent(const char *id); + void emitStopEvent(const char *id); + + /* API to TaskManager */ + void startSimulation(); + void stopSimulation(); + + /* API to GPU tasks */ + void addCompletionPrereq(PxBaseTask& task); + + /* PxGpuTask execution thread */ + void execute(); + void pollSubmitted(shdfnd::Array<ReadyTask> *ready); + void processActiveTasks(); + void flushBatch(CUevent endEvent, CUstream, PxBaseTask* task); + void launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream); + + /* Blocking wait thread */ + void blockingWaitFunc(); + + StreamCache mCachedStreams; + shdfnd::Array<PxBaseTask*> mCompletionTasks; + JobQueue mSubmittedTaskList; + volatile int mActiveGroups; + shdfnd::Sync mInputReady; + shdfnd::Sync mRecordEventQueued; + PxCudaContextManager* mCtxMgr; + bool mNewTasksSubmitted; + bool mFailureDetected; + + bool mUsingConcurrentStreams; + + CudaBatch mCompletionRing[ SIZE_COMPLETION_RING ]; + volatile int mCompletionRingPush; + volatile int mCompletionRingPop; + + EventPool mCachedBlockingEvents; + EventPool mCachedNonBlockingEvents; + + volatile int mCountActiveScenes; + + uint32_t* mSmStartTimes; + uint32_t mSmClockFreq; + + shdfnd::Array<ReadyTask> mReady[ PxGpuTaskHint::NUM_GPU_TASK_HINTS ]; + + KernelWrangler* mUtilKernelWrapper; + + CUevent mStartEvent; + + shdfnd::Mutex mMutex; +}; + +class BlockingWaitThread : public shdfnd::Thread +{ +public: + BlockingWaitThread(PxGpuWorkerThread& worker) : mWorker(worker) {} + ~BlockingWaitThread() {} + + void execute(); + +protected: + PxGpuWorkerThread& mWorker; + +private: + BlockingWaitThread& 
operator=(const BlockingWaitThread&); +}; + +#define GD_CHECK_CALL(call) { CUresult ret = call; \ + if( CUDA_SUCCESS != ret ) { mFailureDetected=true; PX_ASSERT(!ret); } } + +} + +#endif // PXTASK_GPUDISPATCHER_H diff --git a/PxShared/src/cudamanager/include/PhysXDeviceSettings.h b/PxShared/src/cudamanager/include/PhysXDeviceSettings.h new file mode 100644 index 0000000..5358915 --- /dev/null +++ b/PxShared/src/cudamanager/include/PhysXDeviceSettings.h @@ -0,0 +1,56 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H +#define PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H + +#include "task/PxTaskDefine.h" + +#if PX_SUPPORT_GPU_PHYSX + +namespace physx +{ + class PxErrorCallback; + + /** + Helper functions to expose control panel functionality + */ + class PhysXDeviceSettings + { + private: + PhysXDeviceSettings() {} + + public: + static int getSuggestedCudaDeviceOrdinal(PxErrorCallback& errc); + static int isUsingDedicatedGPU(); + static bool isSLIEnabled(void* graphicsDevice); + }; +} + +#endif + +#endif // PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H diff --git a/PxShared/src/cudamanager/src/BlockingWait.cpp b/PxShared/src/cudamanager/src/BlockingWait.cpp new file mode 100644 index 0000000..fada532 --- /dev/null +++ b/PxShared/src/cudamanager/src/BlockingWait.cpp @@ -0,0 +1,120 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. 
+// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#include "task/PxTaskDefine.h" + +#if PX_SUPPORT_GPU_PHYSX + +#include "task/PxTask.h" +#include "task/PxGpuTask.h" +#include "cudamanager/PxCudaContextManager.h" + +#include "PsString.h" + +#if PX_SUPPORT_PXTASK_PROFILING +#include "foundation/PxFoundation.h" +#include "foundation/PxProfiler.h" +#endif + +#include "GpuDispatcher.h" + +using namespace physx; + +/* Blocking thread / GPU Profiling Event Code */ + +void PxGpuWorkerThread::blockingWaitFunc() +{ + mCtxMgr->acquireContext(); + + while (mCompletionRingPop != mCompletionRingPush) + { + CudaBatch& b = mCompletionRing[ mCompletionRingPop ]; + PxBaseTask* t = b.continuationTask; + + if (!b.blockingEvent) + { + PX_ASSERT(b.continuationTask != 0); + + /* No blocking necessary, just allow continuation task to run */ + } + else if (!mFailureDetected) + { + emitStartEvent("GpuDispatcher.BlockingWaitEvent"); + + if (1 & ~intptr_t(b.blockingStream)) + { + GD_CHECK_CALL(cuStreamSynchronize(b.blockingStream)); + } + else + { + GD_CHECK_CALL(cuEventSynchronize(b.blockingEvent)); + } + + emitStopEvent("GpuDispatcher.BlockingWaitEvent"); + } + + if (b.blockingEvent) + { + mCachedBlockingEvents.add(b.blockingEvent); + } + if (t) + { + t->removeReference(); 
+ } + mCompletionRingPop = (mCompletionRingPop + 1) % SIZE_COMPLETION_RING; + } + + mCtxMgr->releaseContext(); +} + + +/* Blocking wait thread + + All this thread does is block waiting for CUDA Record Events to + be signaled. + */ + +void BlockingWaitThread::execute() +{ + setName("GpuDispatcher.BlockingWait"); + bool running = true; + while (running) + { + mWorker.mRecordEventQueued.wait(); + if (quitIsSignalled()) + { + running = false; + } + + mWorker.mRecordEventQueued.reset(); + mWorker.blockingWaitFunc(); + } + quit(); +} + +#endif + diff --git a/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu b/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu new file mode 100644 index 0000000..3c73364 --- /dev/null +++ b/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu @@ -0,0 +1,164 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
// No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.

#include "cudamanager/PxGpuCopyDesc.h"
#include "foundation/PxSimpleTypes.h"

#include <cuda.h>

using namespace physx;

/* Empty host-side function. */
extern "C" __host__ void initUtilKernels() {}

/* Kernel with an empty body. */
extern "C" __global__
void Saturate( )
{
	// NOP
}

/*
 * Executes one copy descriptor cooperatively across the launch.
 *
 * desc        - the operation: a 32-bit memset (type == DeviceMemset32) or a copy.
 * totalBlocks - number of blocks taking part; the per-thread stride is
 *               blockDim.x * totalBlocks elements.
 *
 * Sizes are derived with right shifts (bytes >> 2 or bytes >> 3), so any
 * trailing bytes beyond a whole element are not touched.
 * The memset path returns before the __threadfence_system() at the end.
 */
__device__
void performCopy( const physx::PxGpuCopyDesc& desc, uint32_t totalBlocks )
{
	if( desc.type == physx::PxGpuCopyDesc::DeviceMemset32 )
	{
		/* Memset: desc.source holds the fill value, not an address */
		uint32_t *dest = (uint32_t*) desc.dest;
		uint32_t wordCount = desc.bytes >> 2;
		size_t word = blockIdx.x * blockDim.x + threadIdx.x;
		size_t stride = blockDim.x * totalBlocks;
		for( ; word < wordCount ; word += stride )
			dest[ word ] = desc.source;
		return;
	}

	/* The idea here is to maximize throughput with minimal register and thread counts */
	/* Manually unrolled: each loop iteration below moves up to 6 elements (a0..a5), */
	/* all loads first and then all stores; the compiler refuses to unroll it itself */

	if( (desc.source & 0x7) != 0 || (desc.dest & 0x7) != 0 || (desc.bytes & 0x7) != 0)
	{
		/* Source, dest or size is not a multiple of 8: copy as 32-bit words */
		/* NOTE(review): 4-byte alignment is assumed here, it is not checked */

		uint32_t *dest = (uint32_t*) desc.dest;
		uint32_t *source = (uint32_t*) desc.source;
		uint32_t wordCount = desc.bytes >> 2;
		size_t word = blockIdx.x * blockDim.x + threadIdx.x;
		size_t stride = blockDim.x * totalBlocks;
		while( word < wordCount )
		{
			/* a1..a5 are only read under the same bounds test that wrote them */
			uint32_t a0, a1, a2, a3, a4, a5;
			a0 = source[ word ];
			if( word + stride < wordCount )
				a1 = source[ word + stride ];
			if( word + stride*2 < wordCount )
				a2 = source[ word + stride*2 ];
			if( word + stride*3 < wordCount )
				a3 = source[ word + stride*3 ];
			if( word + stride*4 < wordCount )
				a4 = source[ word + stride*4 ];
			if( word + stride*5 < wordCount )
				a5 = source[ word + stride*5 ];

			dest[ word ] = a0;
			if( word + stride < wordCount )
				dest[ word + stride ] = a1;
			if( word + stride*2 < wordCount )
				dest[ word + stride*2 ] = a2;
			if( word + stride*3 < wordCount )
				dest[ word + stride*3 ] = a3;
			if( word + stride*4 < wordCount )
				dest[ word + stride*4 ] = a4;
			if( word + stride*5 < wordCount )
				dest[ word + stride*5 ] = a5;

			word += stride*6;
		}
	}
	else
	{
		/* Source, dest and size are all multiples of 8: copy as 64-bit uint2 elements */

		uint2 *dest = (uint2*) desc.dest;
		uint2 *source = (uint2*) desc.source;
		uint32_t dwordCount = desc.bytes >> 3;
		size_t word = blockIdx.x * blockDim.x + threadIdx.x;
		size_t stride = blockDim.x * totalBlocks;
		while( word < dwordCount )
		{
			/* Same 6-element pattern as the 32-bit path above */
			uint2 a0, a1, a2, a3, a4, a5;
			a0 = source[ word ];
			if( word + stride < dwordCount )
				a1 = source[ word + stride ];
			if( word + stride*2 < dwordCount )
				a2 = source[ word + stride*2 ];
			if( word + stride*3 < dwordCount )
				a3 = source[ word + stride*3 ];
			if( word + stride*4 < dwordCount )
				a4 = source[ word + stride*4 ];
			if( word + stride*5 < dwordCount )
				a5 = source[ word + stride*5 ];

			dest[ word ] = a0;
			if( word + stride < dwordCount )
				dest[ word + stride ] = a1;
			if( word + stride*2 < dwordCount )
				dest[ word + stride*2 ] = a2;
			if( word + stride*3 < dwordCount )
				dest[ word + stride*3 ] = a3;
			if( word + stride*4 < dwordCount )
				dest[ word + stride*4 ] = a4;
			if( word + stride*5 < dwordCount )
				dest[ word + stride*5 ] = a5;

			word += stride*6;
		}
	}

	__threadfence_system();
}

/* Runs a single descriptor, passed by value, across the gridDim.x blocks of the launch. */
extern "C" __global__
void MemCopyAsync( physx::PxGpuCopyDesc desc )
{
	performCopy( desc, gridDim.x );
}


/*
 * Runs one descriptor per blockIdx.y: the first sizeof(PxGpuCopyDesc)/4
 * threads of each block copy desc[blockIdx.y] word by word into shared
 * memory, the block synchronises, and every thread then executes that
 * descriptor with gridDim.x as the block count.
 */
extern "C" __global__
void MemCopyBatchedAsync( physx::PxGpuCopyDesc *desc )
{
	__shared__ physx::PxGpuCopyDesc sdesc;

	if( threadIdx.x < sizeof(physx::PxGpuCopyDesc) / sizeof(uint32_t) )
	{
		uint32_t *dest = (uint32_t*)&sdesc;
		uint32_t *source = (uint32_t*)(desc + blockIdx.y);
		dest[ threadIdx.x ] = source[ threadIdx.x ];
		__threadfence_block();
	}
	__syncthreads();

	performCopy( sdesc, gridDim.x );
}
diff --git a/PxShared/src/cudamanager/src/CudaContextManager.cpp b/PxShared/src/cudamanager/src/CudaContextManager.cpp
new file mode 100644
index 0000000..b5b6efc
--- /dev/null
+++ b/PxShared/src/cudamanager/src/CudaContextManager.cpp
@@ -0,0 +1,988 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
+// +// Copyright (c) 2008-2014 NVIDIA Corporation. All rights reserved. + +#include "foundation/PxAssert.h" +#include "foundation/PxErrorCallback.h" +#include "foundation/PxMath.h" +#include "foundation/PxPreprocessor.h" + +#include "cudamanager/PxCudaContextManager.h" +#include "task/PxGpuDispatcher.h" + +#include "CudaMemoryManager.h" +#include "GpuDispatcher.h" +#include "PhysXDeviceSettings.h" + +#include "PsMutex.h" +#include "PsThread.h" +#include "PsUserAllocated.h" +#include "PsString.h" + +#include <cuda.h> + +#if PX_WIN32 || PX_WIN64 + +#ifdef PX_SECURE_LOAD_LIBRARY +#include "nvSecureLoadLibrary.h" +#endif + +#pragma warning (push) +#pragma warning (disable : 4668) //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives' +#include <windows.h> +#pragma warning (pop) + +class IDirect3DDevice9; +class IDirect3DResource9; +class IDirect3DVertexBuffer9; +#include <cudad3d9.h> + +class IDXGIAdapter; +class ID3D10Device; +class ID3D10Resource; +#include <cudad3d10.h> + +struct ID3D11Device; +struct ID3D11Resource; +#include <cudad3d11.h> + +#endif // PX_WINDOWS_FAMILY + +#if PX_LINUX +#include <dlfcn.h> +static void* GetProcAddress(void* handle, const char* name) { return dlsym(handle, name); } +#endif + +#include <GL/gl.h> +#include <cudaGL.h> +#include <assert.h> + +#include "foundation/PxErrors.h" +#include "foundation/PxErrorCallback.h" + +#define CU_INIT_UUID +#include "CudaNode3DLowLatencyInterface.h" + +#define ENABLE_DEVICE_INFO_BRINGUP 0 + +#include "GPUProfile.h" + +#if ENABLE_CUDA_DEVICE_RESET +#include "cudaProfiler.h" +#endif + +#if USE_PERFKIT +#pragma warning (push) +#pragma warning (disable : 4099) +#pragma warning (disable : 4191) +#define NVPM_INITGUID +#include <stdio.h> +#include "cuda.h" +#include "../../../../../../../externals/nvPerfKit/4.1.0.14260/inc/NvPmApi.Manager.h" +static NvPmApiManager S_NVPMManager; +extern NvPmApiManager *GetNvPmApiManager() {return &S_NVPMManager;} +const NvPmApi *GetNvPmApi() 
{return S_NVPMManager.Api();} +NVPMContext hNVPMContext(0); + +void initPerfKit() +{ + //Sync with GPU + cuCtxSynchronize(); + + // Reset counters + uint32_t nCount; + GetNvPmApi()->Sample(hNVPMContext, NULL, &nCount); +} + +void endPerfKit() +{ + //Sync with GPU + cuCtxSynchronize(); + + uint32_t nCount; + GetNvPmApi()->Sample(hNVPMContext, NULL, &nCount); + + uint64_t value; + uint64_t cycle; + + uint64_t sum = 0; + uint64_t maxVal = 0; + char name[512]; + + int nvStatus = 0; + + PX_UNUSED(value); + PX_UNUSED(cycle); + PX_UNUSED(sum); + PX_UNUSED(maxVal); + PX_UNUSED(name); + PX_UNUSED(nvStatus); + + printf("counters:\n"); + +#if COUNT_L2_TO_L1_BYTES + nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, "l2_read_bytes", 0, &value, &cycle); + printf("L2->L1 bytes %d\n",value); +#elif COUNT_SM_TO_L1_QUERIES + nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, "tex_cache_sector_queries", 0, &value, &cycle); + printf("SM->L1 queries %d\n",value); +#endif + +#if COUNT_INST_EXECUTED || COUNT_STORE_INST_EXECUTED || COUNT_ACTIVE_CYCLES || COUNT_ACTIVE_WARPS + for (int i = 0; i != SM_COUNT; i++) + { +#if COUNT_INST_EXECUTED + sprintf_s(name, 512, "sm_inst_executed_vsm%d", i); +#elif COUNT_STORE_INST_EXECUTED + sprintf_s(name, 512, "sm_inst_executed_global_stores_vsm%d", i); +#elif COUNT_ACTIVE_CYCLES + sprintf_s(name, 512, "sm_active_cycles_vsm%d", i); +#elif COUNT_ACTIVE_WARPS + sprintf_s(name, 512, "sm_active_warps_vsm%d", i); +#endif + nvStatus |= GetNvPmApi()->GetCounterValueByName(hNVPMContext, name, 0, &value, &cycle); + + sum += value; + maxVal = physx::PxMax(maxVal, value); + } +#if COUNT_ACTIVE_CYCLES + printf("sum %I64d\n", sum); +#else + printf("sum %I64d\n", sum); +#endif + + if (!nvStatus) + { + PX_ASSERT(false); + } +#endif +} + +#pragma warning (pop) +#endif + +namespace physx +{ + +#if PX_VC +#pragma warning(disable: 4191) //'operator/operation' : unsafe conversion from 'type of expression' to 'type required' +#endif + +#define 
MIN_SM_MAJOR_VERSION 2 +#define MIN_SM_MINOR_VERSION 0 + +class CudaCtxMgr : public PxCudaContextManager, public shdfnd::UserAllocated +{ +public: + CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback); + ~CudaCtxMgr(); + + bool safeDelayImport(PxErrorCallback& errorCallback); + CUcontext acquireContext(); + void releaseContext(); + + /* All these methods can be called without acquiring the context */ + + PxCudaMemoryManager* getMemoryManager(); + PxGpuDispatcher* getGpuDispatcher(); + + bool contextIsValid() const; + bool supportsArchSM10() const; // G80 + bool supportsArchSM11() const; // G92 + bool supportsArchSM12() const; + bool supportsArchSM13() const; // GT200 + bool supportsArchSM20() const; // GF100 + bool supportsArchSM30() const; // GK100 + bool supportsArchSM35() const; // GK110 + bool supportsArchSM50() const; // GM100 + bool supportsArchSM52() const; // GM200 + bool isIntegrated() const; // true if GPU is integrated (MCP) part + bool canMapHostMemory() const; // true if GPU map host memory to GPU + int getDriverVersion() const; + size_t getDeviceTotalMemBytes() const; + int getMultiprocessorCount() const; + int getSharedMemPerBlock() const; + int getSharedMemPerMultiprocessor() const; + unsigned int getMaxThreadsPerBlock() const; + unsigned int getClockRate() const; + + const char* getDeviceName() const; + const CUdevprop* getDeviceProperties() const; + + PxCudaInteropMode::Enum getInteropMode() const; + + void setUsingConcurrentStreams(bool); + bool getUsingConcurrentStreams() const; + + bool registerResourceInCudaD3D(CUgraphicsResource& resource, void* resourcePointer, PxCudaInteropRegisterFlags flags); + bool registerResourceInCudaGL(CUgraphicsResource& resource, uint32_t buffer, PxCudaInteropRegisterFlags flags); + bool unregisterResourceInCuda(CUgraphicsResource resource); + + /* + \brief Determine if the user has configured a dedicated PhysX GPU in the NV Control Panel + \returns 1 if there is a dedicated PhysX GPU + 
\returns 0 if there is NOT a dedicated PhysX GPU + \returns -1 if the routine is not implemented + */ + int usingDedicatedGPU() const; + + void release(); + +private: + + int mSceneCount; + bool mIsValid; + bool mOwnContext; + CUdevice mDevHandle; + CUcontext mCtx; + CudaMemMgr* mMemMgr; + + GpuDispatcherImpl* mDispatcher; + CUetblPhysXInterface* m_physXInterface; + + /* Cached device attributes, so threads can query w/o context */ + int mComputeCapMajor; + int mComputeCapMinor; + int mIsIntegrated; + int mCanMapHost; + int mDriverVersion; + size_t mTotalMemBytes; + int mMultiprocessorCount; + int mMaxThreadsPerBlock; + char mDeviceName[128]; + int mSharedMemPerBlock; + int mSharedMemPerMultiprocessor; + int mClockRate; + PxCudaInteropMode::Enum mInteropMode; + bool mUsingConcurrentStreams; + +#if PX_DEBUG + static uint32_t mManagerRefCount; + static uint32_t mContextRefCountTls; +#endif +}; + +#if PX_DEBUG +uint32_t CudaCtxMgr::mManagerRefCount = 0; +uint32_t CudaCtxMgr::mContextRefCountTls = 0; +#endif + +bool CudaCtxMgr::contextIsValid() const +{ + return mIsValid; +} +bool CudaCtxMgr::supportsArchSM10() const +{ + return mIsValid; +} +bool CudaCtxMgr::supportsArchSM11() const +{ + return mIsValid && (mComputeCapMinor >= 1 || mComputeCapMajor > 1); +} +bool CudaCtxMgr::supportsArchSM12() const +{ + return mIsValid && (mComputeCapMinor >= 2 || mComputeCapMajor > 1); +} +bool CudaCtxMgr::supportsArchSM13() const +{ + return mIsValid && (mComputeCapMinor >= 3 || mComputeCapMajor > 1); +} +bool CudaCtxMgr::supportsArchSM20() const +{ + return mIsValid && mComputeCapMajor >= 2; +} +bool CudaCtxMgr::supportsArchSM30() const +{ + return mIsValid && mComputeCapMajor >= 3; +} +bool CudaCtxMgr::supportsArchSM35() const +{ + return mIsValid && ((mComputeCapMajor > 3) || (mComputeCapMajor == 3 && mComputeCapMinor >= 5)); +} +bool CudaCtxMgr::supportsArchSM50() const +{ + return mIsValid && mComputeCapMajor >= 5; +} +bool CudaCtxMgr::supportsArchSM52() const +{ + return 
mIsValid && ((mComputeCapMajor > 5) || (mComputeCapMajor == 5 && mComputeCapMinor >= 2)); +} + +bool CudaCtxMgr::isIntegrated() const +{ + return mIsValid && mIsIntegrated; +} +bool CudaCtxMgr::canMapHostMemory() const +{ + return mIsValid && mCanMapHost; +} +int CudaCtxMgr::getDriverVersion() const +{ + return mDriverVersion; +} +size_t CudaCtxMgr::getDeviceTotalMemBytes() const +{ + return mTotalMemBytes; +} +int CudaCtxMgr::getMultiprocessorCount() const +{ + return mMultiprocessorCount; +} +int CudaCtxMgr::getSharedMemPerBlock() const +{ + return mSharedMemPerBlock; +} +int CudaCtxMgr::getSharedMemPerMultiprocessor() const +{ + return mSharedMemPerMultiprocessor; +} +unsigned int CudaCtxMgr::getMaxThreadsPerBlock() const +{ + return (unsigned int)mMaxThreadsPerBlock; +} +unsigned int CudaCtxMgr::getClockRate() const +{ + return (unsigned int)mClockRate; +} + +const char* CudaCtxMgr::getDeviceName() const +{ + if (mIsValid) + { + return mDeviceName; + } + else + { + return "Invalid"; + } +} + +PxCudaInteropMode::Enum CudaCtxMgr::getInteropMode() const +{ + return mInteropMode; +} + +void CudaCtxMgr::setUsingConcurrentStreams(bool value) +{ + mUsingConcurrentStreams = value; +} + +bool CudaCtxMgr::getUsingConcurrentStreams() const +{ + return mUsingConcurrentStreams; +} + +PxCudaMemoryManager* CudaCtxMgr::getMemoryManager() +{ + if (mIsValid) + { + return mMemMgr; + } + else + { + return NULL; + } +} + +PxGpuDispatcher* CudaCtxMgr::getGpuDispatcher() +{ + if (mIsValid) + { + return mDispatcher; + } + else + { + return NULL; + } +} + +int CudaCtxMgr::usingDedicatedGPU() const +{ + if (PxCudaInteropMode::NO_INTEROP == getInteropMode()) + { + return PhysXDeviceSettings::isUsingDedicatedGPU(); + } + else + { + return 0; // not a dedicated GPU + } +} + +#define CUT_SAFE_CALL(call) { CUresult ret = call; \ + if( CUDA_SUCCESS != ret ) { PX_ASSERT(0); } } + +/* If a context is not provided, an ordinal must be given */ +CudaCtxMgr::CudaCtxMgr(const 
PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback) + : mSceneCount(0) + , mOwnContext(false) + , mMemMgr(0) + , mDispatcher(0) + , m_physXInterface(0) + , mInteropMode(desc.interopMode) + , mUsingConcurrentStreams(true) +{ + CUresult status; + mIsValid = false; + mDeviceName[0] = 0; + + if (safeDelayImport(errorCallback) == false) + { + // The table where this info is found is here: https://wiki.nvidia.com/nvcompute/index.php/NVCompute#CUDA_Planning + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "NVIDIA Release 331 graphics driver and above is required for GPU acceleration.", __FILE__, __LINE__); + return; + } + + if (desc.ctx == 0) + { + int flags = CU_CTX_LMEM_RESIZE_TO_MAX | CU_CTX_SCHED_BLOCKING_SYNC | CU_CTX_MAP_HOST; + class FoundationErrorReporter : public PxErrorCallback + { + public: + FoundationErrorReporter(PxErrorCallback& ec) + : errorCallback(&ec) + { + } + + virtual void reportError(PxErrorCode::Enum code, const char* message, const char* file, int line) + { + errorCallback->reportError( code, message, file, line); + } + + PxErrorCallback* errorCallback; + } foundationErrorReporter(errorCallback); + + int devOrdinal = PhysXDeviceSettings::getSuggestedCudaDeviceOrdinal(foundationErrorReporter); + if (devOrdinal < 0) + { + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "No PhysX capable GPU suggested.", __FILE__, __LINE__); + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "If you have a PhysX capable GPU, verify that PhysX is not set to CPU in the NVIDIA Control Panel.", __FILE__, __LINE__); + return; + } + + status = cuInit(0); + if (CUDA_SUCCESS != status) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuInit failed", __FILE__, __LINE__); + return; + } + + // Try to create the context on Node3DLowLatency. + // If that does not work, try to create the cuda context using cuCtxCreatePhysX, + // since we must be on a driver that does not support cuCtxCreateOnNode3DLowLatency. 
+ cuGetExportTable((const void**)&m_physXInterface, (const CUuuid*)&CU_ETID_PhysXInterface); + + // if using a dedicated GPU or SLI we disable D3D interop (which is not supported over multiple GPUs) + // this ensures the users control panel setting is always respected + bool sliEnabled = false; + if (mInteropMode != PxCudaInteropMode::NO_INTEROP && desc.graphicsDevice != NULL) + { + sliEnabled = PhysXDeviceSettings::isSLIEnabled(desc.graphicsDevice) == 1 ? true : false; + } + + if (PhysXDeviceSettings::isUsingDedicatedGPU() == 1 || sliEnabled) + { + if (mInteropMode == PxCudaInteropMode::D3D9_INTEROP || + mInteropMode == PxCudaInteropMode::D3D10_INTEROP || + mInteropMode == PxCudaInteropMode::D3D11_INTEROP) + { + mInteropMode = PxCudaInteropMode::NO_INTEROP; + if (sliEnabled) + { + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "D3D/CUDA interop cannot be used in tandem with SLI, disabling interop. Query PxCudaContextManager::getInteropMode() for interop status.", + __FILE__,__LINE__); + } + } + } + + if (mInteropMode == PxCudaInteropMode::NO_INTEROP) + { + status = cuDeviceGet(&mDevHandle, devOrdinal); + if (CUDA_SUCCESS != status) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGet failed",__FILE__,__LINE__); + return; + } + + if (m_physXInterface) + status = m_physXInterface->cuCtxCreateOnNode3DLowLatency(&mCtx, (unsigned int)flags, mDevHandle); + else + status = cuCtxCreate(&mCtx, (unsigned int)flags, mDevHandle); + if (CUDA_SUCCESS != status) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxCreate failed",__FILE__,__LINE__); + return; + } + mOwnContext = true; + } + else if (mInteropMode == PxCudaInteropMode::OGL_INTEROP) + { + status = cuDeviceGet(&mDevHandle, devOrdinal); + if (CUDA_SUCCESS != status) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGet failed",__FILE__,__LINE__); + return; + } + + status = cuGLCtxCreate(&mCtx, (unsigned int)flags, mDevHandle); + if (CUDA_SUCCESS != status) + 
{ + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxGLCreate failed",__FILE__,__LINE__); + return; + } + + status = cuGLInit(); + if (CUDA_SUCCESS != status) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuGLInit failed",__FILE__,__LINE__); + return; + } + mOwnContext = true; + } +#if PX_WIN32 || PX_WIN64 + else if (mInteropMode == PxCudaInteropMode::D3D9_INTEROP) + { + status = cuD3D9CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags, + reinterpret_cast<IDirect3DDevice9*>(desc.graphicsDevice)); + + if (CUDA_SUCCESS != status) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D9CtxCreate failed",__FILE__,__LINE__); + return; + } + mOwnContext = true; + } + else if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP) + { + status = cuD3D10CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags, + reinterpret_cast<ID3D10Device*>(desc.graphicsDevice)); + + if (CUDA_SUCCESS != status) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D10CtxCreate failed",__FILE__,__LINE__); + return; + } + mOwnContext = true; + } + else if (mInteropMode == PxCudaInteropMode::D3D11_INTEROP) + { + status = cuD3D11CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags, + reinterpret_cast<ID3D11Device*>(desc.graphicsDevice)); + + if (CUDA_SUCCESS != status) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D11CtxCreate failed",__FILE__,__LINE__); + return; + } + mOwnContext = true; + } +#endif //PX_WIN32 || PX_WIN64 + else + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "Requested interop type is not supported!",__FILE__,__LINE__); + return; + } + } + else + { + mCtx = *desc.ctx; + status = cuCtxGetDevice(&mDevHandle); + if (CUDA_SUCCESS != status) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxGetDevice failed",__FILE__,__LINE__); + return; + } + } + + // Verify we can at least allocate a CUDA event from this context + CUevent testEvent; + if (CUDA_SUCCESS == cuEventCreate(&testEvent, 0)) + { + 
cuEventDestroy(testEvent); + } + else + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CUDA context validation failed",__FILE__,__LINE__); + return; + } + + status = cuDeviceGetName(mDeviceName, sizeof(mDeviceName), mDevHandle); + if (CUDA_SUCCESS != status) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGetName failed",__FILE__,__LINE__); + return; + } + + cuDeviceGetAttribute(&mSharedMemPerBlock, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, mDevHandle); + cuDeviceGetAttribute(&mSharedMemPerMultiprocessor, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, mDevHandle); + cuDeviceGetAttribute(&mClockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, mDevHandle); + cuDeviceGetAttribute(&mComputeCapMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, mDevHandle); + cuDeviceGetAttribute(&mComputeCapMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, mDevHandle); + cuDeviceGetAttribute(&mIsIntegrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, mDevHandle); + cuDeviceGetAttribute(&mCanMapHost, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, mDevHandle); + cuDeviceGetAttribute(&mMultiprocessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, mDevHandle); + cuDeviceGetAttribute(&mMaxThreadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, mDevHandle); + + status = cuDeviceTotalMem((size_t*)&mTotalMemBytes, mDevHandle); + if (CUDA_SUCCESS != status) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceTotalMem failed",__FILE__,__LINE__); + return; + } + + // minimum compute capability is MIN_SM_MAJOR_VERSION.MIN_SM_MINOR_VERSION + if ((mComputeCapMajor < MIN_SM_MAJOR_VERSION) || + (mComputeCapMajor == MIN_SM_MAJOR_VERSION && mComputeCapMinor < MIN_SM_MINOR_VERSION)) + { + char buffer[256]; + physx::shdfnd::snprintf(buffer, 256, "Minimum GPU compute capability %d.%d is required", MIN_SM_MAJOR_VERSION, MIN_SM_MINOR_VERSION); + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING,buffer,__FILE__,__LINE__); + return; + } + + mMemMgr = 
PX_NEW(CudaMemMgr)(*this, errorCallback); + if (mMemMgr == NULL) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CudaMemMgr failed: Unable to allocate heaps",__FILE__,__LINE__); + return; + } + + bool succ = true; + for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) + { + PxCudaBufferType type(PxCudaBufferMemorySpace::Enum(i), PxCudaBufferFlags::F_READ_WRITE); + succ &= mMemMgr->setBaseSize(type, desc.memoryBaseSize[i]); + succ &= mMemMgr->setPageSize(type, desc.memoryPageSize[i]); + succ &= mMemMgr->setMaxMemorySize(type, desc.maxMemorySize[i]); + PX_ASSERT(succ); + if (!succ) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CudaMemMgr failed: Invalid memory parameter",__FILE__,__LINE__); + return; + } + } + +#if PX_DEBUG + if(!mManagerRefCount++) + mContextRefCountTls = shdfnd::TlsAlloc(); + if(!shdfnd::TlsGet(mContextRefCountTls)) + CUT_SAFE_CALL(cuCtxSetCurrent(0)); +#endif + + mIsValid = true; + mDispatcher = PX_NEW(GpuDispatcherImpl)(errorCallback, *this); + if (!mDispatcher || mDispatcher->failureDetected()) + { + errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "Failed to create functional GPU dispatcher",__FILE__,__LINE__); + mIsValid = false; + return; + } + + mDispatcher->start(); + +#if ENABLE_DEVICE_INFO_BRINGUP + // Device info (Enable for Amodel and Emulator testing) + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Device Name: %s", mDeviceName); + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Shared Memory Per Block: %d", mSharedMemPerBlock); + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Shared Memory Per Multiprocessor: %d", mSharedMemPerMultiprocessor); + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Number of SM: %d", mMultiprocessorCount); + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Max Threads Per Block: %d", mMaxThreadsPerBlock); +#endif + +#if USE_PERFKIT + { +#if _WIN64 + wchar_t * dllName = 
L"..\\..\\..\\..\\..\\externals\\nvPerfKit\\4.1.0.14260\\bin\\win7_x64\\NvPmApi.Core.dll"; +#else + wchar_t * dllName = L"..\\..\\..\\..\\..\\externals\\nvPerfKit\\4.1.0.14260\\bin\\win7_x86\\NvPmApi.Core.dll"; +#endif + + NVPMRESULT nvResult; + + if ((nvResult = GetNvPmApiManager()->Construct(dllName)) != NVPM_OK) + { + printf("perfkit error 1\n"); + return; + } + + if ((nvResult = GetNvPmApi()->Init()) != NVPM_OK) + { + printf("perfkit error 2\n"); + return; + } + + acquireContext(); + + CUcontext ctx; + cuCtxGetCurrent(&ctx); + if ((nvResult = GetNvPmApi()->CreateContextFromCudaContext((APIContextHandle)ctx, &hNVPMContext)) != NVPM_OK) + { + printf("perfkit error 3\n"); + return; // This is an error condition + } + + uint32_t nvStatus = 0; + +#if COUNT_L2_TO_L1_BYTES + nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "l2_read_bytes"); +#elif COUNT_SM_TO_L1_QUERIES + nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "tex_cache_sector_queries"); +#endif + +#if COUNT_INST_EXECUTED || COUNT_STORE_INST_EXECUTED || COUNT_ACTIVE_CYCLES || COUNT_ACTIVE_WARPS + char name[512]; + for (int i = 0; i != SM_COUNT; i++) + { +#if COUNT_INST_EXECUTED + sprintf_s(name,512,"sm_inst_executed_vsm%d",i); +#elif COUNT_STORE_INST_EXECUTED + sprintf_s(name, 512, "sm_inst_executed_global_stores_vsm%d",i); +#elif COUNT_ACTIVE_CYCLES + sprintf_s(name, 512, "sm_active_cycles_vsm%d",i); +#elif COUNT_ACTIVE_WARPS + sprintf_s(name, 512, "sm_active_warps_vsm%d",i); +#endif + nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, name); + } +#elif COUNT_GPU_BUSY + nvStatus |= GetNvPmApi()->AddCounterByName(hNVPMContext, "gpu_busy"); +#endif + + if (nvStatus != 0) + { + printf("perfkit error 4\n"); + return; // This is an error condition + } + } +#endif +} + +/* Some driver version mismatches can cause delay import crashes. Load NVCUDA.dll + * manually, verify its version number, then allow delay importing to bind all the + * APIs. 
+ */ +bool CudaCtxMgr::safeDelayImport(PxErrorCallback& errorCallback) +{ +#if PX_WIN32 || PX_WIN64 +#ifdef PX_SECURE_LOAD_LIBRARY + HMODULE hCudaDriver = nvLoadSystemLibrary("nvcuda.dll"); +#else + HMODULE hCudaDriver = LoadLibrary("nvcuda.dll"); +#endif +#elif PX_LINUX + void* hCudaDriver = dlopen("libcuda.so", RTLD_NOW); +#endif + if (!hCudaDriver) + { + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "nvcuda.dll not found or could not be loaded.", __FILE__, __LINE__); + return false; + } + + typedef CUresult(CUDAAPI * pfnCuDriverGetVersion_t)(int*); + pfnCuDriverGetVersion_t pfnCuDriverGetVersion = (pfnCuDriverGetVersion_t) GetProcAddress(hCudaDriver, "cuDriverGetVersion"); + if (!pfnCuDriverGetVersion) + { + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "cuDriverGetVersion missing in nvcuda.dll.", __FILE__, __LINE__); + return false; + } + + CUresult status = pfnCuDriverGetVersion(&mDriverVersion); + if (status != CUDA_SUCCESS) + { + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Retrieving CUDA driver version failed.", __FILE__, __LINE__); + return false; + } + + /* Let's require a driver version >= to the version we compile against + * Currently, CUDA_VERSION is 6000 or 6.0, but APEX still uses CUDA 5.0 so we can't assert on 6.0 yet. 
+ */ + PX_COMPILE_TIME_ASSERT(5000 <= CUDA_VERSION); + + if (mDriverVersion < CUDA_VERSION) + { + char buffer[256]; + physx::shdfnd::snprintf(buffer, 256, "CUDA driver version is %u, expected at least %u.", mDriverVersion, CUDA_VERSION); + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, buffer, __FILE__,__LINE__); + return false; + } + + /* Now trigger delay import and API binding */ + status = cuDriverGetVersion(&mDriverVersion); + if (status != CUDA_SUCCESS) + { + errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Failed to bind CUDA API.", __FILE__, __LINE__); + return false; + } + + /* Not strictly necessary, but good practice */ +#if PX_WIN32 | PX_WIN64 + FreeLibrary(hCudaDriver); +#elif PX_LINUX + dlclose(hCudaDriver); +#endif + + + return true; +} + +void CudaCtxMgr::release() +{ + delete this; +} + +CudaCtxMgr::~CudaCtxMgr() +{ + if (mDispatcher) + { + releaseGpuDispatcher(*mDispatcher); + } + if (mMemMgr) + { + delete mMemMgr; + } + if (mOwnContext) + { + CUT_SAFE_CALL(cuCtxDestroy(mCtx)); + } + +#if PX_DEBUG + if(!--mManagerRefCount) + shdfnd::TlsFree(mContextRefCountTls); +#endif + +#if ENABLE_CUDA_DEVICE_RESET + CUT_SAFE_CALL(cuProfilerStop()); +#endif +} + +bool CudaCtxMgr::registerResourceInCudaGL(CUgraphicsResource& resource, uint32_t buffer, PxCudaInteropRegisterFlags flags) +{ + CUresult ret = CUDA_ERROR_UNKNOWN; + + acquireContext(); + + PX_ASSERT(mInteropMode == PxCudaInteropMode::OGL_INTEROP); + + ret = cuGraphicsGLRegisterBuffer(&resource, (GLuint) buffer, uint32_t(flags)); + + releaseContext(); + + return ret == CUDA_SUCCESS; +} + +bool CudaCtxMgr::registerResourceInCudaD3D(CUgraphicsResource& resource, void* resourcePointer, PxCudaInteropRegisterFlags flags) +{ + CUresult ret = CUDA_ERROR_UNKNOWN; +#if PX_WINDOWS_FAMILY + acquireContext(); + + switch (mInteropMode) + { + case PxCudaInteropMode::D3D9_INTEROP: + ret = cuGraphicsD3D9RegisterResource(&resource, (IDirect3DResource9*)resourcePointer, uint32_t(flags)); + break; + case 
PxCudaInteropMode::D3D10_INTEROP: + ret = cuGraphicsD3D10RegisterResource(&resource, (ID3D10Resource*)resourcePointer, uint32_t(flags)); + break; + case PxCudaInteropMode::D3D11_INTEROP: + ret = cuGraphicsD3D11RegisterResource(&resource, (ID3D11Resource*)resourcePointer, uint32_t(flags)); + break; + case PxCudaInteropMode::NO_INTEROP: + case PxCudaInteropMode::OGL_INTEROP: + case PxCudaInteropMode::COUNT: + default: + PX_ALWAYS_ASSERT_MESSAGE("unexpected state in registerResourceInCuda3D"); + } + + releaseContext(); +#else + PX_UNUSED(resource); + PX_UNUSED(resourcePointer); + PX_UNUSED(flags); +#endif //PX_WINDOWS_FAMILY + return ret == CUDA_SUCCESS; +} + +bool CudaCtxMgr::unregisterResourceInCuda(CUgraphicsResource resource) +{ + CUresult ret = CUDA_ERROR_UNKNOWN; + + acquireContext(); + + ret = cuGraphicsUnregisterResource(resource); + + releaseContext(); + + return ret == CUDA_SUCCESS; +} + +CUcontext CudaCtxMgr::acquireContext() +{ + CUcontext ctx = 0; + CUT_SAFE_CALL(cuCtxGetCurrent(&ctx)); + + if (ctx != mCtx) + { +#if PX_DEBUG + PX_ASSERT(!shdfnd::TlsGet(mContextRefCountTls)); +#endif + CUT_SAFE_CALL(cuCtxSetCurrent(mCtx)); + } + +#if PX_DEBUG + char* refCount = (char*)shdfnd::TlsGet(mContextRefCountTls); + shdfnd::TlsSet(mContextRefCountTls, ++refCount); +#endif + + return mCtx; +} + +void CudaCtxMgr::releaseContext() +{ +#if PX_DEBUG + char* refCount = (char*)shdfnd::TlsGet(mContextRefCountTls); + shdfnd::TlsSet(mContextRefCountTls, --refCount); + // see DE8475 + if(!refCount) + CUT_SAFE_CALL(cuCtxSetCurrent(0)); +#endif +} + +#if PX_SUPPORT_GPU_PHYSX +extern "C" void initUtilKernels(); + +PxCudaContextManager* createCudaContextManager(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback) +{ + //this call is needed to force UtilKernels linkage in case someone links PxCudaContextManager as Static Library! 
+ initUtilKernels(); + + return PX_NEW(CudaCtxMgr)(desc, errorCallback); +} + +#endif + +} // end physx namespace + + diff --git a/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp b/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp new file mode 100644 index 0000000..7579d63 --- /dev/null +++ b/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp @@ -0,0 +1,242 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. 
All rights reserved. + +#include "task/PxGpuDispatcher.h" +#include "cudamanager/PxCudaContextManager.h" + +#include "foundation/PxAssert.h" +#include "foundation/PxErrorCallback.h" + +#include "PsString.h" + +#include "CudaKernelWrangler.h" + +#include <cuda.h> +#include <texture_types.h> + +/** + * Workaround hacks for using nvcc --compiler output object files + * without linking with CUDART. We must implement our own versions + * of these functions that the object files are hard-coded to call into. + */ + +#define CUT_SAFE_CALL(call) { CUresult ret = call; if( CUDA_SUCCESS != ret ) { PX_ASSERT(!ret); } } + +#define MAX_MODULES 64 // Max number of .cu files you will build +static void* gModuleTable[ MAX_MODULES ]; +static int gNumModules = 0; + +#define MAX_FUNCTIONS 256 // Max number of kernel of entry points +typedef struct +{ + int moduleIndex; + const char* functionName; +} cuFuncDesc; +static cuFuncDesc gFunctionTable[ MAX_FUNCTIONS ]; +static int gNumFunctions = 0; + +using namespace physx::shdfnd; +using namespace physx; + +KernelWrangler::KernelWrangler(PxGpuDispatcher& gd, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs) + : mError(false) + , mCuFunctions(PX_DEBUG_EXP("CuFunctions")) + , mCuModules(PX_DEBUG_EXP("CuModules")) + , mGpuDispatcher(gd) + , mErrorCallback(errorCallback) +{ + PxScopedCudaLock _lock_(*gd.getCudaContextManager()); + + /* Formally load the CUDA modules, get CUmodule handles */ + mCuModules.resize((uint32_t)gNumModules); + for (int i = 0 ; i < gNumModules ; ++i) + { + CUresult ret = cuModuleLoadDataEx(&mCuModules[(uint32_t)i], gModuleTable[i], 0, NULL, NULL); + if (ret != CUDA_SUCCESS && ret != CUDA_ERROR_NO_BINARY_FOR_GPU) + { + mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, "Failed to load CUDA module data.", __FILE__, __LINE__); + mError = true; + return; + } + } + + /* matchup funcNames to CUDA modules, get CUfunction handles */ + mCuFunctions.resize(numFuncs); + 
mCuFuncModIndex.resize(numFuncs); + for (uint32_t i = 0 ; i < numFuncs ; ++i) + { + for (int j = 0; ; ++j) + { + if(j == gNumFunctions) + { + char buffer[256]; + physx::shdfnd::snprintf(buffer, 256, "Could not find registered CUDA function '%s'.", funcNames[i]); + mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, __FILE__, __LINE__); + mError = true; + return; + } + + if (!physx::shdfnd::strcmp(gFunctionTable[j].functionName, funcNames[i])) + { + mCuFuncModIndex[i] = (uint16_t)gFunctionTable[j].moduleIndex; + CUresult ret = cuModuleGetFunction(&mCuFunctions[i], mCuModules[mCuFuncModIndex[i]], funcNames[i]); + if (ret != CUDA_SUCCESS) + { + char buffer[256]; + physx::shdfnd::snprintf(buffer, 256, "Could not find CUDA module containing function '%s'.", funcNames[i]); + mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, __FILE__, __LINE__); + mError = true; + return; + } + break; + } + } + } +} + +KernelWrangler::~KernelWrangler() +{ + if (mCuModules.size()) + { + PxScopedCudaLock _lock_(*mGpuDispatcher.getCudaContextManager()); + + for (uint32_t i = 0 ; i < mCuModules.size() ; i++) + if(mCuModules[i]) + CUT_SAFE_CALL(cuModuleUnload(mCuModules[i])); + } +} + +void const* const* KernelWrangler::getImages() +{ + return gModuleTable; +} + +int KernelWrangler::getNumImages() +{ + return gNumModules; +} + +/* + * These calls are all made _before_ main() during static initialization + * of this DLL. 
+ */ + +#include <driver_types.h> + +#if PX_WINDOWS_FAMILY +#define CUDARTAPI __stdcall +#endif + +struct uint3; +struct dim3; + +extern "C" +void** CUDARTAPI __cudaRegisterFatBinary(void* fatBin) +{ + //HACK to get real fatbin in CUDA 4.0 + struct CUIfatbinStruct + { + int magic; + int version; + void *fatbinArray; + char *fatbinFile; + }; + const CUIfatbinStruct *fatbinStruct = (const CUIfatbinStruct *)fatBin; + if (fatbinStruct->magic == 0x466243B1) + { + fatBin = fatbinStruct->fatbinArray; + } + + if (gNumModules < MAX_MODULES) + { + gModuleTable[ gNumModules ] = fatBin; + return (void**)(size_t) gNumModules++; + } + return NULL; +} + +extern "C" +void CUDARTAPI __cudaUnregisterFatBinary(void** fatCubinHandle) +{ + gModuleTable[(int)(size_t) fatCubinHandle ] = 0; +} + +extern "C" +void CUDARTAPI __cudaRegisterTexture(void**, const struct textureReference*, const void**, const char*, int, int, int) +{ +} + +extern "C" void CUDARTAPI __cudaRegisterVar(void**, char*, char*, const char*, int, int, int, int) +{ +} + + +extern "C" void CUDARTAPI __cudaRegisterShared(void**, void**) +{ +} + +extern "C" +void CUDARTAPI __cudaRegisterFunction(void** fatCubinHandle, const char*, + char*, const char* deviceName, int, uint3*, uint3*, dim3*, dim3*, int*) +{ + if (gNumFunctions < MAX_FUNCTIONS) + { + // We need this association of function to module in order to find textures and globals + gFunctionTable[ gNumFunctions ].moduleIndex = (int)(size_t) fatCubinHandle; + gFunctionTable[ gNumFunctions ].functionName = deviceName; + gNumFunctions++; + } +} + +/* These functions are implemented just to resolve link dependencies */ + +extern "C" +cudaError_t CUDARTAPI cudaLaunch(const char* entry) +{ + PX_UNUSED(entry); + return cudaSuccess; +} + +extern "C" +cudaError_t CUDARTAPI cudaSetupArgument(const void*, size_t, size_t) +{ + return cudaSuccess; +} + +extern "C" +struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( + int x, int y, int z, int w, enum 
cudaChannelFormatKind f) +{ + struct cudaChannelFormatDesc desc; + desc.x = x; + desc.y = y; + desc.z = z; + desc.w = w; + desc.f = f; + return desc; +} + diff --git a/PxShared/src/cudamanager/src/CudaMemoryManager.cpp b/PxShared/src/cudamanager/src/CudaMemoryManager.cpp new file mode 100644 index 0000000..b1c6f94 --- /dev/null +++ b/PxShared/src/cudamanager/src/CudaMemoryManager.cpp @@ -0,0 +1,649 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. 
+// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#include "cudamanager/PxCudaContextManager.h" +#include "foundation/PxMath.h" +#include "foundation/PxMemory.h" + +#include "CudaMemoryManager.h" +#include "HeapManagerRef.h" + +#include <cuda.h> + +#define DEVICE_BASE_SIZE (0) +#define DEVICE_PAGE_SIZE ( 2 * 1024*1024) +#define PINNED_BASE_SIZE (0) +#define PINNED_PAGE_SIZE ( 2 * 1024*1024) +#define WC_BASE_SIZE (0) +#define WC_PAGE_SIZE ( 2 * 1024*1024) +#define MIN_BLOCK_SIZE 2048 + + +#define CMM_DELETE_SINGLE(x) { if(x) delete x; } +#define CMM_DELETE_ARRAY(x) { if(x) delete [] x; } + +using namespace physx; + +CudaMemMgr::CudaMemMgr(PxCudaContextManager& mgr, physx::PxErrorCallback& errorCallback) + : mErrorCallback(errorCallback) + , mBufferPool("mBufferPool", 1024) + , mInitialized(false) + , mMgr(mgr) + , mDebugDisableAllocs(false) +{ + for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) + { + mHeap[i] = NULL; + mMemoryAllocator[i] = NULL; + mMemoryMaxSize[i] = size_t(-1); + } + + mMemoryBaseSize[PxCudaBufferMemorySpace::T_GPU] = DEVICE_BASE_SIZE; + mMemoryBaseSize[PxCudaBufferMemorySpace::T_PINNED_HOST] = PINNED_BASE_SIZE; + mMemoryBaseSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED] = WC_BASE_SIZE; + mMemoryBaseSize[PxCudaBufferMemorySpace::T_HOST] = 0; + + mMemoryPageSize[PxCudaBufferMemorySpace::T_GPU] = DEVICE_PAGE_SIZE; + mMemoryPageSize[PxCudaBufferMemorySpace::T_PINNED_HOST] = PINNED_PAGE_SIZE; + mMemoryPageSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED] = WC_PAGE_SIZE; + mMemoryPageSize[PxCudaBufferMemorySpace::T_HOST] = PINNED_PAGE_SIZE; +} + + +CudaMemMgr::~CudaMemMgr() +{ + for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) + { + CMM_DELETE_SINGLE(mHeap[i]); + CMM_DELETE_SINGLE(mMemoryAllocator[i]); + } +} + + +PX_INLINE bool CudaMemMgr::initialize() +{ + if (mInitialized) + { + return true; + } + + for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) + { + mHeap[i] = 
PX_NEW(HeapManagerRef)(mErrorCallback, false); + PX_ASSERT(mHeap[i]); + } + + mMemoryAllocator[PxCudaBufferMemorySpace::T_GPU] = PX_NEW(DeviceMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_GPU]); + mMemoryAllocator[PxCudaBufferMemorySpace::T_PINNED_HOST] = PX_NEW(PinnedMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_PINNED_HOST]); + mMemoryAllocator[PxCudaBufferMemorySpace::T_WRITE_COMBINED] = PX_NEW(WriteCombinedMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED]); + mMemoryAllocator[PxCudaBufferMemorySpace::T_HOST] = PX_NEW(HostMemAllocator)(mMemoryMaxSize[PxCudaBufferMemorySpace::T_HOST]); + + bool succ = true; + for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) + { + succ &= mHeap[i]->init(mMemoryAllocator[i], mMemoryBaseSize[i], mMemoryPageSize[i], MIN_BLOCK_SIZE); + PX_ASSERT(succ); + } + + for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) + { + succ &= mHeap[i] && mMemoryAllocator[i]; + } + + if (!succ) + { + for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) + { + CMM_DELETE_SINGLE(mHeap[i]); + CMM_DELETE_SINGLE(mMemoryAllocator[i]); + } + mInitialized = false; + } + + return mInitialized = succ;; +} + + +bool CudaMemMgr::setPageSize(const PxCudaBufferType& type, size_t size) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + if (!mInitialized) + { + mMemoryPageSize[type.memorySpace] = PxMax(mMemoryPageSize[type.memorySpace], size); + return true; + } + else + { + bool ret = mHeap[type.memorySpace]->setPageSize(size); + mMemoryPageSize[type.memorySpace] = ret ? 
size : mMemoryPageSize[type.memorySpace]; + return ret; + } +} + + +bool CudaMemMgr::setBaseSize(const PxCudaBufferType& type, size_t size) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + if (mInitialized || (((size - 1)&size) != 0)) + { + return false; + } + else + { + mMemoryBaseSize[type.memorySpace] = PxMax(mMemoryBaseSize[type.memorySpace], size); + return true; + } +} + + +size_t CudaMemMgr::getBaseSize(const PxCudaBufferType& type) +{ + return mMemoryBaseSize[type.memorySpace]; +} + + +size_t CudaMemMgr::getPageSize(const PxCudaBufferType& type) +{ + return mMemoryPageSize[type.memorySpace]; +} + + +bool CudaMemMgr::setMaxMemorySize(const PxCudaBufferType& type, size_t size) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + if (mInitialized) + { + switch (type.memorySpace) + { + case PxCudaBufferMemorySpace::T_GPU: + static_cast<DeviceMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size); + return true; + break; + case PxCudaBufferMemorySpace::T_PINNED_HOST: + static_cast<PinnedMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size); + return true; + break; + case PxCudaBufferMemorySpace::T_HOST: + static_cast<HostMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size); + return true; + break; + case PxCudaBufferMemorySpace::T_WRITE_COMBINED: + static_cast<WriteCombinedMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size); + return true; + break; + case PxCudaBufferMemorySpace::COUNT: + default: + PX_ASSERT(!"unknown memory type"); + break; + } + } + else + { + mMemoryMaxSize[type.memorySpace] = PxMax(mMemoryMaxSize[type.memorySpace], size); + return true; + } + + return false; +} + +size_t CudaMemMgr::getMaxMemorySize(const PxCudaBufferType& type) +{ + if (mInitialized) + { + switch (type.memorySpace) + { + case PxCudaBufferMemorySpace::T_GPU: + return static_cast<DeviceMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize(); + break; + case PxCudaBufferMemorySpace::T_PINNED_HOST: + return 
static_cast<PinnedMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize(); + break; + case PxCudaBufferMemorySpace::T_HOST: + return static_cast<HostMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize(); + break; + case PxCudaBufferMemorySpace::T_WRITE_COMBINED: + return static_cast<WriteCombinedMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize(); + break; + case PxCudaBufferMemorySpace::COUNT: + default: + PX_ASSERT(!"unknown memory type"); + break; + } + } + return 0; +} + +bool CudaMemMgr::reserve(const PxCudaBufferType& type, size_t size) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + if (!mInitialized) + { + if (!initialize()) + { + return false; + } + } + + return mHeap[type.memorySpace]->reserve(size); +} + + +PxCudaBuffer* CudaMemMgr::alloc(const PxCudaBufferType& type, size_t size, PX_ALLOC_INFO_PARAMS_DEF()) +{ + PxCudaBufferPtr addr = alloc(type.memorySpace, size, PX_ALLOC_INFO_PARAMS_INPUT()); + + shdfnd::Mutex::ScopedLock lock(mMutex); + CudaBuffer* buffer = NULL; + if (addr) + { + buffer = mBufferPool.construct(type); + if (buffer) + { + buffer->init(addr, size, *this, PX_ALLOC_INFO_PARAMS_INPUT()); + } + } + return buffer; +} + +PxCudaBufferPtr CudaMemMgr::alloc(PxCudaBufferMemorySpace::Enum memorySpace, size_t size, PX_ALLOC_INFO_PARAMS_DEF()) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + if (!mInitialized) + { + if (!initialize()) + { + return 0; + } + } + + if (mDebugDisableAllocs) + { + return 0; + } + + return reinterpret_cast<PxCudaBufferPtr>(mHeap[memorySpace]->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT())); +} + +bool CudaMemMgr::free(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + if (!mInitialized) + { + return false; + } + + if (addr) + { + return mHeap[memorySpace]->free((void*)(addr)); + } + else + { + return false; + } +} + +bool CudaMemMgr::realloc(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr, size_t size, 
PX_ALLOC_INFO_PARAMS_DEF()) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + if (!mInitialized) + { + return false; + } + + if (!size) + { + return false; + } + + if (mDebugDisableAllocs) + { + return NULL; + } + + bool ret = false; + if (addr) + { + ret = mHeap[memorySpace]->realloc((void*)(addr), size, PX_ALLOC_INFO_PARAMS_INPUT()); + } + + return ret; +} + +void CudaMemMgr::getStats(const PxCudaBufferType& type, PxCudaMemoryManagerStats& outStats) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + if (!mInitialized) + { + return; + } + + ApexHeapStats hpStats; + mHeap[type.memorySpace]->getStats(hpStats, HeapStatsFlags::F_BASIC_STATS | HeapStatsFlags::F_ALLOC_ID_STATS); + + outStats.heapSize = hpStats.heapSize; + outStats.totalAllocated = hpStats.totalAllocated; + outStats.maxAllocated = hpStats.maxAllocated; + PxMemCopy(outStats.allocIdStats, hpStats.allocIdStats, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS); +} + + +bool CudaMemMgr::free(CudaBuffer& buffer) +{ + PxCudaBufferMemorySpace::Enum memSpace = buffer.getTypeFast().memorySpace; + PxCudaBufferPtr addr = buffer.getPtrFast(); + + { + shdfnd::Mutex::ScopedLock lock(mMutex); + mBufferPool.destroy(&buffer); + } + + return free(memSpace, addr); +} + + +bool CudaMemMgr::realloc(CudaBuffer& buffer, size_t size, PX_ALLOC_INFO_PARAMS_DEF()) +{ + return realloc(buffer.getTypeFast().memorySpace, buffer.getPtrFast(), size, PX_ALLOC_INFO_PARAMS_INPUT()); +} + +PxCudaBufferPtr CudaMemMgr::getMappedPinnedPtr(PxCudaBufferPtr hostPtr) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + void* base = mHeap[PxCudaBufferMemorySpace::T_PINNED_HOST]->findBaseAddress((void*)hostPtr); + if (base) + { + size_t offset = ((PinnedMemAllocator*)mMemoryAllocator[PxCudaBufferMemorySpace::T_PINNED_HOST])->getMappedPinnedOffset(base); + return hostPtr + offset; + } + return 0; +} + +bool CudaBuffer::free() +{ + return mMemManager->free(*this); +} + + +bool CudaBuffer::realloc(size_t size, PX_ALLOC_INFO_PARAMS_DEF()) +{ + return 
mMemManager->realloc(*this, size, PX_ALLOC_INFO_PARAMS_INPUT()); +} + + +DeviceMemAllocator::DeviceMemAllocator(PxCudaContextManager& mgr, size_t maxSize) + : mMgr(mgr) + , mMaxSize(maxSize) + , mAllocSize(0) +{} + + +DeviceMemAllocator::~DeviceMemAllocator() +{ + PX_ASSERT(mAllocSize == 0); +} + + +void* DeviceMemAllocator::alloc(const size_t size) +{ + if (mAllocSize + size > mMaxSize) + { + return NULL; + } + else + { + PxScopedCudaLock lock(mMgr); + CUdeviceptr dPtr; + CUresult result = cuMemAlloc(&dPtr, uint32_t(size)); + + if (result == CUDA_SUCCESS) + { + mAllocSize += size; + return (void*)(size_t)(dPtr); + } + else + { + PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate device memory."); + return NULL; + } + } +} + + +void DeviceMemAllocator::free(void* addr, const size_t size) +{ + PxScopedCudaLock lock(mMgr); + PX_ASSERT(mAllocSize >= size); + CUresult result = cuMemFree((CUdeviceptr)(size_t)(addr)); + PX_UNUSED(result); + PX_ASSERT(result == CUDA_SUCCESS); + mAllocSize -= size; +} + + +PinnedMemAllocator::PinnedMemAllocator(PxCudaContextManager& mgr, size_t maxSize) + : mMgr(mgr) + , mMaxSize(maxSize) + , mAllocSize(0) +{ +} + + +PinnedMemAllocator::~PinnedMemAllocator() +{ + PX_ASSERT(mAllocSize == 0); +} + + +void* PinnedMemAllocator::alloc(const size_t size) +{ + if (mAllocSize + size > mMaxSize) + { + return NULL; + } + else + { + PxScopedCudaLock lock(mMgr); + void* hPtr; + unsigned int flags = 0; + + if (mMgr.canMapHostMemory()) + { + flags |= CU_MEMHOSTALLOC_DEVICEMAP; + } + CUresult result = cuMemHostAlloc(&hPtr, uint32_t(size), flags); + + if (result == CUDA_SUCCESS) + { + if (hPtr) + { + mAllocSize += size; + } + + if (mMgr.canMapHostMemory()) + { + CUdeviceptr dptr = 0; + cuMemHostGetDevicePointer(&dptr, hPtr, 0); + mMappedPinnedPtrs.insert(hPtr, size_t(dptr)); + } + + return hPtr; + } + else + { + PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate pinned memory."); + return NULL; + } + } +} + + +void PinnedMemAllocator::free(void* addr, const size_t 
size) +{ + PxScopedCudaLock lock(mMgr); + PX_ASSERT(mAllocSize >= size); + + if (mMgr.canMapHostMemory()) + { + PX_ASSERT(mMappedPinnedPtrs.find(addr)); + mMappedPinnedPtrs.erase(addr); + } + + CUresult result = cuMemFreeHost(addr); + PX_UNUSED(result); + PX_ASSERT(result == CUDA_SUCCESS); + mAllocSize -= size; +} + + +WriteCombinedMemAllocator::WriteCombinedMemAllocator(PxCudaContextManager& mgr, size_t maxSize) + : mMgr(mgr) + , mMaxSize(maxSize) + , mAllocSize(0) +{ + mWcMemSupport = mMgr.getDriverVersion() >= 2020 ? WcMem::SUPPORTED : WcMem::NOT_SUPPORTED; +} + + +WriteCombinedMemAllocator::~WriteCombinedMemAllocator() +{ + PX_ASSERT(mAllocSize == 0); +} + + +bool WriteCombinedMemAllocator::isWcMemSupported() +{ + if (mWcMemSupport == WcMem::SUPPORTED) + { + return true; + } + else + { + PX_ASSERT(mWcMemSupport == WcMem::NOT_SUPPORTED); + return false; + } +} + + +void* WriteCombinedMemAllocator::alloc(const size_t size) +{ + if (mAllocSize + size > mMaxSize) + { + return NULL; + } + else + { + PxScopedCudaLock lock(mMgr); + void* hPtr = NULL; + + unsigned int flags = CU_MEMHOSTALLOC_WRITECOMBINED; + + if (mMgr.canMapHostMemory()) + { + flags |= CU_MEMHOSTALLOC_DEVICEMAP; + } + + bool success = isWcMemSupported() && (cuMemHostAlloc(&hPtr, size, flags) == CUDA_SUCCESS); + if (success) + { + if (hPtr) + { + mAllocSize += size; + } + + return hPtr; + } + else + { + PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate write combined memory."); + return NULL; + } + } +} + + +void WriteCombinedMemAllocator::free(void* addr, const size_t size) +{ + PxScopedCudaLock lock(mMgr); + PX_ASSERT(mAllocSize >= size); + CUresult result = cuMemFreeHost(addr); + PX_ASSERT(result == CUDA_SUCCESS); + PX_UNUSED(result); + mAllocSize -= size; +} + + +HostMemAllocator::HostMemAllocator(size_t maxSize) + : mMaxSize(maxSize) + , mAllocSize(0) +{ +} + + +HostMemAllocator::~HostMemAllocator() +{ + PX_ASSERT(mAllocSize == 0); +} + + +void* HostMemAllocator::alloc(const size_t size) +{ + if 
(mAllocSize + size > mMaxSize) + { + return NULL; + } + else + { + void* ret = PX_ALLOC(size, "host memory"); + if (ret) + { + mAllocSize += size; + return ret; + } + else + { + PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate host memory."); + return NULL; + } + } +} + + +void HostMemAllocator::free(void* addr, const size_t size) +{ + PX_ASSERT(mAllocSize >= size); + PX_FREE(addr); + mAllocSize -= size; +} + diff --git a/PxShared/src/cudamanager/src/CudaMemoryManager.h b/PxShared/src/cudamanager/src/CudaMemoryManager.h new file mode 100644 index 0000000..071b4ab --- /dev/null +++ b/PxShared/src/cudamanager/src/CudaMemoryManager.h @@ -0,0 +1,297 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H +#define PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H + +#include "task/PxTaskDefine.h" +#include "HeapManagerInterface.h" + +#include "PsPool.h" +#include "PsMutex.h" +#include "PsUserAllocated.h" +#include "PsHashMap.h" + +namespace physx +{ + +class CudaBuffer; +class HeapManagerInterface; +class CudaMemMgr; +class PxCudaContextManager; + +class CudaBuffer: public PxCudaBuffer +{ +public: + PX_INLINE CudaBuffer(const PxCudaBufferType& type) + : mType(type) + {} + +// Ni Interface + bool free(); + bool realloc(size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); + const PxCudaBufferType& getType() const + { + return getTypeFast(); + } + PxCudaBufferPtr getPtr() const + { + return getPtrFast(); + } + size_t getSize() const + { + return getSizeFast(); + } + PxCudaMemoryManager* getCudaMemoryManager() const + { + return getCudaMemoryManagerFast(); + } +// + PX_INLINE const PxCudaBufferType& getTypeFast() const + { + return mType; + } + PX_INLINE PxCudaBufferPtr getPtrFast() const + { + return mPtr; + } + PX_INLINE void setPtr(PxCudaBufferPtr val) + { + mPtr = val; + } + PX_INLINE size_t getSizeFast() const + { + return mSize; + } + PX_INLINE void setSize(size_t val) + { + mSize = val; + } + PX_INLINE PxCudaMemoryManager* getCudaMemoryManagerFast() const + { + return reinterpret_cast<PxCudaMemoryManager*>(mMemManager); + } + PX_INLINE void init(PxCudaBufferPtr ptr, size_t size, CudaMemMgr& manager, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) + { + mPtr = ptr; + mSize = size; + mMemManager = &manager; + mAllocInfo = PxAllocInfo(PX_ALLOC_INFO_PARAMS_INPUT()); + } + + void 
operator=(const CudaBuffer& in) + { + const_cast<PxCudaBufferType&>(mType) = in.mType; + mPtr = in.mPtr; + mSize = in.mSize; + mMemManager = in.mMemManager; + } + +private: + const PxCudaBufferType mType; + PxCudaBufferPtr mPtr; + size_t mSize; + CudaMemMgr* mMemManager; + PxAllocInfo mAllocInfo; +}; + + +class CudaMemMgr: public PxCudaMemoryManager, public shdfnd::UserAllocated +{ + PX_NOCOPY(CudaMemMgr) +public: + CudaMemMgr(PxCudaContextManager& mMgr, physx::PxErrorCallback& errorCallback); + virtual ~CudaMemMgr(); + + PxCudaBuffer* alloc(const PxCudaBufferType& type, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); + PxCudaBufferPtr alloc(PxCudaBufferMemorySpace::Enum memorySpace, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); + bool free(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr); + bool realloc(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); + void getStats(const PxCudaBufferType& type, PxCudaMemoryManagerStats& outStats); + bool reserve(const PxCudaBufferType& type, size_t size); + bool setPageSize(const PxCudaBufferType& type, size_t size); + bool setMaxMemorySize(const PxCudaBufferType& type, size_t size); + size_t getBaseSize(const PxCudaBufferType& type); + size_t getPageSize(const PxCudaBufferType& type); + size_t getMaxMemorySize(const PxCudaBufferType& type); + void debugDisableAllocs() + { + mDebugDisableAllocs = true; + } + PxCudaBufferPtr getMappedPinnedPtr(PxCudaBufferPtr hostPtr); + + // internals + bool free(CudaBuffer& buffer); + bool realloc(CudaBuffer& buffer, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); + bool setBaseSize(const PxCudaBufferType& type, size_t size); + +private: + PX_INLINE bool initialize(); + physx::PxErrorCallback& mErrorCallback; + HeapManagerInterface* mHeap[PxCudaBufferMemorySpace::COUNT]; + HeapManagerInterface::Allocator* 
mMemoryAllocator[PxCudaBufferMemorySpace::COUNT]; + size_t mMemoryBaseSize[PxCudaBufferMemorySpace::COUNT]; + size_t mMemoryPageSize[PxCudaBufferMemorySpace::COUNT]; + size_t mMemoryMaxSize[PxCudaBufferMemorySpace::COUNT]; + shdfnd::Pool<CudaBuffer> mBufferPool; + bool mInitialized; + PxCudaContextManager& mMgr; + shdfnd::Mutex mMutex; + bool mDebugDisableAllocs; +}; + +// TODO, give MemoryAllocator prefix or namespace +class DeviceMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated +{ + PX_NOCOPY(DeviceMemAllocator) +public: + DeviceMemAllocator(PxCudaContextManager& mgr, size_t maxSize); + virtual ~DeviceMemAllocator(); + + virtual void* alloc(const size_t size); + virtual void free(void* addr, const size_t size); + + void setMaxSize(size_t maxSize) + { + mMaxSize = maxSize; + } + size_t getMaxSize() + { + return mMaxSize; + } + +private: + PxCudaContextManager& mMgr; + size_t mMaxSize; + size_t mAllocSize; +}; + + +class PinnedMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated +{ + PX_NOCOPY(PinnedMemAllocator) +public: + PinnedMemAllocator(PxCudaContextManager& mMgr, size_t maxSize); + virtual ~PinnedMemAllocator(); + + virtual void* alloc(const size_t size); + virtual void free(void* addr, const size_t size); + + void setMaxSize(size_t maxSize) + { + mMaxSize = maxSize; + } + size_t getMaxSize() + { + return mMaxSize; + } + + size_t getMappedPinnedOffset(void* base) + { + + PX_ASSERT(base); + const shdfnd::HashMap<void*, size_t>::Entry* entry = mMappedPinnedPtrs.find(base); + PX_ASSERT(entry); + return entry->second - size_t(base); + } + +private: + PxCudaContextManager& mMgr; + size_t mMaxSize; + size_t mAllocSize; + shdfnd::HashMap<void*, size_t> mMappedPinnedPtrs; +}; + + +class HostMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated +{ + PX_NOCOPY(HostMemAllocator) +public: + HostMemAllocator(size_t maxSize); + virtual ~HostMemAllocator(); + + virtual void* 
alloc(const size_t size); + virtual void free(void* addr, const size_t size); + + void setMaxSize(size_t maxSize) + { + mMaxSize = maxSize; + } + size_t getMaxSize() + { + return mMaxSize; + } + +private: + size_t mMaxSize; + size_t mAllocSize; +}; + + +class WriteCombinedMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated +{ + PX_NOCOPY(WriteCombinedMemAllocator) +public: + WriteCombinedMemAllocator(PxCudaContextManager& mgr, size_t maxSize); + virtual ~WriteCombinedMemAllocator(); + + virtual void* alloc(const size_t size); + virtual void free(void* addr, const size_t size); + + void setMaxSize(size_t maxSize) + { + mMaxSize = maxSize; + } + size_t getMaxSize() + { + return mMaxSize; + } + +private: + struct WcMem + { + enum Enum + { + NOT_CHECKED, + SUPPORTED, + NOT_SUPPORTED + }; + }; + + bool isWcMemSupported(); + +private: + int mCudaOrdinal; + PxCudaContextManager& mMgr; + WcMem::Enum mWcMemSupport; + size_t mMaxSize; + size_t mAllocSize; +}; + +} // end physx namespace + +#endif // PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H diff --git a/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h b/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h new file mode 100644 index 0000000..f20d87a --- /dev/null +++ b/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h @@ -0,0 +1,128 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H +#define PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H + +#include <cuda.h> + +namespace physx +{ +// Utility macros for defining and using UUID values for use with +// the CUDA driver. +// +// CU_INIT_UUID must be #defined in exactly one translation unit +// per linkage unit (i.e. one .c or .cpp file per binary). This +// allows multiple .c and .cpp files to include headers that define +// UUIDs using CU_DEFINE_UUID: The translation unit that #defines +// CU_INIT_UUID will define and initialize the UUIDs, and all other +// translation units will link to that definition. + +// Define helper macro: CU_INIT_EXTERN_CONST +// In C, global consts have external linkage by default. In C++, +// global consts have internal linkage by default, and require the +// "extern" storage class specifier to have external linkage. C++ +// allows using "extern" with initializers, but it is illegal in C. 
+// Thus, there is no common syntax for C and C++ to declare and +// initialize global constants with external linkage. This macro +// helps reduce duplication of other macros by factoring out the +// C/C++ discrepancy. +#ifdef __cplusplus +#define CU_INIT_EXTERN_CONST extern const +#else +#define CU_INIT_EXTERN_CONST const +#endif + +// Define macro CU_DEFINE_UUID. The parameters are the commonly +// used "int-short-short-char[8]" style, which can be generated by +// Microsoft's guidgen.exe tool, Visual Studio's "Create GUID" +// option in the Tools menu (select style #2), and many web-based +// UUID generator tools. Here's an example of what "Create GUID" +// style #2 generates: +// +// DEFINE_GUID( <<name>>, +// 0x2446054, 0xbb8e, 0x4b2f, 0x8b, 0xfc, 0xa4, 0xfe, 0x44, 0x9, 0x8f, 0xb8); +// +// So to use one of these with CUDA, just change the macro to +// CU_DEFINE_UUID and choose a symbol name. For example: +// +// CU_DEFINE_UUID( MyUuid, +// 0x2446054, 0xbb8e, 0x4b2f, 0x8b, 0xfc, 0xa4, 0xfe, 0x44, 0x9, 0x8f, 0xb8); +// +#if defined(CU_INIT_UUID) +#define CU_CHAR(x) (char)(unsigned char)((x) & 0xff) +// Define the symbol as exportable to other translation units, and +// initialize the value. Inner set of parens is necessary because +// "bytes" array needs parens within the struct initializer, which +// also needs parens. +#define CU_DEFINE_UUID(name, a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ + CU_INIT_EXTERN_CONST CUuuid name = \ + { \ + { \ + CU_CHAR(a), CU_CHAR((a) >> 8), CU_CHAR((a) >> 16), CU_CHAR((a) >> 24), \ + CU_CHAR(b), CU_CHAR((b) >> 8), \ + CU_CHAR(c), CU_CHAR((c) >> 8), \ + CU_CHAR(d0), \ + CU_CHAR(d1), \ + CU_CHAR(d2), \ + CU_CHAR(d3), \ + CU_CHAR(d4), \ + CU_CHAR(d5), \ + CU_CHAR(d6), \ + CU_CHAR(d7) \ + } \ + } +#else +// Declare the symbol to be imported from another translation unit. 
+#define CU_DEFINE_UUID(name, a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ + extern const CUuuid name +#endif + +//------------------------------------------------------------------ +// Cuda Private API Interfaces for PhysX +//------------------------------------------------------------------ + +// This provides backdoor interfaces used by PhysX +CU_DEFINE_UUID(CU_ETID_PhysXInterface, 0x8c0ba50c, 0x0410, 0x9a92, 0x89, 0xa7, 0xd0, 0xdf, 0x10, 0xe7, 0x72, 0x86); + +typedef struct CUetblPhysXInterface_st +{ + /* Size of this structure */ + size_t size; + + /* Create a new CUDA context on Node3dLowLatency. + * - will usually it will just call cuCtxCreateOnNode3DLowLatency. + */ + CUresult (CUDAAPI *cuCtxCreateOnNode3DLowLatency)( + CUcontext *pctx, + unsigned int flags, + CUdevice dev); + +} CUetblPhysXInterface; +} + +#endif // PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H diff --git a/PxShared/src/cudamanager/src/GpuDispatcher.cpp b/PxShared/src/cudamanager/src/GpuDispatcher.cpp new file mode 100644 index 0000000..0d05a97 --- /dev/null +++ b/PxShared/src/cudamanager/src/GpuDispatcher.cpp @@ -0,0 +1,942 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. 
+// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#include "task/PxTaskDefine.h" + +#if PX_SUPPORT_GPU_PHYSX + +#include "task/PxGpuDispatcher.h" +#include "task/PxCpuDispatcher.h" +#include "cudamanager/PxCudaContextManager.h" +#include "cudamanager/PxGpuCopyDesc.h" + +#include "CudaKernelWrangler.h" +#include "GpuDispatcher.h" + +#if PX_SUPPORT_PXTASK_PROFILING +#include "foundation/PxProfiler.h" +#endif + +#include "PsArray.h" +#include "PsSync.h" +#include "PsInlineArray.h" +#include "PsUserAllocated.h" +#include "PsAtomic.h" + +#if PX_VC +#pragma warning(disable: 4062) //enumerator 'identifier' in switch of enum 'enumeration' is not handled +#endif + +using namespace physx; + +// hack to run CUDA calls in a single thread +// used to capture CUDA APIC traces +// #define SINGLE_CUDA_THREAD 1 + +/* Kernels in UtilsKernels.cu */ +const char* UtilKernelNames[] = +{ + "Saturate", + "MemCopyAsync", + "MemCopyBatchedAsync", +}; +enum +{ + KERNEL_SATURATE, + KERNEL_MEMCOPY, + KERNEL_MEMCOPY_BATCHED +}; + +/* ==================== LaunchTask =========================== */ + +/** +\brief A task that maintains a list of dependent tasks. + +This task maintains a list of dependent tasks that have their reference counts +reduced on completion of the task. 
+ +The refcount is incremented every time a dependent task is added. +*/ +class physx::FanoutTask : public PxBaseTask, public shdfnd::UserAllocated +{ + PX_NOCOPY(FanoutTask) +public: + FanoutTask(const char* name) : mRefCount(0), mName(name), mNotifySubmission(false) {} + + virtual void run() {} + virtual const char* getName(void) const { return mName; } + + virtual void removeReference() + { + shdfnd::Mutex::ScopedLock lock(mMutex); + if (!shdfnd::atomicDecrement(&mRefCount)) + { + // prevents access to mReferencesToRemove until release + shdfnd::atomicIncrement(&mRefCount); + mNotifySubmission = false; + PX_ASSERT(mReferencesToRemove.empty()); + // save continuation and dependents + for (uint32_t i = 0; i < mDependents.size(); i++) + mReferencesToRemove.pushBack(mDependents[i]); + mDependents.clear(); + mTm->getCpuDispatcher()->submitTask(*this); + } + } + + /** + \brief Increases reference count + */ + virtual void addReference() + { + shdfnd::Mutex::ScopedLock lock(mMutex); + shdfnd::atomicIncrement(&mRefCount); + mNotifySubmission = true; + } + + /** + \brief Return the ref-count for this task + */ + PX_INLINE int32_t getReference() const + { + return mRefCount; + } + + /** + Adds a dependent task. It also sets the task manager querying it from the dependent task. + The refcount is incremented every time a dependent task is added. + */ + void addDependent(PxBaseTask& dependent) + { + shdfnd::Mutex::ScopedLock lock(mMutex); + shdfnd::atomicIncrement(&mRefCount); + mTm = dependent.getTaskManager(); + mDependents.pushBack(&dependent); + dependent.addReference(); + mNotifySubmission = true; + } + + /** + Reduces reference counts of the continuation task and the dependent tasks, also + clearing the copy of continuation and dependents task list. 
+ */ + virtual void release() + { + shdfnd::InlineArray<PxBaseTask*, 10> referencesToRemove; + + { + shdfnd::Mutex::ScopedLock lock(mMutex); + + const uint32_t contCount = mReferencesToRemove.size(); + referencesToRemove.reserve(contCount); + for (uint32_t i=0; i < contCount; ++i) + referencesToRemove.pushBack(mReferencesToRemove[i]); + + mReferencesToRemove.clear(); + // allow access to mReferencesToRemove again + if (mNotifySubmission) + { + removeReference(); + } + else + { + physx::shdfnd::atomicDecrement(&mRefCount); + } + + // the scoped lock needs to get freed before the continuation tasks get (potentially) submitted because + // those continuation tasks might trigger events that delete this task and corrupt the memory of the + // mutex (for example, assume this task is a member of the scene then the submitted tasks cause the simulation + // to finish and then the scene gets released which in turn will delete this task. When this task then finally + // continues the heap memory will be corrupted. 
+ } + + for (uint32_t i=0; i < referencesToRemove.size(); ++i) + referencesToRemove[i]->removeReference(); + } + +private: + friend class LaunchTask; + + volatile int32_t mRefCount; + const char* mName; + shdfnd::InlineArray<PxBaseTask*, 4> mDependents; + shdfnd::InlineArray<PxBaseTask*, 4> mReferencesToRemove; + bool mNotifySubmission; + shdfnd::Mutex mMutex; // guarding mDependents and mNotifySubmission +}; + +class physx::LaunchTask : public physx::FanoutTask +{ +public: + LaunchTask() : FanoutTask("GpuDispatcher.launch"), mIsBatchStarted(false) + { + } + + virtual void run() + { + if (mReferencesToRemove.size() >= 1) + mIsBatchStarted = true; + } + + bool mIsBatchStarted; +}; + +class physx::BlockTask : public PxLightCpuTask, public shdfnd::UserAllocated +{ + PX_NOCOPY(BlockTask) +public: + BlockTask(PxGpuWorkerThread* dispatcher, physx::LaunchTask* launchTask) + : mDispatcher(dispatcher), mLaunchTask(launchTask), mSyncTask(NULL) + {} + + virtual const char* getName(void) const + { + return "GpuDispatcher.block"; + } + + virtual void removeReference() + { + shdfnd::Mutex::ScopedLock lock(mMutex); + if (!physx::shdfnd::atomicDecrement(&mRefCount)) + { + // prevents access to mSyncTask until release + physx::shdfnd::atomicIncrement(&mRefCount); + PX_ASSERT(!mSyncTask); + shdfnd::swap(mSyncTask, mCont); + mTm->getCpuDispatcher()->submitTask(*this); + } + } + + virtual void run() + { + if (mLaunchTask->mIsBatchStarted) + { + mDispatcher->mCtxMgr->acquireContext(); + CUevent stopEv = mDispatcher->mCachedBlockingEvents.get(); + CUstream stream = (CUstream)0; + mSyncTask->addReference(); + mDispatcher->flushBatch(stopEv, stream, mSyncTask); + mDispatcher->mCtxMgr->releaseContext(); + mLaunchTask->mIsBatchStarted = false; + } + } + + virtual void setContinuation(PxBaseTask* continuation) + { + // this function is called multiple times, skip after first call + shdfnd::Mutex::ScopedLock lock(mMutex); + physx::shdfnd::atomicIncrement(&mRefCount); + if (!mCont) + { + mCont 
= continuation; + mTm = mCont->getTaskManager(); + mCont->addReference(); + } + } + + virtual void release() + { + shdfnd::Mutex::ScopedLock lock(mMutex); + mSyncTask->removeReference(); + mSyncTask = NULL; + // allow access to mSyncTask again + if (mCont) + { + removeReference(); + } + else + { + physx::shdfnd::atomicDecrement(&mRefCount); + } + } + + PxGpuWorkerThread* mDispatcher; + physx::LaunchTask* mLaunchTask; + PxBaseTask* mSyncTask; + shdfnd::Mutex mMutex; // guarding mCont +}; + +/* ==================== API functions =========================== */ + +void physx::releaseGpuDispatcher(PxGpuDispatcher& gd) +{ + GpuDispatcherImpl* impl = (GpuDispatcherImpl*) &gd; + delete impl; +} + +PxCudaContextManager* GpuDispatcherImpl::getCudaContextManager() +{ + return mDispatcher->mCtxMgr; +} + +GpuDispatcherImpl::GpuDispatcherImpl(PxErrorCallback& errorCallback, PxCudaContextManager& ctx) : + mDispatcher(NULL), + mBlockingThread(NULL), + mLaunchTask(NULL), + mBlockTask(NULL), + mSyncTask(NULL) +{ + mDispatcher = PX_NEW(PxGpuWorkerThread); + if (!mDispatcher) + { + forceFailureMode(); + return; + } + + mDispatcher->setCudaContext(ctx); + + mDispatcher->mUtilKernelWrapper = PX_NEW(KernelWrangler)(*this, errorCallback, UtilKernelNames, sizeof(UtilKernelNames) / sizeof(char*)); + if (!mDispatcher->mUtilKernelWrapper || mDispatcher->mUtilKernelWrapper->hadError()) + { + forceFailureMode(); + return; + } + + mLaunchTask = PX_NEW(LaunchTask); + mBlockTask = PX_NEW(BlockTask)(mDispatcher, mLaunchTask); + mSyncTask = PX_NEW(FanoutTask)("GpuDispatcher.sync"); +} + +GpuDispatcherImpl::~GpuDispatcherImpl() +{ + if (mBlockingThread) + { + mBlockingThread->signalQuit(); + PX_ASSERT(mDispatcher); + mDispatcher->mRecordEventQueued.set(); + mBlockingThread->waitForQuit(); + delete mBlockingThread; + } + + if (mDispatcher) + { + mDispatcher->signalQuit(); + mDispatcher->mInputReady.set(); + mDispatcher->waitForQuit(); + delete mDispatcher; + } + + if (mLaunchTask) + 
PX_DELETE(mLaunchTask); + + if (mBlockTask) + PX_DELETE(mBlockTask); + + if (mSyncTask) + PX_DELETE(mSyncTask); +} + +void GpuDispatcherImpl::start() +{ +#ifndef SINGLE_CUDA_THREAD + mDispatcher->start(shdfnd::Thread::getDefaultStackSize()); +#else + mDispatcher->execute(); +#endif + + mBlockingThread = PX_NEW(BlockingWaitThread)(*mDispatcher); +#ifndef SINGLE_CUDA_THREAD + mBlockingThread->start(shdfnd::Thread::getDefaultStackSize()); +#endif +} + +void GpuDispatcherImpl::startSimulation() +{ + mDispatcher->startSimulation(); +} + +void GpuDispatcherImpl::stopSimulation() +{ + mDispatcher->stopSimulation(); +} + +void GpuDispatcherImpl::startGroup() +{ + shdfnd::atomicIncrement(&mDispatcher->mActiveGroups); +} + +void GpuDispatcherImpl::submitTask(PxTask& task) +{ + mDispatcher->mSubmittedTaskList.push(&task); +} + +void GpuDispatcherImpl::finishGroup() +{ + if (!shdfnd::atomicDecrement(&mDispatcher->mActiveGroups)) + { +#ifdef SINGLE_CUDA_THREAD + mDispatcher->mCtxMgr->acquireContext(); + mDispatcher->processActiveTasks(); + mDispatcher->mCtxMgr->releaseContext(); + mDispatcher->blockingWaitFunc(); +#endif + mDispatcher->mInputReady.set(); + } +} + +void GpuDispatcherImpl::addCompletionPrereq(PxBaseTask& task) +{ + mDispatcher->addCompletionPrereq(task); +} + +bool GpuDispatcherImpl::failureDetected() const +{ + return mDispatcher->mFailureDetected; +} + +void GpuDispatcherImpl::forceFailureMode() +{ + mDispatcher->mFailureDetected = true; +} + +void GpuDispatcherImpl::launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream) +{ + mDispatcher->launchCopyKernel(desc, count, stream); +} + +PxBaseTask& GpuDispatcherImpl::getPreLaunchTask() +{ + PX_ASSERT(mLaunchTask); + return *mLaunchTask; +} + +void GpuDispatcherImpl::addPreLaunchDependent(PxBaseTask& dependent) +{ + PX_ASSERT(mLaunchTask); + mLaunchTask->addDependent(dependent); +} + +PxBaseTask& GpuDispatcherImpl::getPostLaunchTask() +{ + PX_ASSERT(mBlockTask); + return *mBlockTask; +} + +void 
GpuDispatcherImpl::addPostLaunchDependent(PxBaseTask& dependent) +{ + PX_ASSERT(mSyncTask && mBlockTask); + mSyncTask->addDependent(dependent); + mBlockTask->setContinuation(mSyncTask); + mSyncTask->removeReference(); +} + +/* ==================== Worker Thread =========================== */ + +PxGpuWorkerThread::PxGpuWorkerThread() + : mActiveGroups(0) + , mCtxMgr(NULL) + , mFailureDetected(false) + , mCompletionRingPush(0) + , mCompletionRingPop(0) + , mCachedBlockingEvents(CU_EVENT_BLOCKING_SYNC) + , mCachedNonBlockingEvents(0) + , mCountActiveScenes(0) + , mSmStartTimes(0) + , mUtilKernelWrapper(0) +{ +} + +void PxGpuWorkerThread::setCudaContext(PxCudaContextManager& ctx) +{ + mCtxMgr = &ctx; +} + +PxGpuWorkerThread::~PxGpuWorkerThread() +{ + mCtxMgr->acquireContext(); + mCachedBlockingEvents.clear(); + mCachedNonBlockingEvents.clear(); + mCachedStreams.reset(); + while (!mCachedStreams.empty()) + { + GD_CHECK_CALL(cuStreamDestroy(mCachedStreams.get(mCachedStreams.popBack()))); + } + mCtxMgr->releaseContext(); + + if (mSmStartTimes) + { + PX_FREE(mSmStartTimes); + } + if (mUtilKernelWrapper) + { + // will acquire the context for itself + PX_DELETE(mUtilKernelWrapper); + } +} + +void PxGpuWorkerThread::emitStartEvent(const char *id) +{ + PX_UNUSED(id); +#if PX_SUPPORT_PXTASK_PROFILING + PX_PROFILE_START_CROSSTHREAD(id,0); +#endif +} + +void PxGpuWorkerThread::emitStopEvent(const char *id) +{ + PX_UNUSED(id); +#if PX_SUPPORT_PXTASK_PROFILING + PX_PROFILE_STOP_CROSSTHREAD(id,0); +#endif +} + +/* A TaskManager is informing us that its simulation is being stepped */ +void PxGpuWorkerThread::startSimulation() +{ + mUsingConcurrentStreams = mCtxMgr->getUsingConcurrentStreams(); + + shdfnd::atomicIncrement(&mCountActiveScenes); +} + + +/* A TaskManager is informing us that its simulation has ended */ +void PxGpuWorkerThread::stopSimulation() +{ + if (shdfnd::atomicDecrement(&mCountActiveScenes) == 0) + mCachedStreams.reset(); +} + + +/* PxGpuDispatcher worker thread 
main loop */ +void PxGpuWorkerThread::execute() +{ + setName("GpuDispatcher.Worker"); + +#ifndef SINGLE_CUDA_THREAD + bool running = true; + while (running) + { + mInputReady.wait(); + + if (quitIsSignalled()) + { + break; + } + + if (!mSubmittedTaskList.empty()) + { + mCtxMgr->acquireContext(); + processActiveTasks(); + mCtxMgr->releaseContext(); + } + } + + quit(); +#endif +} + +/* Expected to be called by a GPU task, or a function called by a GPU + * task. The context is important because I think it does not require + * any locking since only one GPU task's launchInstance() function will + * be running at a time, per GpuDispatcherImpl (per CUDA context). + */ +void PxGpuWorkerThread::addCompletionPrereq(PxBaseTask& task) +{ + if (mFailureDetected) + { + return; + } + + emitStartEvent("GpuDispatcher.AddCompletionEvent"); + task.addReference(); + mCompletionTasks.pushBack(&task); + emitStopEvent("GpuDispatcher.AddCompletionEvent"); +} + +namespace +{ + template <typename T0> + PX_NOINLINE CUresult launchKernelGrid(CUfunction func, unsigned int gridWidth, unsigned int gridHeight, unsigned int numThreads, unsigned int sharedMem, CUstream stream, T0 v0) + { + void* kernelParams[] = + { + &v0, + }; + return cuLaunchKernel(func, gridWidth, gridHeight, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); + } +} + +void PxGpuWorkerThread::launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream) +{ + if (!mCtxMgr->canMapHostMemory()) + { + for (uint32_t i = 0 ; i < count ; i++) + { + PX_ASSERT(desc->isValid()); + switch (desc->type) + { + case PxGpuCopyDesc::DeviceMemset32: + GD_CHECK_CALL(cuMemsetD32Async(desc->dest, (uint32_t) desc->source, desc->bytes >> 2, stream)); + break; + case PxGpuCopyDesc::DeviceToDevice: + GD_CHECK_CALL(cuMemcpyDtoDAsync(desc->dest, desc->source, desc->bytes, stream)); + break; + case PxGpuCopyDesc::DeviceToHost: + GD_CHECK_CALL(cuMemcpyDtoHAsync((void*) desc->dest, desc->source, desc->bytes, stream)); + break; + case 
PxGpuCopyDesc::HostToDevice: + GD_CHECK_CALL(cuMemcpyHtoDAsync(desc->dest, (void*) desc->source, desc->bytes, stream)); + break; + } + + desc++; + } + } + else if (count == 1) + { + CUfunction func = mUtilKernelWrapper->getCuFunction(KERNEL_MEMCOPY); + uint32_t smCount = (uint32_t)mCtxMgr->getMultiprocessorCount(); + + PX_ASSERT(desc->isValid()); + + CUdeviceptr dptr; + switch (desc->type) + { + case PxGpuCopyDesc::DeviceToHost: + dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc->dest)); + desc->dest = dptr; + break; + case PxGpuCopyDesc::HostToDevice: + dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc->source)); + desc->source = dptr; + break; + case PxGpuCopyDesc::DeviceMemset32: + case PxGpuCopyDesc::DeviceToDevice: + //do nothing, cases are here for GCCs warning system + break; + } + + uint32_t numThreads; + if (mCtxMgr->supportsArchSM20()) + { + numThreads = 256; + } + else + { + numThreads = 128; + } + uint32_t blocks = uint32_t(desc->bytes / (numThreads * 4 * 6)); + if (blocks == 0) + { + blocks = 1; + } + if (blocks > smCount) + { + blocks = smCount; + } + + GD_CHECK_CALL( + launchKernel(func, blocks, numThreads, 0, stream, *desc) + ); + } + else + { + CUfunction func = mUtilKernelWrapper->getCuFunction(KERNEL_MEMCOPY_BATCHED); + CUdeviceptr dptr; + + for (uint32_t i = 0 ; i < count ; i++) + { + PX_ASSERT(desc[i].isValid()); + + switch (desc[i].type) + { + case PxGpuCopyDesc::DeviceToHost: + dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc[i].dest)); + desc[i].dest = dptr; + break; + case PxGpuCopyDesc::HostToDevice: + dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc[i].source)); + desc[i].source = dptr; + break; + case PxGpuCopyDesc::DeviceMemset32: + case PxGpuCopyDesc::DeviceToDevice: + //do nothing, cases are here for GCCs warning system + break; + } + } + + uint32_t numThreads, numBlocksX; + if (mCtxMgr->supportsArchSM20()) + { + numThreads = 
256; + numBlocksX = 1; + } + else + { + numThreads = 128; + numBlocksX = 2; + } + + dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc)); + + GD_CHECK_CALL( + launchKernelGrid(func, numBlocksX, count, numThreads, 0, stream, + dptr) + ); + } +} + +void PxGpuWorkerThread::flushBatch(CUevent endEvent, CUstream syncStream, PxBaseTask* task) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + if (mFailureDetected) + { + return; + } + + if (endEvent && (1 & intptr_t(syncStream))) + { + // blocking record event on stream 0, flushes current push buffer + GD_CHECK_CALL(cuEventRecord(endEvent, 0)); + } else { + CUresult ret = cuStreamQuery(0); // flushes current push buffer + PX_ASSERT(ret == CUDA_SUCCESS || ret == CUDA_ERROR_NOT_READY); + PX_UNUSED(ret); + } + + int next = (mCompletionRingPush + 1) % SIZE_COMPLETION_RING; + while (next == mCompletionRingPop) + { + // lordy, I hope we never get here. + PX_ALWAYS_ASSERT(); + mCtxMgr->releaseContext(); + sleep(1); + mCtxMgr->acquireContext(); + } + + CudaBatch& b = mCompletionRing[ mCompletionRingPush ]; + b.blockingEvent = endEvent; + b.blockingStream = syncStream; + b.continuationTask = task; + + mCompletionRingPush = next; + mRecordEventQueued.set(); +} + +// Take any submitted tasks into its appropriate read list +void PxGpuWorkerThread::pollSubmitted(shdfnd::Array<ReadyTask>* ready) +{ + mInputReady.reset(); + PxGpuTask* gt; + while ((gt = (PxGpuTask*) mSubmittedTaskList.popBack()) != 0) + { + PxGpuTaskHint::Enum type = gt->getTaskHint(); + ReadyTask* r = &ready[ type ].insert(); + r->task = gt; + r->iteration = 0; + if (mUsingConcurrentStreams && gt->mStreamIndex == 0) + { + gt->mStreamIndex = mCachedStreams.popBack(); + } + } +} + +void PxGpuWorkerThread::processActiveTasks() +{ + emitStartEvent("GpuDispatcher.ProcessTasksEvent"); + + if (mFailureDetected) + { + while (!mSubmittedTaskList.empty()) + { + mInputReady.reset(); + mSubmittedTaskList.popBack()->release(); + } + 
emitStopEvent("GpuDispatcher.ProcessTasksEvent"); + return; + } + + for (uint32_t i = 0; i < PxGpuTaskHint::NUM_GPU_TASK_HINTS; i++) + { + mReady[i].clear(); + } + + //OutputDebugString("batch beginning\n"); + + const CUevent nonBlockEv = mCachedNonBlockingEvents.get(); + bool workToFlush = false; + bool tasksRemain = false; + PxGpuTaskHint::Enum curMode = PxGpuTaskHint::HostToDevice; + + pollSubmitted(mReady); + + do + { + // cycle current run mode when necessary + if (mReady[ curMode ].size() == 0) + { + if (curMode == PxGpuTaskHint::HostToDevice) + { + curMode = PxGpuTaskHint::Kernel; + } + else if (curMode == PxGpuTaskHint::Kernel) + { + curMode = PxGpuTaskHint::DeviceToHost; + } + else + { + curMode = PxGpuTaskHint::HostToDevice; + } + } + + uint32_t singleStream = mReady[curMode].empty() ? 0 : mReady[curMode].front().task->mStreamIndex; + + while (mReady[ curMode ].size()) + { + bool needwfi = false; + for (uint32_t i = 0 ; i < mReady[ curMode ].size() ; i++) + { + ReadyTask& r = mReady[ curMode ][ i ]; + + if (r.task->mPreSyncRequired) + { + // If mPreSyncRequired is set *before* the task is run, it implies + // a WFI must be inserted before this task issues any work. Multiple + // ready tasks may have this flag, so to avoid inserting multiple WFI + // requests, we skip marked tasks in this pass and note a WFI is needed. + needwfi = true; + r.task->mPreSyncRequired = false; + } + else + { + const CUstream s = (r.task->mStreamIndex > 0) ? 
mCachedStreams.get(r.task->mStreamIndex) : 0; +#if PX_PROFILE + r.task->mTm->emitStartEvent(*r.task); +#endif + bool active = r.task->launchInstance(s, int(r.iteration++)); +#if PX_PROFILE + r.task->mTm->emitStopEvent(*r.task); +#endif + if(singleStream != r.task->mStreamIndex) + singleStream = 0; + + // If the launchInstance() call reported a non-recoverable error, gracefully + // release all scheduled tasks + if (mFailureDetected) + { + // Release all ready tasks + for (uint32_t h = 0; h < PxGpuTaskHint::NUM_GPU_TASK_HINTS; h++) + { + for (uint32_t j = 0 ; j < mReady[ h ].size() ; j++) + { + mReady[ h ][ j ].task->release(); + } + mReady[ h ].clear(); + } + + // Release all submitted tasks, until idle + while (!mSubmittedTaskList.empty()) + { + mInputReady.reset(); + mSubmittedTaskList.popBack()->release(); + } + return; + } + + workToFlush = true; + if (r.task->mPreSyncRequired) + { + // This task has asked for a sync point, meaning it has launched a copy + // or a kernel that must be completed before any later tasks are allowed + // to start. 
Insert a WFI and clear the needwfi flag + GD_CHECK_CALL(cuEventRecord(nonBlockEv, 0)); + needwfi = false; + r.task->mPreSyncRequired = false; + } + + if (!active) + { + r.task->release(); + mReady[ curMode ].replaceWithLast(i); + pollSubmitted(mReady); + i -= 1; + } + } + } + + if (needwfi) + { + GD_CHECK_CALL(cuEventRecord(nonBlockEv, 0)); + } + } + + /* We have completed one of the three phases */ + + tasksRemain = false; + for (int e = (int) PxGpuTaskHint::HostToDevice ; e != (int) PxGpuTaskHint::NUM_GPU_TASK_HINTS ; e++) + { + tasksRemain |= (mReady[ e ].size() != 0); + } + + if (!mCompletionTasks.empty()) + { + workToFlush = true; + } + + if (workToFlush && (tasksRemain == false || curMode == PxGpuTaskHint::DeviceToHost)) + { + //OutputDebugString("batch ending\n"); + + while (mCompletionTasks.size()) + { + PxBaseTask* t = mCompletionTasks.popBack(); + if (workToFlush) + { + CUevent stopEv = mCachedBlockingEvents.get(); + CUstream stream = singleStream ? mCachedStreams.get(singleStream) : (CUstream)1; + flushBatch(stopEv, stream, t); + workToFlush = false; + } + else + { + flushBatch(0, 0, t); + } + } + if (workToFlush) + { + /* Getting here is probably an indication of a bug in your task graph, + * but it is possible to get this warning if you have CPU tasks that + * can delay GpuTasks. So, consider this warning "training wheels" and + * disable it if you know your graph is correct. + */ + // SJB - Disabling this warning, APEX does this every frame because + // of how BasicIOS and IOFX interact. 
+ //shdfnd::getFoundation().error(PX_WARN, + // "CUDA work generated without a completion dependency!"); + CUevent stopEv = mCachedBlockingEvents.get(); + flushBatch(stopEv, (CUstream)1, NULL); + } + } + } + while (tasksRemain); + + mCachedNonBlockingEvents.add(nonBlockEv); + + emitStopEvent("GpuDispatcher.ProcessTasksEvent"); +} + +#endif diff --git a/PxShared/src/cudamanager/src/HeapManagerInterface.h b/PxShared/src/cudamanager/src/HeapManagerInterface.h new file mode 100644 index 0000000..7fe7f2e --- /dev/null +++ b/PxShared/src/cudamanager/src/HeapManagerInterface.h @@ -0,0 +1,156 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +//----------------------------------------------------------------------------// +// HeapManagerInterface.h +//----------------------------------------------------------------------------// + +#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H +#define PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H + +#include "task/PxTaskDefine.h" +#include "foundation/PxSimpleTypes.h" +#include "cudamanager/PxCudaMemoryManager.h" + +#include <string.h> + +namespace physx +{ +struct HeapStatsFlags +{ + enum Enum + { + F_BASIC_STATS = 1 << 0, + F_INTERNAL_FRAGMENTATION = 1 << 1, + F_BIGGEST_FREE_BLOCK = 1 << 2, + F_HISTOGRAM = 1 << 3, + F_ALLOC_ID_STATS = 1 << 4, + F_ALL = 0xFFFFFFFF, + }; +}; + +#define BITSPERWORD sizeof(size_t)*8 + +class ApexHeapStats +{ +public: + ApexHeapStats(): + heapSize(0), + totalAllocated(0), + maxAllocated(0), + internalFragmentation(0), + maxInternalFragmentation(0), + biggestFreeBlock(0), + numEntries(0) + {} + + PX_INLINE void reset() + { + memset(this, 0, sizeof(ApexHeapStats)); + } + + // F_BASIC_STATS + size_t heapSize; + size_t totalAllocated; + size_t maxAllocated; + + // F_INTERNAL_FRAGMENTATION + size_t internalFragmentation; + size_t maxInternalFragmentation; + + // F_BIGGEST_FREE_BLOCK + size_t biggestFreeBlock; + + // F_HISTOGRAM + size_t freeBuddyHistogram[BITSPERWORD]; + size_t allocatedBuddyHistogram[BITSPERWORD]; + size_t numEntries; + + // F_ALLOC_ID_STATS + PxAllocIdStats allocIdStats[PxAllocId::NUM_IDS]; +}; + + +class HeapManagerInterface +{ +public: + // simple allocator interface over which the heap manager does its base allocation and allocates further pages + class Allocator + { + public: + virtual ~Allocator() {}; + + virtual void* alloc(const size_t size) = 0; + virtual void 
free(void* addr, const size_t size) = 0; + }; + + virtual ~HeapManagerInterface() {}; + + // INTERFACE METHODS + // init the HeapManager by passing it a block of memory and the smallest size of a memory block. + // returns true if init was successful + virtual bool init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag = size_t(-1)) = 0; + + // Changes the page size. The size of allocations over the supplied Allocator are a multiple of the pageSize. + // returns true if the page size was valid. (!0, >minBlockSize, pow2) + virtual bool setPageSize(size_t pageSize) = 0; + + // returns the address of an allocated block for the requested size. + // returns a NULL ptr if alloc failed. + virtual void* alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0; + + // returns true if the block at the given address could be resized to size + // returns false if this failed. Manual free and alloc is still possible but needs a memcopy. + virtual bool realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0; + + // frees a given block. + // returns true if the operation was successful + virtual bool free(void* addr) = 0; + + // deallocates all empty pages + virtual void freeEmptyPages() = 0; + + // ensures that there there is free memory of at least the requested size + // returns true if the operation was successful. Free memory was already big enough or new pages were allocated successfully. + virtual bool reserve(size_t size) = 0; + + // returns stats into a ApexHeapStats object, stats can be selected with HeapManagerStatsFlags. 
+ // returns true if the operation was successful + virtual bool getStats(ApexHeapStats& stats, const uint32_t flags) = 0; + + // discretisize memory into an array such that it can be visualized + // returns true if the operation was successful + virtual bool visualizeMemory(uint8_t* array, const size_t size) = 0; + + // returns the base address of the page containing the memory block at addr. + // returns NULL if addr doesn't correspond to a page + virtual void* findBaseAddress(void* addr) = 0; +}; + +} // end physx namespace + +#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H diff --git a/PxShared/src/cudamanager/src/HeapManagerLinkedList.h b/PxShared/src/cudamanager/src/HeapManagerLinkedList.h new file mode 100644 index 0000000..45a359d --- /dev/null +++ b/PxShared/src/cudamanager/src/HeapManagerLinkedList.h @@ -0,0 +1,204 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +//----------------------------------------------------------------------------// +// HeapManagerLinkedList.h +//----------------------------------------------------------------------------// + +#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H +#define PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H + +#include "foundation/PxAssert.h" + +namespace physx +{ + +template <typename T> +class LinkedList +{ +public: + + PX_INLINE LinkedList() + : mHead(NULL) + , mNumElements(0) + { + } + + PX_INLINE void insert(T*& elt) + { + if (mHead) + { + elt->next = mHead->next; + } + mHead = elt; + mNumElements++; + } + + PX_INLINE void insertSorted(T*& elt) + { + if (!mHead) + { + mHead = elt; + mHead->next = NULL; + } + else if (!mHead->next || (mHead->addr > elt->addr)) + { + if (mHead->addr > elt->addr) + { + elt->next = mHead; + mHead = elt; + } + else + { + mHead->next = elt; + elt->next = NULL; + } + } + else + { + T* cur = mHead; + while (cur->next && (elt->addr > cur->next->addr)) + { + cur = cur->next; + } + elt->next = cur->next; + cur->next = elt; + } + mNumElements++; + } + + PX_INLINE T* pop() + { + if (mHead) + { + T* ret = mHead; + mHead = mHead->next; + mNumElements--; + return ret; + } + return NULL; + } + + PX_INLINE bool remove(const T* elt) + { + PX_ASSERT(elt); + if (mHead && mHead == elt) + { + mHead = mHead->next; + mNumElements--; + return true; + } + else + { + T* cur = mHead; + while (cur && cur->next != elt) + { + PX_ASSERT(cur->addr < elt->addr); // assert for sorted list 
property. + cur = cur->next; + } + if (cur && elt) + { + cur->next = elt->next; + mNumElements--; + return true; + } + } + return false; + } + + PX_INLINE T* find(const size_t addr) + { + T* cur = mHead; + while (cur && cur->addr < addr) + { + cur = cur->next; + } + + return cur && (cur->addr == addr) ? cur : NULL; + } + + PX_INLINE T* findAndPop(const size_t addr) + { + if (mHead == NULL) + { + return NULL; + } + + if (mHead->addr == addr) + { + return pop(); + } + + T* cur = mHead; + T* last = mHead; + while (cur) + { + if (cur->addr == addr) + { + last->next = cur->next; + mNumElements--; + return cur; + } + else if (cur->addr > addr) + { + return NULL; // because list is sorted. + } + else + { + last = cur; + cur = cur->next; + } + } + return NULL; + } + + PX_INLINE size_t getSize() + { + return mNumElements; + } + PX_INLINE T* getHead() + { + return mHead; + } + + // hacky + PX_INLINE void setSize(size_t s) + { + mNumElements = s; + } + PX_INLINE void setHead(T* h) + { + mHead = h; + } +private: + T* mHead; + size_t mNumElements; +}; + +} // end physx namespace + +#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H diff --git a/PxShared/src/cudamanager/src/HeapManagerRef.cpp b/PxShared/src/cudamanager/src/HeapManagerRef.cpp new file mode 100644 index 0000000..bf3847f --- /dev/null +++ b/PxShared/src/cudamanager/src/HeapManagerRef.cpp @@ -0,0 +1,1380 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#include "CudaMemoryManager.h" +#include "HeapManagerRef.h" + +#include "PsSort.h" +#include "PsArray.h" +#include "PsAllocator.h" +#include "PsString.h" + +#include "foundation/PxMath.h" +#include "foundation/PxErrorCallback.h" +#include "foundation/PxMemory.h" + + +#if DUMP_HEAP_USAGE_TO_FILE +#include "PsWindowsInclude.h" +#endif + +using namespace physx::shdfnd; +using namespace physx; + + +#define CMM_DELETE_SINGLE(x) { if(x) delete x; x = NULL; } +#define CMM_DELETE_ARRAY(x) { if(x) delete [] x; x = NULL; } + +HeapManagerRef::HeapManagerRef(physx::PxErrorCallback& errorCallback, bool enableMutex) + : mHeaps(PX_DEBUG_EXP("HeapManagerRef:mHeaps")) + , mBuddyPool("mBuddyPool", 1024) + , mPageSize(0) + , mMinBlockSize(0) + , mMaxIntFrag(size_t(-1)) + , mNewEmptyPage(false) + , mMemAllocator(NULL) + , mGlobalAllocMem(0) + , mGlobalMaxAllocMem(0) + , mGlobalInternalFragmentation(0) + , mGlobalMaxInternalFragmentation(0) + , mErrorCallback(errorCallback) + +{ + PX_UNUSED(enableMutex); // SJB: heap alloc of shdfnd::Mutex not working for me +} + +HeapManagerRef::~HeapManagerRef() +{ + for (uint32_t i = 0; i < mHeaps.size(); i++) + { + if (mMemAllocator && mHeaps[i].baseAddr) + { + mMemAllocator->free(reinterpret_cast<void*>(mHeaps[i].baseAddr), mHeaps[i].heap->getTotalMemorySize()); + } + CMM_DELETE_SINGLE(mHeaps[i].heap); + } +#if DUMP_HEAP_USAGE_TO_FILE + fclose(mLogFile); +#endif +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// INTERFACE METHODS +bool HeapManagerRef::init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + // init Heap and do some basic checks. 
+ + // init only once + if (mHeaps.size()) + { + return false; + } + + if (baseSize && (minBlockSize > baseSize)) + { + return false; + } + + if (minBlockSize > pageSize) + { + return false; + } + + if (baseSize && (baseSize % minBlockSize)) + { + return false; + } + + uint8_t minBlockSizeLog2; + if (minBlockSize != findNextPow2(minBlockSizeLog2, minBlockSize, 0, BITSPERWORD)) + { + return false; + } + + if (pageSize != findNextPow2(pageSize, minBlockSizeLog2, BITSPERWORD)) + { + return false; + } + + if (!memAllocator) + { + return false; + } + + mMemAllocator = memAllocator; + mPageSize = pageSize; + mMinBlockSize = minBlockSize; + mMaxIntFrag = maxIntFrag; + + memset(&mGlobalAllocIdStats, 0, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS); + +#if DUMP_HEAP_USAGE_TO_FILE + char fileName[1024]; + sprintf_s(fileName, 1024, "HeapLog_%p.txt", this); + fopen_s(&mLogFile, fileName, "w"); + fprintf(mLogFile, "HeapSize: %d, BlockSize: %d Addr: 0x0\n", baseSize, minBlockSize); + QueryPerformanceCounter((LARGE_INTEGER*)&m_qpc); + QueryPerformanceFrequency((LARGE_INTEGER*)&m_qpf); +#endif + + // init heap + if (baseSize) + { + return allocateNewHeap(baseSize, true) != NULL; + } + else + { + return true; + } +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool HeapManagerRef::setPageSize(size_t pageSize) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + if (pageSize == 0) + { + return false; + } + + if (mMinBlockSize > pageSize) + { + return false; + } + + if (pageSize != findNextPow2(pageSize, 0, BITSPERWORD)) + { + return false; + } + + mPageSize = pageSize; + return true; +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void* HeapManagerRef::alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DEF()) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + +#if 
DUMP_HEAP_USAGE_TO_FILE + unsigned __int64 qpc; + QueryPerformanceCounter((LARGE_INTEGER*)&qpc); + float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf); + fprintf(mLogFile, "alloc t: %f s: %d", dtime, size); +#endif + + void* ret = NULL; + // try to allocate it in one of the heaps/pages + for (uint32_t i = 0; !ret && i < mHeaps.size(); i++) + { + ret = mHeaps[i].heap->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT()); + } + + // create a new page + if (!ret) + { + Heap* heap = allocateNewPages(size); + if (heap) + { + ret = heap->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT()); + } + } + +#if DUMP_HEAP_USAGE_TO_FILE + fprintf(mLogFile, " a: 0x%p\n", ret); +#endif + + return ret; +} + +//(10/20/2009 feodorb) TODO: decide whether we move the binary search +//somewhere away from here. Stands here for std::lower_bound replacement +template<typename T> +static uint32_t findUpperBound(const physx::shdfnd::Array<T>& refArray, const T& refValue) +{ + uint32_t start = 0, end = refArray.size(); + while (end - start > 0) + { + uint32_t midPoint = start + ((end - start) >> 1); + + if (!(refValue < refArray[midPoint])) + { + start = midPoint + 1; + } + else + { + end = midPoint; + } + } + return start; +} + +Heap* HeapManagerRef::findHeap(void* addr) const +{ + HeapManagerPage searchPage; + searchPage.baseAddr = reinterpret_cast<size_t>(addr); + + uint32_t upperBound = findUpperBound(mHeaps, searchPage); + PX_ASSERT(upperBound == 0 || + (searchPage.baseAddr >= mHeaps[upperBound - 1].baseAddr && + searchPage.baseAddr < mHeaps[upperBound - 1].baseAddr + mHeaps[upperBound - 1].heap->getTotalMemorySize()) + ); + + return (upperBound > 0) ? 
mHeaps[upperBound - 1].heap : 0; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool HeapManagerRef::realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DEF()) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + +#if DUMP_HEAP_USAGE_TO_FILE + unsigned __int64 qpc; + QueryPerformanceCounter((LARGE_INTEGER*)&qpc); + float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf); + fprintf(mLogFile, "realloc t: %f s: %d, a: 0x%p\n", dtime, size, addr); +#endif + + Heap* heap = findHeap(addr); + + if (heap != 0) + { + bool ret = heap->realloc(addr, size, PX_ALLOC_INFO_PARAMS_INPUT()); + if (ret && size > 0 && mNewEmptyPage) + { + shrinkMemory(); + } + return ret; + } + return false; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool HeapManagerRef::free(void* addr) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + +#if DUMP_HEAP_USAGE_TO_FILE + unsigned __int64 qpc; + QueryPerformanceCounter((LARGE_INTEGER*)&qpc); + float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf); + fprintf(mLogFile, "free t: %f a: 0x%p\n", dtime, addr); +#endif + + if (addr == NULL) + { + return false; + } + + Heap* heap = findHeap(addr); + if (heap != 0) + { + bool ret = heap->free(addr); + if (ret && mNewEmptyPage) + { + shrinkMemory(); + } + return ret; + } + return false; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +static void addStats(ApexHeapStats& dst, const ApexHeapStats& src, uint32_t flags) +{ + if (flags & HeapStatsFlags::F_BASIC_STATS) + { + dst.heapSize += src.heapSize; + } + if (flags & HeapStatsFlags::F_BIGGEST_FREE_BLOCK) + { + dst.biggestFreeBlock = PxMax(dst.biggestFreeBlock, src.biggestFreeBlock); + } + if (flags & 
HeapStatsFlags::F_HISTOGRAM) + { + dst.numEntries = PxMax(dst.numEntries, src.numEntries); + for (uint32_t i = 0; i < BITSPERWORD; i++) + { + dst.freeBuddyHistogram[i] += src.freeBuddyHistogram[i]; + dst.allocatedBuddyHistogram[i] += src.allocatedBuddyHistogram[i]; + } + } +} + +PX_INLINE void HeapManagerRef::addToStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation) +{ + PxAllocIdStats& idStats = mGlobalAllocIdStats[id]; + idStats.elements++; + idStats.size += size; + idStats.maxElements = PxMax(idStats.maxElements, idStats.elements); + idStats.maxSize = PxMax(idStats.maxSize, idStats.size); + mGlobalAllocMem += size; + mGlobalMaxAllocMem = PxMax(mGlobalMaxAllocMem, mGlobalAllocMem); + mGlobalInternalFragmentation += fragmentation; + mGlobalMaxInternalFragmentation = PxMax(mGlobalMaxInternalFragmentation, mGlobalInternalFragmentation); +} + + +PX_INLINE void HeapManagerRef::removeFromStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation) +{ + PxAllocIdStats& idStats = mGlobalAllocIdStats[id]; + PX_ASSERT(idStats.elements); + PX_ASSERT(idStats.size >= size); + + idStats.elements--; + idStats.size -= size; + mGlobalAllocMem -= size; + mGlobalInternalFragmentation -= fragmentation; +} + +PX_INLINE void HeapManagerRef::incStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation) +{ + PxAllocIdStats& idStats = mGlobalAllocIdStats[id]; + idStats.size += change; + idStats.maxSize = PxMax(idStats.maxSize, idStats.size); + mGlobalAllocMem += change; + mGlobalMaxAllocMem = PxMax(mGlobalMaxAllocMem, mGlobalAllocMem); + mGlobalInternalFragmentation += fragmentation; + mGlobalMaxInternalFragmentation = PxMax(mGlobalMaxInternalFragmentation, mGlobalInternalFragmentation); +} + +PX_INLINE void HeapManagerRef::decStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation) +{ + PxAllocIdStats& idStats = mGlobalAllocIdStats[id]; + PX_ASSERT(idStats.size >= change); + idStats.size -= change; + mGlobalAllocMem += 
change; + mGlobalInternalFragmentation += fragmentation; +} + +bool HeapManagerRef::getStats(ApexHeapStats& stats, const uint32_t flags) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + ApexHeapStats tmpStats; + stats.reset(); + for (uint32_t i = 0; i < mHeaps.size(); i++) + { + mHeaps[i].heap->getStats(tmpStats, flags); + addStats(stats, tmpStats, flags); + } + if (flags & HeapStatsFlags::F_BASIC_STATS) + { + stats.totalAllocated = mGlobalAllocMem; + stats.maxAllocated = mGlobalMaxAllocMem; + + } + if (flags & HeapStatsFlags::F_INTERNAL_FRAGMENTATION) + { + stats.internalFragmentation = mGlobalInternalFragmentation; + stats.maxInternalFragmentation = mGlobalMaxInternalFragmentation; + } + if (flags & HeapStatsFlags::F_ALLOC_ID_STATS) + { + // stats per allocation ID + PxMemCopy(stats.allocIdStats, mGlobalAllocIdStats, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS); + } + return true; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool HeapManagerRef::visualizeMemory(uint8_t* array, const size_t arraySize) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + ApexHeapStats tmpStats; + getStats(tmpStats, HeapStatsFlags::F_BASIC_STATS); + float scale = float(arraySize) / float(tmpStats.heapSize); + uint8_t* start = array; + for (uint32_t i = 0; i < mHeaps.size(); i++) + { + size_t heapSize = mHeaps[i].heap->getTotalMemorySize(); + size_t numVis = size_t(float(heapSize) * scale); + PX_ASSERT(start + numVis <= array + arraySize); + mHeaps[i].heap->visualizeMemory(start, numVis); + start += numVis; + } + return true; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void* HeapManagerRef::findBaseAddress(void* addr) +{ + Heap* heap = findHeap(addr); + if (heap) + { + return heap->getBaseAddress(); + } + return NULL; +} + 
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Heap* HeapManagerRef::allocateNewHeap(size_t heapSize, bool isPersistent) +{ + if (!mMemAllocator) + { + return NULL; + } + + void* newPage = mMemAllocator->alloc(heapSize); + if (newPage) + { + HeapManagerPage page; + page.baseAddr = reinterpret_cast<size_t>(newPage); + page.heap = PX_NEW(Heap)(*this, mErrorCallback); + page.isPersistent = isPersistent; + if (page.heap && page.heap->init(page.baseAddr, heapSize, mMinBlockSize, mMaxIntFrag)) + { + mHeaps.pushBack(page); + shdfnd::sort(mHeaps.begin(), (uint32_t) mHeaps.size()); + return page.heap; + } + else + { + mMemAllocator->free(newPage, page.heap->getTotalMemorySize()); + CMM_DELETE_SINGLE(page.heap); + } + } + return NULL; +} + +Heap* HeapManagerRef::allocateNewPages(size_t requestedSize) +{ + uint8_t pageSizeLog2; + uint8_t minBlockSizeLog2; + findNextPow2(minBlockSizeLog2, mMinBlockSize, 0, BITSPERWORD); + findNextPow2(pageSizeLog2, mPageSize, minBlockSizeLog2, BITSPERWORD); + const size_t allocSize = findNextPow2(requestedSize, pageSizeLog2, BITSPERWORD); + return allocateNewHeap(allocSize); +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void HeapManagerRef::removeDeletedHeapsFromList(uint32_t numDeletes) +{ + // remove pages from list, keeping it sorted. 
+ if (numDeletes) + { + const uint32_t numEntries = (uint32_t) mHeaps.size(); + + //seek + uint32_t w = 0; + while (w < (numEntries) && mHeaps[w].heap != NULL) + { + w++; + } + + // remove holes + uint32_t r = w + 1; + while (r < numEntries) + { + if (mHeaps[r].heap == NULL) + { + r++; + } + else + { + mHeaps[w++] = mHeaps[r++]; + } + } + + mHeaps.resize(numEntries - numDeletes); + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void HeapManagerRef::resetHeap(HeapManagerPage& page) +{ + PX_ASSERT(page.heap->getAllocatedMemorySize() == 0); + Heap* newHeap = PX_NEW(Heap)(*this, mErrorCallback); + if (newHeap) + { + if (newHeap->init(page.baseAddr, page.heap->getTotalMemorySize(), mMinBlockSize, mMaxIntFrag)) + { + CMM_DELETE_SINGLE(page.heap); + page.heap = newHeap; + } + else + { + CMM_DELETE_SINGLE(newHeap); + } + } +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void HeapManagerRef::freeEmptyPages() +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + // release heaps + uint32_t numDeletes = 0; + const uint32_t numEntries = (uint32_t) mHeaps.size(); + for (uint32_t i = 0; i < numEntries; i++) + { + HeapManagerPage& page = mHeaps[i]; + PX_ASSERT(page.heap); + if (page.isPersistent) + { + // for persistent pages: reset without release. 
+ if (page.heap->getAllocatedMemorySize() == 0) + { + resetHeap(page); + } + } + else if (page.heap->getAllocatedMemorySize() == 0) + { + mMemAllocator->free(reinterpret_cast<void*>(page.baseAddr), page.heap->getTotalMemorySize()); + CMM_DELETE_SINGLE(page.heap); + numDeletes++; + } + } + + if (numDeletes) + { + removeDeletedHeapsFromList(numDeletes); + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void HeapManagerRef::shrinkMemory() +{ + mNewEmptyPage = false; + + // collect stats + size_t totalAllocated = 0; + size_t totalPageSize = 0; + const uint32_t numEntries = (uint32_t) mHeaps.size(); + for (uint32_t i = 0; i < numEntries; i++) + { + HeapManagerPage& page = mHeaps[i]; + totalAllocated += page.heap->getAllocatedMemorySize(); + totalPageSize += page.heap->getTotalMemorySize(); + PX_ASSERT(totalAllocated <= totalPageSize); + } + + // shrink memory if free non-persistent space is half or more of the allocated pages. + // releasing from the back of address sorted list, other strategies like LRU, best fit are also possible. + if (totalPageSize) + { + float allocScale = float(totalAllocated) / float(totalPageSize); + if (allocScale <= 0.5f) + { + size_t sizeToRelease = totalAllocated ? 
(totalPageSize - totalAllocated) >> 1 : totalPageSize; + uint32_t numDeletes = 0; + for (uint32_t i = 0; i < numEntries; i++) + { + HeapManagerPage& page = mHeaps[numEntries - i - 1]; + PX_ASSERT(page.heap); + if (page.heap->getAllocatedMemorySize() == 0) + { + if (!page.isPersistent && page.heap->getTotalMemorySize() <= sizeToRelease) + { + mMemAllocator->free(reinterpret_cast<void*>(page.baseAddr), page.heap->getTotalMemorySize()); + sizeToRelease -= page.heap->getTotalMemorySize(); + CMM_DELETE_SINGLE(page.heap); + numDeletes++; + } + else + { + resetHeap(page); + } + } + } + + if (numDeletes) + { + removeDeletedHeapsFromList(numDeletes); + } + } + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool HeapManagerRef::reserve(size_t size) +{ + shdfnd::Mutex::ScopedLock lock(mMutex); + + size_t freeSize = 0; + for (uint32_t i = 0; i < mHeaps.size(); i++) + { + freeSize += mHeaps[i].heap->getTotalMemorySize() - mHeaps[i].heap->getAllocatedMemorySize(); + } + + if (freeSize < size) + { + return allocateNewPages(size - freeSize) != NULL; + } + else + { + return true; + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +PX_INLINE size_t HeapManagerRef::findNextPow2(const size_t val, const uint8_t min, const uint8_t max) +{ + size_t ret = val; + for (uint8_t i = min; i <= max; i++) + { + ret = size_t(1) << i; + if (ret >= val) + { + break; + } + } + return ret; +} + + +PX_INLINE size_t HeapManagerRef::findNextPow2(uint8_t& pow, const size_t val, const uint8_t min, const uint8_t max) +{ + size_t ret = val; + for (pow = min; pow <= max; pow++) + { + ret = size_t(1) << pow; + if (ret >= val) + { + break; + } + } + return ret; +} + + 
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void BuddyList::clear() +{ + Buddy* cur = pop(); + while (cur) + { + heap->getBuddyPool().destroy(cur); + cur = pop(); + } +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Heap::~Heap() +{ + for (uint32_t i = 0; i <= mMaxLevel; i++) + { + mFreeBuddiesAtLevel[i].clear(); + } + CMM_DELETE_ARRAY(mFreeBuddiesAtLevel); + + for (Buddy* buddy = mAllocatedBuddies.getHead(); buddy != NULL; buddy = buddy->next) + { + void* address = reinterpret_cast<void*>((buddy->addr << mMinBlockLog2) + mBaseAddr); +#if KEEP_DEBUG_INFO + char buffer[256]; + physx::shdfnd::snprintf(buffer, 256, "Memory leak!\naddress %p file %s, line %d, name %s", address, buddy->file, buddy->line, buddy->allocName); + mErrorCallback.reportError(PxErrorCode::eDEBUG_WARNING, buffer, __FILE__, __LINE__); +#else + char buffer[256]; + physx::shdfnd::snprintf(buffer, 256, "Memory leak at address %p", address); + mErrorCallback.reportError(PxErrorCode::eDEBUG_WARNING, buffer, __FILE__, __LINE__); +#endif + } + + //clear it anyway + mAllocatedBuddies.clear(); +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Heap::init(size_t baseAddr, const size_t baseSize, const size_t minBlockSize, const size_t maxIntFrag) +{ + if (minBlockSize != HeapManagerRef::findNextPow2(mMinBlockLog2, minBlockSize, 0, BITSPERWORD)) + { + return false; + } + + if ((maxIntFrag != size_t(-1)) && (maxIntFrag != HeapManagerRef::findNextPow2(maxIntFrag, 0, BITSPERWORD))) + { + return false; + } + + mMaxIntFrag = maxIntFrag; + + mMinBlockSize = minBlockSize; + mTotalSize = baseSize; + mBaseAddr = baseAddr; + + if (mBaseAddr == 0) + { + return false; + } + + size_t 
numBlocks = baseSize >> mMinBlockLog2; + // allow only memory blocks which have a power of 2 in size. and numblocks must be at least 1. + if (numBlocks != HeapManagerRef::findNextPow2(mMaxLevel, numBlocks, 0, sizeof(size_t) * 4)) + { + return false; + } + + mFreeBuddiesAtLevel = PX_NEW(BuddyList)[(unsigned int)(mMaxLevel + 1)]; + if (!mFreeBuddiesAtLevel) + { + return false; + } + + // init size of buddy arrays + for (uint32_t i = 0; i <= mMaxLevel; i++) + { + mFreeBuddiesAtLevel[i].buddySize = size_t(1) << i; + mFreeBuddiesAtLevel[i].heap = this; + } + mAllocatedBuddies.heap = this; + + Buddy* b = mManager.getBuddyPool().construct(); + if (!b) + { + CMM_DELETE_ARRAY(mFreeBuddiesAtLevel); + return false; + } + b->level = mMaxLevel; + + // add buddy to its array + mFreeBuddiesAtLevel[mMaxLevel].insert(b); + + return true; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +void* Heap::alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DEF()) +{ + /* + compute needed buddysize -> level + if(mFreeBuddiesAtLevel[level].array.size() > 0) + { + ret = array.popBack() + allocList.pushBack(ret) + return ret.adr+basePtr; + } + else + { + if(nonemptylist at higher levels) + ret = recursive split + else if(nonemptylist at lower levels ) + ret = recursive merge + else + ret = NULL; // heap full or to fragmented + } + */ + + PX_UNUSED(allocId); + PX_UNUSED(allocName); + PX_UNUSED(line); + PX_UNUSED(file); + + if (size == 0 || size > mTotalSize) + { + return NULL; + } + + PX_ASSERT(allocId < PxAllocId::NUM_IDS); + //PX_ASSERT(allocId != PxAllocId::UNASSIGNED); // enable to track unassigned memory + + // compute needed buddysize -> level + uint8_t level = 0; + HeapManagerRef::findNextPow2(level, size, mMinBlockLog2, BITSPERWORD); + level = uint8_t(level - mMinBlockLog2); + + Buddy* ret = NULL; + if (mFreeBuddiesAtLevel[level].getSize() > 0) + { + ret = 
mFreeBuddiesAtLevel[level].pop(); + } + else + { + // prefer splitting + if (level != mMaxLevel) + { + ret = findBySplitting(level); + } + // else try merging + if (!ret && level != 0) + { + ret = findByMerging(level); + } + } + + if (ret) + { + ret->occupiedSize = size; + size_t addr = ret->addr; + ret->allocId = uint16_t(allocId); +#if KEEP_DEBUG_INFO + ret->file = file; + ret->line = (uint32_t)line; + ret->allocName = allocName; +#endif + + size_t allocSize; + if (mMaxIntFrag != size_t(-1)) + { + allocSize = reduceIntFragment(*ret, mMaxIntFrag); // ret can be changed in here, that's why we store the address + } + else + { + allocSize = size_t(1) << (level + mMinBlockLog2); + mAllocatedBuddies.insertSorted(ret); + } + mAllocMem += allocSize; + mInternalFragmentation += allocSize - size; + mMaxAllocMem = PxMax(mAllocMem, mMaxAllocMem); + mMaxInternalFragmentation = PxMax(mInternalFragmentation, mMaxInternalFragmentation); + mManager.addToStats(allocId, allocSize, allocSize - size); + + PX_ASSERT(sanityTest()); + return reinterpret_cast<void*>((addr << mMinBlockLog2) + mBaseAddr); + } + else + { + PX_ASSERT(sanityTest()); + return NULL; + } +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Heap::realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DEF()) +{ + PX_UNUSED(allocId); + PX_UNUSED(allocName); + PX_UNUSED(line); + PX_UNUSED(file); + + if (addr == NULL || size > mTotalSize) + { + return false; + } + + if (size == 0) + { + //realloc to 0 bytes can't keep the pointer as it was. 
+ return false; + //return free(addr); + } + + size_t inernal_addr = reinterpret_cast<size_t>(addr) - mBaseAddr; + inernal_addr >>= mMinBlockLog2; + + // collect all buddies which are associated with this addr + shdfnd::Array<Buddy*, shdfnd::TempAllocator> budyList; + size_t totalAllocated = 0; + size_t buddyAllocated = 0; + Buddy* found = NULL; + do + { + found = mAllocatedBuddies.find(inernal_addr); + if (!found) + { + return false; + } + budyList.pushBack(found); + inernal_addr += size_t(1) << found->level; + totalAllocated += found->occupiedSize; + buddyAllocated += size_t(1) << (found->level + mMinBlockLog2); + } + while (found && !found->isLastBuddy); + + Buddy* cur = budyList.popBack(); + + // increase size + if (totalAllocated < size) + { + size_t leftSpace = (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize; + size_t neededSpace = size - totalAllocated; + if (neededSpace <= leftSpace) + { + cur->occupiedSize += neededSpace; +#if KEEP_DEBUG_INFO + cur->file = file; + cur->line = (uint32_t)line; + cur->allocName = allocName; +#endif + + mInternalFragmentation -= neededSpace; + mManager.decStats(PxAllocId::Enum(cur->allocId), 0, neededSpace); + + // replace + mAllocatedBuddies.remove(cur); + mAllocatedBuddies.insertSorted(cur); + PX_ASSERT(sanityTest()); + return true; + } + else + { + return false; +#ifdef UNREACHABLE + // TODO:try merge free buddies until big enough, + // then add buddy and do internal fragmentation reduction. + + // search for free blocks next to this one. + size_t addr = cur->addr + (size_t(1) << cur->level); + if (!mAllocatedBuddies.find(addr)) + { + return false; + } + + // if not found, return null, let user reallocate + PX_ASSERT(sanityTest()); + return false; +#endif + } + } + // reduce size + else + { + // succededly remove buddies until the requested size is reached. + // if internal fragmentation reduction is turned on, then an allocation can consist of multiple buddies. 
+ mInternalFragmentation -= (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize; + mManager.decStats(PxAllocId::Enum(cur->allocId), 0, (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize); + size_t diff = totalAllocated - size; + while (diff >= cur->occupiedSize) + { + diff -= cur->occupiedSize; + cur->occupiedSize = 0; + bool succ = mAllocatedBuddies.remove(cur); + PX_UNUSED(succ); + PX_ASSERT(succ); + mFreeBuddiesAtLevel[cur->level].insertSorted(cur); + size_t allocSize = size_t(1) << (cur->level + mMinBlockLog2); + mAllocMem -= allocSize; + mManager.decStats(PxAllocId::Enum(cur->allocId), allocSize, 0); + cur = budyList.popBack(); + } + cur->isLastBuddy = true; + cur->occupiedSize -= diff; + +#if KEEP_DEBUG_INFO + cur->file = file; + cur->line =(uint32_t)line; + cur->allocName = allocName; +#endif + + // replace + bool succ = mAllocatedBuddies.remove(cur); + PX_UNUSED(succ); + PX_ASSERT(succ); + mAllocatedBuddies.insertSorted(cur); + mInternalFragmentation += (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize; + mManager.incStats(PxAllocId::Enum(cur->allocId), 0, (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize); + PX_ASSERT(sanityTest()); + return true; + } +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Heap::free(void* addr) +{ + if (addr == NULL) + { + return false; + } + + size_t internalAddr = reinterpret_cast<size_t>(addr) - mBaseAddr; + internalAddr >>= mMinBlockLog2; + + bool ret = true; + + bool dummy = true; + while (dummy) + { + Buddy* b = mAllocatedBuddies.findAndPop(internalAddr); + + if (!b) + { + return false; + } + + size_t allocSize = size_t(1) << (b->level + mMinBlockLog2); + mAllocMem -= allocSize; + mInternalFragmentation -= allocSize - b->occupiedSize; + mManager.removeFromStats(PxAllocId::Enum(b->allocId), allocSize, allocSize - b->occupiedSize); + b->occupiedSize = 0; + 
+ mFreeBuddiesAtLevel[b->level].insertSorted(b); + + // check if this memory block occupied another buddy + if (b->isLastBuddy) + { + break; + } + else + { + internalAddr += size_t(1) << b->level; + } + } + + if (mAllocMem == 0) + { + mManager.notifyEmptyPage(); + } + + PX_ASSERT(sanityTest()); + return ret; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Heap::getStats(ApexHeapStats& stats, const uint32_t flags) +{ + if (flags & HeapStatsFlags::F_BASIC_STATS) + { + stats.heapSize = mTotalSize; + stats.totalAllocated = mAllocMem; + stats.maxAllocated = mMaxAllocMem; + } + if (flags & HeapStatsFlags::F_INTERNAL_FRAGMENTATION) + { + // internal fragmentation + stats.internalFragmentation = mInternalFragmentation; + stats.maxInternalFragmentation = mMaxInternalFragmentation; + } + if (flags & HeapStatsFlags::F_BIGGEST_FREE_BLOCK) + { + // bigggest free block + stats.biggestFreeBlock = 0; + uint8_t curLevel = mMaxLevel; + do + { + if (mFreeBuddiesAtLevel[curLevel].getSize()) + { + stats.biggestFreeBlock = mFreeBuddiesAtLevel[curLevel].buddySize << mMinBlockLog2; + break; + } + curLevel--; + } + while (curLevel != 0); + } + if (flags & HeapStatsFlags::F_HISTOGRAM) + { + // histograms + for (uint8_t i = 0; i <= mMaxLevel; i++) + { + stats.freeBuddyHistogram[i] = mFreeBuddiesAtLevel[i].getSize(); + stats.allocatedBuddyHistogram[i] = 0; + } + Buddy* b = mAllocatedBuddies.getHead(); + while (b) + { + stats.allocatedBuddyHistogram[b->level]++; + b = b->next; + } + stats.numEntries = size_t(mMaxLevel + 1); + } + return true; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Heap::sanityTest() +{ + size_t sum = 0; + + for (uint8_t i = 0; i <= mMaxLevel; i++) + { + size_t buddiesAtLevel = 0; + for (Buddy* cur = mFreeBuddiesAtLevel[i].getHead(); 
cur; cur = cur->next) + { + if ((size_t(1) << cur->level != mFreeBuddiesAtLevel[i].buddySize) || + (cur->occupiedSize > size_t(1) << (cur->level + mMinBlockLog2))) + { + return false; + } + sum += mFreeBuddiesAtLevel[i].buddySize << mMinBlockLog2; + buddiesAtLevel++; + } + if (mFreeBuddiesAtLevel[i].getSize() != buddiesAtLevel || + (buddiesAtLevel > (size_t(1) << (mMaxLevel - i)))) + { + return false; + } + } + + size_t numAllocated = 0; + for (Buddy* cur = mAllocatedBuddies.getHead(); cur; cur = cur->next) + { + sum += size_t(1) << (cur->level + mMinBlockLog2); + numAllocated++; + } + + if (numAllocated != mAllocatedBuddies.getSize()) + { + return false; + } + + ptrdiff_t diff = ptrdiff_t(sum - (size_t(1) << (mMaxLevel + mMinBlockLog2))); + if (diff != 0) + { + return false; + } + else + { + return true; + } +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Heap::visualizeMemory(uint8_t* array, const size_t size) +{ + float scale = (float)size / (size_t(1) << mMaxLevel); + + for (size_t i = 0; i < size; i++) + { + array[i] = 0; + } + for (Buddy* cur = mAllocatedBuddies.getHead(); cur; cur = cur->next) + { + size_t start = (size_t)((float)(cur->addr) * scale); + size_t end = (size_t)((float)(cur->addr + (size_t(1) << size_t(cur->level))) * scale); + PX_ASSERT(start <= size); + PX_ASSERT(end <= size); + for (size_t i = start; i < end; i++) + { + PX_ASSERT(i < size); + array[i] = uint8_t(cur->level + 1); + } + } + + return true; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Heap::splitBuddy(Buddy* b) +{ + if (b->level == 0) + { + return false; + } + + b->level--; + size_t newSize = size_t(1) << b->level; + + Buddy* b0 = b; + Buddy* b1 = mManager.getBuddyPool().construct(*b); + PX_ASSERT(b0 && b1); + + b1->addr = b1->addr + newSize; 
+ + mFreeBuddiesAtLevel[b0->level].insertSorted(b0); + mFreeBuddiesAtLevel[b1->level].insertSorted(b1); + return true; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool Heap::mergeBuddies(Buddy* b0, Buddy* b1) +{ + if (b0->level != b1->level || b0->level >= mMaxLevel || (b1->addr - b0->addr) != size_t(1) << size_t(b0->level)) + { + return false; + } + + Buddy* b = b0; + b->occupiedSize = 0; + b->isLastBuddy = true; + b->level++; + b->next = NULL; + mFreeBuddiesAtLevel[b->level].insertSorted(b); + + mManager.getBuddyPool().destroy(b1); + return true; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Buddy* Heap::findBySplitting(uint8_t searchLevel) +{ + Buddy* ret = NULL; + + uint8_t curLevel = searchLevel; + + // walk through array of buddy lists and search for a free buddy which is at level >= searchLevel + for (; !mFreeBuddiesAtLevel[curLevel].getSize() && (curLevel < mMaxLevel); curLevel++) + { + ; + } + + // pop buddy at highest level and split until it has the correct level + ret = mFreeBuddiesAtLevel[curLevel].pop(); + for (; ret && (curLevel != searchLevel) && curLevel > 0; curLevel--) + { + splitBuddy(ret); + ret = mFreeBuddiesAtLevel[curLevel - 1].pop(); + } + return ret; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +Buddy* Heap::findByMerging(uint8_t searchLevel) +{ + /* + while there is no pair to merge on this level, ask lower level to merge + may ask lower level to merge more than 1 pair after each failure + or just merge all pairs of lower levels + */ + if (searchLevel == 0) + { + return NULL; + } + + uint8_t curLevel = uint8_t(searchLevel - 1); + bool dummy = true; + while (dummy) + { + int32_t shift = 
(mMaxLevel - (1 << (curLevel + 1))); + shift = shift >= 0 ? shift : 0; + size_t numToFind = size_t(1) << shift; + size_t found = findPairAndMerge(mFreeBuddiesAtLevel[curLevel], numToFind); + if (found) + { + if (curLevel == searchLevel - 1) + { + break; + } + curLevel++; + } + else + { + if (curLevel > 0) + { + curLevel--; + } + else + { + return NULL; + } + } + } + return mFreeBuddiesAtLevel[searchLevel].pop(); +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +size_t Heap::findPairAndMerge(BuddyList& list, size_t numToFind) +{ + size_t found = 0; + Buddy* cur = list.getHead(); + Buddy* last = list.getHead(); + size_t diff = list.buddySize; + while ((found != numToFind) && cur && cur->next) + { + // find buddy pair b0 and b1, b0 must be at an even address, and b0 and b1 must be neighbours in address space. + // since the list is sorted, we do only compare neighbours in the list. 
+ if (((cur->addr & (size_t(1) << size_t(cur->level))) == 0) && (cur->next->addr - cur->addr == diff)) + { + Buddy* b0 = cur; + Buddy* b1 = cur->next; + + if (cur == list.getHead()) + { + list.setHead(cur->next->next); + cur = list.getHead(); + last = cur; + } + else + { + cur = cur->next->next; + last->next = cur; + } + list.setSize(list.getSize() - 2); + if (mergeBuddies(b0, b1)) + { + found++; + } + } + else + { + last = cur; + cur = cur->next; + } + } + return found; +} + + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +size_t Heap::reduceIntFragment(Buddy& b, size_t threshold) +{ + /* + while internalFragmentation > threshold + cut buddy in the middle + if cut goes through occupied space + left buddy is full, can be pushed to alloc list clear last buddy flag + else + right buddy is free, push it to free list + */ + size_t allocSize = 0; + Buddy* curB = &b; + curB->isLastBuddy = false; + while (curB->level && ((size_t(1) << (curB->level + mMinBlockLog2)) - curB->occupiedSize) > threshold) + { + //split + Buddy* b0 = mManager.getBuddyPool().construct(*curB); + Buddy* b1 = curB; + b0->level--; + b1->level--; + b1->addr += size_t(1) << size_t(b1->level); + if ((size_t(1) << (b0->level + mMinBlockLog2)) < b0->occupiedSize) + { + b0->occupiedSize = size_t(1) << (b0->level + mMinBlockLog2); + b1->occupiedSize -= b0->occupiedSize; + mAllocatedBuddies.insertSorted(b0); + allocSize += size_t(1) << b1->level; + curB = b1; + } + else + { + b1->occupiedSize = 0; + mFreeBuddiesAtLevel[b1->level].insertSorted(b1); + curB = b0; + } + } + curB->isLastBuddy = true; + allocSize += size_t(1) << curB->level; + mAllocatedBuddies.insertSorted(curB); + return (allocSize << mMinBlockLog2); +} + + diff --git a/PxShared/src/cudamanager/src/HeapManagerRef.h b/PxShared/src/cudamanager/src/HeapManagerRef.h new file mode 100644 index 0000000..e6e585e --- /dev/null +++ 
b/PxShared/src/cudamanager/src/HeapManagerRef.h @@ -0,0 +1,297 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +//----------------------------------------------------------------------------// +// HeapManagerRef.h +//----------------------------------------------------------------------------// + +#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H +#define PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H + +#include "task/PxTaskDefine.h" + +#include "HeapManagerInterface.h" +#include "HeapManagerLinkedList.h" +#include "PsPool.h" +#include "PsMutex.h" +#include "PsArray.h" +#include "PsUserAllocated.h" + +#define DUMP_HEAP_USAGE_TO_FILE 0 + +#ifdef _DEBUG +#define KEEP_DEBUG_INFO 1 +#else +#define KEEP_DEBUG_INFO 0 +#endif + +#if DUMP_HEAP_USAGE_TO_FILE +#include "stdio.h" +#endif + +namespace physx +{ + +class Heap; +struct Buddy +{ + PX_INLINE Buddy() + : addr(0) + , next(0) + , occupiedSize(0) + , allocId(PxAllocId::UNASSIGNED) + , isLastBuddy(true) + , level(0) +#if KEEP_DEBUG_INFO + , file(NULL) + , allocName(NULL) + , line(0) +#endif + {} + + PX_INLINE Buddy(Buddy& b) + : addr(b.addr) + , next(b.next) + , occupiedSize(b.occupiedSize) + , allocId(b.allocId) + , isLastBuddy(b.isLastBuddy) + , level(b.level) +#if KEEP_DEBUG_INFO + , file(b.file) + , allocName(b.allocName) + , line(b.line) +#endif + {} + + size_t addr; + Buddy* next; + size_t occupiedSize; + uint16_t allocId; + uint8_t isLastBuddy; + uint8_t level; +#if KEEP_DEBUG_INFO + const char* file; + const char* allocName; + uint32_t line; +#endif +}; + +struct BuddyList: public LinkedList<Buddy>, public shdfnd::UserAllocated +{ + BuddyList() + : buddySize(0) + , heap(NULL) + {} + void clear(); + + size_t buddySize; // = 2^level + Heap* heap; +}; + +struct HeapManagerPage +{ + PX_INLINE bool operator < (const HeapManagerPage& p) const + { + return baseAddr < p.baseAddr; + } + + PX_INLINE bool operator > (const HeapManagerPage& p) const + { + return baseAddr > p.baseAddr; + } + + size_t baseAddr; + Heap* heap; + bool isPersistent; +}; + + +class HeapManagerRef: public HeapManagerInterface, public shdfnd::UserAllocated +{ + 
PX_NOCOPY(HeapManagerRef) +public: + HeapManagerRef(physx::PxErrorCallback& errorCallback, bool enableMutex = true); + virtual ~HeapManagerRef(); + + // INTERFACE METHODS + virtual bool init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag); + virtual bool setPageSize(size_t pageSize); + virtual void* alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); + virtual bool realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); + virtual bool free(void* addr); + virtual void freeEmptyPages(); + virtual bool reserve(size_t size); + virtual bool getStats(ApexHeapStats& stats, const uint32_t flags); + virtual bool visualizeMemory(uint8_t* array, const size_t size); + virtual void* findBaseAddress(void* addr); + + // INTERNALS + // searches 2^pow >= val, searches pow in [min, max] + static PX_INLINE size_t findNextPow2(const size_t val, const uint8_t min = 0, const uint8_t max = BITSPERWORD); + static PX_INLINE size_t findNextPow2(uint8_t& pow, const size_t val, const uint8_t min = 0, const uint8_t max = BITSPERWORD); + + PX_INLINE void addToStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation); + PX_INLINE void removeFromStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation); + PX_INLINE void incStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation); + PX_INLINE void decStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation); + + PX_INLINE void notifyEmptyPage() + { + mNewEmptyPage = true; + } + PX_INLINE shdfnd::Pool<Buddy>& getBuddyPool() + { + return mBuddyPool; + } + +private: + Heap* allocateNewHeap(size_t heapSize, bool isPersistent = false); + Heap* allocateNewPages(size_t requestedSize); + void resetHeap(HeapManagerPage& page); + void removeDeletedHeapsFromList(uint32_t numDeletes); + void shrinkMemory(); + + Heap* findHeap(void* addr) const; + +private: 
+ // heaps + shdfnd::Array<HeapManagerPage> mHeaps; + shdfnd::Pool<Buddy> mBuddyPool; + size_t mPageSize; + size_t mMinBlockSize; + size_t mMaxIntFrag; + bool mNewEmptyPage; + // lock + shdfnd::Mutex mMutex; + // page allocator + Allocator* mMemAllocator; + // overall stats + size_t mGlobalAllocMem; + size_t mGlobalMaxAllocMem; + size_t mGlobalInternalFragmentation; + size_t mGlobalMaxInternalFragmentation; + // stats per allocation ID + PxAllocIdStats mGlobalAllocIdStats[PxAllocId::NUM_IDS]; + // error callback + physx::PxErrorCallback& mErrorCallback; + +#if DUMP_HEAP_USAGE_TO_FILE + FILE* mLogFile; + unsigned __int64 m_qpc; + unsigned __int64 m_qpf; +#endif +}; + + +class Heap : public shdfnd::UserAllocated +{ +public: + PX_INLINE Heap(HeapManagerRef& manager, physx::PxErrorCallback& errorCallback) + : mManager(manager) + , mErrorCallback(errorCallback) + , mBaseAddr(0) + , mMinBlockSize(0) + , mFreeBuddiesAtLevel(NULL) + , mMaxIntFrag(0) + , mTotalSize(0) + , mMaxLevel(0) + , mMinBlockLog2(0) + , mAllocMem(0) + , mMaxAllocMem(0) + , mInternalFragmentation(0) + , mMaxInternalFragmentation(0) + {} + + PX_INLINE ~Heap(); + + bool init(size_t baseAddr, const size_t baseSize, const size_t minBlockSize, const size_t maxIntFrag); + void* alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); + bool realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); + bool free(void* addr); + bool getStats(ApexHeapStats& stats, const uint32_t flags); + bool visualizeMemory(uint8_t* array, const size_t size); + + PX_INLINE size_t getTotalMemorySize() + { + return mTotalSize; + } + PX_INLINE size_t getAllocatedMemorySize() + { + return mAllocMem; + } + PX_INLINE shdfnd::Pool<Buddy>& getBuddyPool() + { + return mManager.getBuddyPool(); + } + PX_INLINE void* getBaseAddress() + { + return (void*)mBaseAddr; + } + +private: + // split buddy b with size 2^level into two buddies with level 2^(level-1) and append those to the 
free list. deletes b, assumes that b was removed from the list before. + bool splitBuddy(Buddy* b); + // merge 2 buddies to next bigger one. deletes b0 and b1, assumes that they are already removed from their array. + bool mergeBuddies(Buddy* b0, Buddy* b1); + + // split of right, free children of a buddy if the internal fragmentation of a buddy is bigger than a threshold + // returns the size of all allocated buddies + size_t reduceIntFragment(Buddy& b, size_t threshold); + + // find a Buddy by splitting a Buddy at searchLevel + Buddy* findBySplitting(uint8_t searchLevel); + Buddy* findByMerging(uint8_t searchLevel); + size_t findPairAndMerge(BuddyList& list, size_t numToFind); + + bool sanityTest(); + + void operator=(const Heap&) + { + PX_ASSERT(0); + } + +private: + HeapManagerRef& mManager; + physx::PxErrorCallback& mErrorCallback; + size_t mBaseAddr; + size_t mMinBlockSize; + BuddyList* mFreeBuddiesAtLevel; + BuddyList mAllocatedBuddies; + size_t mMaxIntFrag; + size_t mTotalSize; + uint8_t mMaxLevel; // 2^maxLevel <= memorySize + uint8_t mMinBlockLog2; + + size_t mAllocMem; // fragmented + size_t mMaxAllocMem; + size_t mInternalFragmentation; + size_t mMaxInternalFragmentation; +}; + +} // end physx namespace + +#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H diff --git a/PxShared/src/cudamanager/src/PhysXDevice.h b/PxShared/src/cudamanager/src/PhysXDevice.h new file mode 100644 index 0000000..b066bdc --- /dev/null +++ b/PxShared/src/cudamanager/src/PhysXDevice.h @@ -0,0 +1,119 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#ifndef ___PHYS_X_DEVICE_ +#define ___PHYS_X_DEVICE_ + +#include "foundation/PxPreprocessor.h" + +#if PX_WINDOWS +# pragma warning (push) +# pragma warning (disable : 4668) //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives' +# include "windows/PsWindowsInclude.h" +# pragma warning (pop) +#endif + +#if PX_WINDOWS + #define PHYSX_DEV_DLL_API extern "C" __declspec(dllexport) + #ifdef _DEBUG + # define PHYSX_DEV_DLL_PRIVATE_API extern "C" __declspec(dllexport) + #else + # define PHYSX_DEV_DLL_PRIVATE_API + #endif +#else + #define PHYSX_DEV_DLL_API + #define PHYSX_DEV_DLL_PRIVATE_API +#endif + +/** typedefs */ +typedef int PHYSX_DEV_STATUS; +typedef unsigned int PhysXDevHandle; + +/** PHYSX_DEV_STATUS values */ +enum +{ + PHYSX_DEV_OK = 0, + PHYSX_DEV_UNKNOWN_ERROR, + PHYSX_DEV_INVALID_HANDLE, + PHYSX_DEV_UNINITIALIZED, + PHYSX_DEV_NV_API_UNAVAILABLE, + PHYSX_DEV_CUDA_UNAVAILABLE, + PHYSX_DEV_CUDA_MEMORY_ALLOC_FAILURE, + PHYSX_DEV_LEGACY_MODE_GPU_HANDLE, + PHYSX_DEV_PHYSX_DEV_UNAVAILABLE, +}; + + +/** + * physxDevInit + * Initialize the PhysX Device information functions. + * Must be called before using any other API functions. + */ +PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevInit(); + +/** + * physxDevClose + * Call this when finished with the PhysX Device API, it + * frees memory that is allocated in physxDevInit + */ +PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevClose(); + +/** + * physxDevGetCudaOrdinal + * Returns the CUDA device ordinal for the given PhysX GPU device + */ +PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevGetCudaOrdinal(int* cudaDevOrdinal, PhysXDevHandle devHandle); +PHYSX_DEV_STATUS physxDevGetCudaOrdinalWrapper(int* cudaDevOrdinal); + +/** + * physxDevGet + * Returns the PhysX GPU device that the PhysX Engine + * will use. If the device is -1, the engine will + * automatically choose which GPU to use. 
+ * + * This function handles the R177/R180 detection first, then decides accordingly + * + * if(180+) + * if(GPU Enabled) ? get NVAPI sel : -1 + * else (177) + * if regkey ? regkey value : -1 (PHYSX_DEV_LEGACY_MODE_GPU_HANDLE returned) + */ +PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevGet(PhysXDevHandle* devHandle); + +/** + * physxDevUsingDedicatedGPU + * Returns whether or not PhysX has a dedicated GPU (set by the user in the NV CPL) + */ +PHYSX_DEV_DLL_API bool physxDevUsingDedicatedGPU(); + +/** + * physxDevSLIEnabled + * Returns whether or not the device pointer specified (D3D device) is in an SLI group + */ +PHYSX_DEV_DLL_API bool physxDevSLIEnabled(void* graphicsDevice); + +#endif diff --git a/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp b/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp new file mode 100644 index 0000000..77896c2 --- /dev/null +++ b/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp @@ -0,0 +1,248 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#include "task/PxTaskDefine.h" + +#if PX_SUPPORT_GPU_PHYSX + +#include "foundation/PxErrorCallback.h" + +#include "PhysXDeviceSettings.h" +#include "PhysXDevice.h" + +#if PX_VC +#pragma warning(disable: 4191) //'operator/operation' : unsafe conversion from 'type of expression' to 'type required' +#endif + +namespace +{ +#if PX_WIN32 || PX_WIN64 + /** funcs for the dynamic loading of the PhysXDevice.dll file */ + typedef PHYSX_DEV_STATUS __cdecl physxDevInit_t(); + typedef PHYSX_DEV_STATUS __cdecl physxDevClose_t(); + typedef PHYSX_DEV_STATUS __cdecl physxDevGet_t(PhysXDevHandle* devHandle); + typedef PHYSX_DEV_STATUS __cdecl physxDevGetCudaOrdinal_t(int* cudaDevOrdinal, PhysXDevHandle devHandle); + typedef bool __cdecl physxDevUsingDedicatedGPU_t(); + typedef bool __cdecl physxDevSLIEnabled_t(void* graphicsDevice); + + /** globals for cuda functions */ + static physxDevInit_t* physxDevInit_f; + static physxDevClose_t* physxDevClose_f; + static physxDevGetCudaOrdinal_t* physxDevGetCudaOrdinal_f; + static physxDevGet_t* physxDevGet_f; + static physxDevUsingDedicatedGPU_t* physxDevUsingDedicatedGPU_f; + static physxDevSLIEnabled_t* physxDevSLIEnabled_f; + + /** globals */ + static HMODULE gPhysXDevModuleH; + + PHYSX_DEV_STATUS initPhysXDeviceLib() + { + PHYSX_DEV_STATUS status; 
+#if PX_X86 + gPhysXDevModuleH = LoadLibrary("PhysXDevice.dll"); +#else + gPhysXDevModuleH = LoadLibrary("PhysXDevice64.dll"); +#endif + if (!gPhysXDevModuleH) + { + return PHYSX_DEV_PHYSX_DEV_UNAVAILABLE; + } + + physxDevInit_f = (physxDevInit_t*)GetProcAddress(gPhysXDevModuleH, "physxDevInit"); + physxDevClose_f = (physxDevClose_t*)GetProcAddress(gPhysXDevModuleH, "physxDevClose"); + physxDevGetCudaOrdinal_f = (physxDevGetCudaOrdinal_t*)GetProcAddress(gPhysXDevModuleH, "physxDevGetCudaOrdinal"); + physxDevGet_f = (physxDevGet_t*)GetProcAddress(gPhysXDevModuleH, "physxDevGet"); + physxDevUsingDedicatedGPU_f = (physxDevUsingDedicatedGPU_t*)GetProcAddress(gPhysXDevModuleH, "physxDevUsingDedicatedGPU"); + physxDevSLIEnabled_f = (physxDevSLIEnabled_t*)GetProcAddress(gPhysXDevModuleH, "physxDevSLIEnabled"); + + if (!physxDevInit_f || + !physxDevClose_f || + !physxDevGetCudaOrdinal_f || + !physxDevGet_f) + { + FreeLibrary(gPhysXDevModuleH); + return PHYSX_DEV_CUDA_UNAVAILABLE; + } + + status = physxDevInit_f(); + if (PHYSX_DEV_OK != status) + { + FreeLibrary(gPhysXDevModuleH); + return status; + } + + return PHYSX_DEV_OK; + } +#endif // PX_WIN32 || PX_WIN64 + + PHYSX_DEV_STATUS getCudaOrdinal(int* cudaDevOrdinal) + { +#if PX_WIN32 || PX_WIN64 + PHYSX_DEV_STATUS status; + PhysXDevHandle selectedDev; + + status = initPhysXDeviceLib(); + + if (PHYSX_DEV_OK != status) + { + return status; + } + + status = physxDevGet_f(&selectedDev); + physxDevGetCudaOrdinal_f(cudaDevOrdinal, selectedDev); + + physxDevClose_f(); + FreeLibrary(gPhysXDevModuleH); + + if (status == PHYSX_DEV_LEGACY_MODE_GPU_HANDLE) // R177 installed + { + return PHYSX_DEV_LEGACY_MODE_GPU_HANDLE; + } + else + { + return PHYSX_DEV_OK; + } +#elif PX_LINUX + const char* deviceOrdinalString = ::getenv("PHYSX_GPU_DEVICE"); + if (!deviceOrdinalString) + *cudaDevOrdinal = 0; + else + *cudaDevOrdinal = atoi(deviceOrdinalString); + return PHYSX_DEV_OK; +#endif + } + +} + +namespace physx +{ + + int 
PhysXDeviceSettings::getSuggestedCudaDeviceOrdinal(physx::PxErrorCallback& errc) + { + int cudaDevOrdinal = -1; + switch (getCudaOrdinal(&cudaDevOrdinal)) + { + case PHYSX_DEV_OK: + break; + + case PHYSX_DEV_UNKNOWN_ERROR: + errc.reportError(PxErrorCode::eDEBUG_WARNING, "unknown error during CUDA device detection\n", __FILE__, __LINE__); + break; + + case PHYSX_DEV_NV_API_UNAVAILABLE: + errc.reportError(PxErrorCode::eDEBUG_WARNING, "NVAPI is not available\n", __FILE__, __LINE__); + break; + + case PHYSX_DEV_CUDA_UNAVAILABLE: + errc.reportError(PxErrorCode::eDEBUG_WARNING, "CUDA is not available\n", __FILE__, __LINE__); + break; + + case PHYSX_DEV_PHYSX_DEV_UNAVAILABLE: +#if PX_X86 + errc.reportError(PxErrorCode::eDEBUG_WARNING, "PhysXDevice.dll is not available\n", __FILE__, __LINE__); +#else + errc.reportError(PxErrorCode::eDEBUG_WARNING, "PhysXDevice64.dll is not available\n", __FILE__, __LINE__); +#endif + break; + + default: + errc.reportError(PxErrorCode::eDEBUG_WARNING, "unknown error during CUDA device detection\n", __FILE__, __LINE__); + break; + } + + return cudaDevOrdinal; + } + + int PhysXDeviceSettings::isUsingDedicatedGPU() + { +#if PX_WIN32 || PX_WIN64 + PHYSX_DEV_STATUS status; + bool dedicated = false; + + status = initPhysXDeviceLib(); + + if (PHYSX_DEV_OK != status) + { + return 0; + } + + if (physxDevUsingDedicatedGPU_f) + { + dedicated = physxDevUsingDedicatedGPU_f(); + physxDevClose_f(); + FreeLibrary(gPhysXDevModuleH); + return(dedicated); + } + else + { + physxDevClose_f(); + FreeLibrary(gPhysXDevModuleH); + return(-1); + } +#elif PX_LINUX + // need some way to set this + return 0; +#endif + } + + bool PhysXDeviceSettings::isSLIEnabled(void* graphicsDevice) + { +#if PX_WIN32 || PX_WIN64 + PHYSX_DEV_STATUS status; + status = initPhysXDeviceLib(); + + if (PHYSX_DEV_OK != status) + { + return false; + } + + if (physxDevSLIEnabled_f) + { + bool enabled = physxDevSLIEnabled_f(graphicsDevice); + physxDevClose_f(); + FreeLibrary(gPhysXDevModuleH); + 
return enabled; + } + else + { + physxDevClose_f(); + FreeLibrary(gPhysXDevModuleH); + return false; + } +#elif PX_LINUX + // Unimplemented for Linux because we don't need it, not because it's really always false. + PX_UNUSED(graphicsDevice); + return false; +#endif + } + +} // end physx namespace + +#endif // PX_SUPPORT_GPU_PHYSX + + diff --git a/PxShared/src/fastxml/include/PsFastXml.h b/PxShared/src/fastxml/include/PsFastXml.h new file mode 100644 index 0000000..e1f1c69 --- /dev/null +++ b/PxShared/src/fastxml/include/PsFastXml.h @@ -0,0 +1,167 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFASTXML_PSFASTXML_H +#define PSFASTXML_PSFASTXML_H + +#include "foundation/PxSimpleTypes.h" // defines basic data types; modify for your platform as needed. +#include "foundation/PxIO.h" +#include "foundation/PxAssert.h" +#include "PsAllocator.h" + +namespace physx +{ +namespace shdfnd +{ + +class FastXml +{ + PX_NOCOPY(FastXml) + + public: + class AttributePairs + { + int argc; + const char** argv; + + public: + AttributePairs() : argc(0), argv(NULL) + { + } + AttributePairs(int c, const char** v) : argc(c), argv(v) + { + } + + PX_INLINE int getNbAttr() const + { + return argc / 2; + } + + const char* getKey(uint32_t index) const + { + PX_ASSERT((index * 2) < uint32_t(argc)); + return argv[index * 2]; + } + + const char* getValue(uint32_t index) const + { + PX_ASSERT((index * 2 + 1) < uint32_t(argc)); + return argv[index * 2 + 1]; + } + + const char* get(const char* attr) const + { + int32_t count = argc / 2; + for(int32_t i = 0; i < count; ++i) + { + const char* key = argv[i * 2], *value = argv[i * 2 + 1]; + if(strcmp(key, attr) == 0) + return value; + } + + return NULL; + } + }; + + /*** + * Callbacks to the user with the contents of the XML file properly digested. + */ + class Callback + { + public: + virtual ~Callback() + { + } + virtual bool processComment(const char* comment) = 0; // encountered a comment in the XML + + // 'element' is the name of the element that is being closed. + // depth is the recursion depth of this element. + // Return true to continue processing the XML file. 
+ // Return false to stop processing the XML file; leaves the read pointer of the stream right after this close + // tag. + // The bool 'isError' indicates whether processing was stopped due to an error, or intentionally canceled early. + virtual bool processClose(const char* element, uint32_t depth, bool& isError) = 0; // process the 'close' + // indicator for a previously + // encountered element + + // return true to continue processing the XML document, false to skip. + virtual bool processElement(const char* elementName, // name of the element + const char* elementData, // element data, null if none + const AttributePairs& attr, // attributes + int32_t lineno) = 0; // line number in the source XML file + + // process the XML declaration header + virtual bool processXmlDeclaration(const AttributePairs&, // attributes + const char* /*elementData*/, int32_t /*lineno*/) + { + return true; + } + + virtual bool processDoctype(const char* /*rootElement*/, // Root element tag + const char* /*type*/, // SYSTEM or PUBLIC + const char* /*fpi*/, // Formal Public Identifier + const char* /*uri*/) // Path to schema file + { + return true; + } + + virtual void* allocate(uint32_t size) + { + return getAllocator().allocate(size, "FastXml", __FILE__, __LINE__); + } + + virtual void deallocate(void* ptr) + { + getAllocator().deallocate(ptr); + } + }; + + virtual bool processXml(PxInputData& buff, bool streamFromMemory = false) = 0; + + virtual const char* getError(int32_t& lineno) = 0; // report the reason for a parsing error, and the line number + // where it occurred. 
+ + FastXml() + { + } + + virtual void release(void) = 0; + + protected: + virtual ~FastXml() + { + } +}; + +FastXml* createFastXml(FastXml::Callback* iface); + +} // shdfnd +} // physx + +#endif // PSFASTXML_PSFASTXML_H diff --git a/PxShared/src/fastxml/src/PsFastXml.cpp b/PxShared/src/fastxml/src/PsFastXml.cpp new file mode 100644 index 0000000..dcb8c37 --- /dev/null +++ b/PxShared/src/fastxml/src/PsFastXml.cpp @@ -0,0 +1,833 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. 
+// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxAssert.h" +#include "foundation/PxMemory.h" +#include "Ps.h" +#include "PsFastXml.h" +#include <stdio.h> +#include <string.h> +#include <new> +#include <ctype.h> + +using namespace physx; + +namespace +{ +#define MIN_CLOSE_COUNT 2 +#define DEFAULT_READ_BUFFER_SIZE (16 * 1024) +#define NUM_ENTITY 5 + +struct Entity +{ + const char* str; + unsigned int strLength; + char chr; +}; + +static const Entity entity[NUM_ENTITY] = { + { "<", 4, '<' }, { "&", 5, '&' }, { ">", 4, '>' }, { """, 6, '\"' }, { "'", 6, '\'' } +}; + +class MyFastXml : public physx::shdfnd::FastXml +{ + public: + enum CharType + { + CT_DATA, + CT_EOF, + CT_SOFT, + CT_END_OF_ELEMENT, // either a forward slash or a greater than symbol + CT_END_OF_LINE + }; + + MyFastXml(Callback* c) + { + mStreamFromMemory = true; + mCallback = c; + memset(mTypes, CT_DATA, sizeof(mTypes)); + mTypes[0] = CT_EOF; + mTypes[uint8_t(' ')] = mTypes[uint8_t('\t')] = CT_SOFT; + mTypes[uint8_t('/')] = mTypes[uint8_t('>')] = mTypes[uint8_t('?')] = CT_END_OF_ELEMENT; + mTypes[uint8_t('\n')] = mTypes[uint8_t('\r')] = CT_END_OF_LINE; + mError = 0; + mStackIndex = 0; + mFileBuf = NULL; + mReadBufferEnd = NULL; + mReadBuffer = NULL; + mReadBufferSize = DEFAULT_READ_BUFFER_SIZE; + mOpenCount = 0; + mLastReadLoc = 0; + for(uint32_t i = 0; i < (MAX_STACK + 1); i++) + { + mStack[i] = NULL; + mStackAllocated[i] = false; + } + } + + char* processClose(char c, const char* element, char* scan, int32_t argc, const char** argv, + FastXml::Callback* iface, bool& isError) + { + AttributePairs attr(argc, argv); + isError = true; // by default, if we return null it's due to an error. 
+ if(c == '/' || c == '?') + { + char* slash = const_cast<char*>(static_cast<const char*>(strchr(element, c))); + if(slash) + *slash = 0; + + if(c == '?' && strcmp(element, "xml") == 0) + { + if(!iface->processXmlDeclaration(attr, 0, mLineNo)) + return NULL; + } + else + { + if(!iface->processElement(element, 0, attr, mLineNo)) + { + mError = "User aborted the parsing process"; + return NULL; + } + + pushElement(element); + + const char* close = popElement(); + + if(!iface->processClose(close, mStackIndex, isError)) + { + return NULL; + } + } + + if(!slash) + ++scan; + } + else + { + scan = skipNextData(scan); + char* data = scan; // this is the data portion of the element, only copies memory if we encounter line feeds + char* dest_data = 0; + while(*scan && *scan != '<') + { + if(getCharType(scan) == CT_END_OF_LINE) + { + if(*scan == '\r') + mLineNo++; + dest_data = scan; + *dest_data++ = ' '; // replace the linefeed with a space... + scan = skipNextData(scan); + while(*scan && *scan != '<') + { + if(getCharType(scan) == CT_END_OF_LINE) + { + if(*scan == '\r') + mLineNo++; + *dest_data++ = ' '; // replace the linefeed with a space... + scan = skipNextData(scan); + } + else + { + *dest_data++ = *scan++; + } + } + break; + } + else if('&' == *scan) + { + dest_data = scan; + while(*scan && *scan != '<') + { + if('&' == *scan) + { + if(*(scan + 1) && *(scan + 1) == '#' && *(scan + 2)) + { + if(*(scan + 2) == 'x') + { + // Hexadecimal. + if(!*(scan + 3)) + break; + + char* q = scan + 3; + q = strchr(q, ';'); + + if(!q || !*q) + PX_ASSERT(0); + + --q; + char ch = char(*q > '9' ? (tolower(*q) - 'a' + 10) : *q - '0'); + if(*(--q) != tolower('x')) + ch |= char(*q > '9' ? (tolower(*q) - 'a' + 10) : *q - '0') << 4; + + *dest_data++ = ch; + } + else + { + // Decimal. 
+ if(!*(scan + 2)) + break; + + const char* q = scan + 2; + q = strchr(q, ';'); + + if(!q || !*q) + PX_ASSERT(0); + + --q; + char ch = *q - '0'; + if(*(--q) != '#') + ch |= (*q - '0') * 10; + + *dest_data++ = ch; + } + + char* start = scan; + char* end = strchr(start, ';'); + if(end) + { + *end = 0; + scan = end + 1; + } + + continue; + } + + for(int i = 0; i < NUM_ENTITY; ++i) + { + if(strncmp(entity[i].str, scan, entity[i].strLength) == 0) + { + *dest_data++ = entity[i].chr; + scan += entity[i].strLength; + break; + } + } + } + else + { + *dest_data++ = *scan++; + } + } + break; + } + else + ++scan; + } + + if(*scan == '<') + { + if(scan[1] != '/') + { + PX_ASSERT(mOpenCount > 0); + mOpenCount--; + } + if(dest_data) + { + *dest_data = 0; + } + else + { + *scan = 0; + } + + scan++; // skip it.. + + if(*data == 0) + data = 0; + + if(!iface->processElement(element, data, attr, mLineNo)) + { + mError = "User aborted the parsing process"; + return 0; + } + + pushElement(element); + + // check for the comment use case... + if(scan[0] == '!' 
&& scan[1] == '-' && scan[2] == '-') + { + scan += 3; + while(*scan && *scan == ' ') + ++scan; + + char* comment = scan; + char* comment_end = strstr(scan, "-->"); + if(comment_end) + { + *comment_end = 0; + scan = comment_end + 3; + if(!iface->processComment(comment)) + { + mError = "User aborted the parsing process"; + return 0; + } + } + } + else if(*scan == '/') + { + scan = processClose(scan, iface, isError); + if(scan == NULL) + { + return NULL; + } + } + } + else + { + mError = "Data portion of an element wasn't terminated properly"; + return NULL; + } + } + + if(mOpenCount < MIN_CLOSE_COUNT) + { + scan = readData(scan); + } + + return scan; + } + + char* processClose(char* scan, FastXml::Callback* iface, bool& isError) + { + const char* start = popElement(), *close = start; + if(scan[1] != '>') + { + scan++; + close = scan; + while(*scan && *scan != '>') + scan++; + *scan = 0; + } + + if(0 != strcmp(start, close)) + { + mError = "Open and closing tags do not match"; + return 0; + } + + if(!iface->processClose(close, mStackIndex, isError)) + { + // we need to set the read pointer! + uint32_t offset = uint32_t(mReadBufferEnd - scan) - 1; + uint32_t readLoc = mLastReadLoc - offset; + mFileBuf->seek(readLoc); + return NULL; + } + ++scan; + + return scan; + } + + virtual bool processXml(physx::PxInputData& fileBuf, bool streamFromMemory) + { + releaseMemory(); + mFileBuf = &fileBuf; + mStreamFromMemory = streamFromMemory; + return processXml(mCallback); + } + + // if we have finished processing the data we had pending.. 
+ char* readData(char* scan) + { + for(uint32_t i = 0; i < (mStackIndex + 1); i++) + { + if(!mStackAllocated[i]) + { + const char* text = mStack[i]; + if(text) + { + uint32_t tlen = uint32_t(strlen(text)); + mStack[i] = static_cast<const char*>(mCallback->allocate(tlen + 1)); + PxMemCopy(const_cast<void*>(static_cast<const void*>(mStack[i])), text, tlen + 1); + mStackAllocated[i] = true; + } + } + } + + if(!mStreamFromMemory) + { + if(scan == NULL) + { + uint32_t seekLoc = mFileBuf->tell(); + mReadBufferSize = (mFileBuf->getLength() - seekLoc); + } + else + { + return scan; + } + } + + if(mReadBuffer == NULL) + { + mReadBuffer = static_cast<char*>(mCallback->allocate(mReadBufferSize + 1)); + } + uint32_t offset = 0; + uint32_t readLen = mReadBufferSize; + + if(scan) + { + offset = uint32_t(scan - mReadBuffer); + uint32_t copyLen = mReadBufferSize - offset; + if(copyLen) + { + PX_ASSERT(scan >= mReadBuffer); + memmove(mReadBuffer, scan, copyLen); + mReadBuffer[copyLen] = 0; + readLen = mReadBufferSize - copyLen; + } + offset = copyLen; + } + + uint32_t readCount = mFileBuf->read(&mReadBuffer[offset], readLen); + + while(readCount > 0) + { + + mReadBuffer[readCount + offset] = 0; // end of string terminator... 
+ mReadBufferEnd = &mReadBuffer[readCount + offset]; + + const char* scan_ = &mReadBuffer[offset]; + while(*scan_) + { + if(*scan_ == '<' && scan_[1] != '/') + { + mOpenCount++; + } + scan_++; + } + + if(mOpenCount < MIN_CLOSE_COUNT) + { + uint32_t oldSize = uint32_t(mReadBufferEnd - mReadBuffer); + mReadBufferSize = mReadBufferSize * 2; + char* oldReadBuffer = mReadBuffer; + mReadBuffer = static_cast<char*>(mCallback->allocate(mReadBufferSize + 1)); + PxMemCopy(mReadBuffer, oldReadBuffer, oldSize); + mCallback->deallocate(oldReadBuffer); + offset = oldSize; + uint32_t readSize = mReadBufferSize - oldSize; + readCount = mFileBuf->read(&mReadBuffer[offset], readSize); + if(readCount == 0) + break; + } + else + { + break; + } + } + mLastReadLoc = mFileBuf->tell(); + + return mReadBuffer; + } + + bool processXml(FastXml::Callback* iface) + { + bool ret = true; + + const int MAX_ATTRIBUTE = 2048; // can't imagine having more than 2,048 attributes in a single element right? + + mLineNo = 1; + + char* element, *scan = readData(0); + + while(*scan) + { + + scan = skipNextData(scan); + + if(*scan == 0) + break; + + if(*scan == '<') + { + + if(scan[1] != '/') + { + PX_ASSERT(mOpenCount > 0); + mOpenCount--; + } + scan++; + + if(*scan == '?') // Allow xml declarations + { + scan++; + } + else if(scan[0] == '!' 
&& scan[1] == '-' && scan[2] == '-') + { + scan += 3; + while(*scan && *scan == ' ') + scan++; + char* comment = scan, *comment_end = strstr(scan, "-->"); + if(comment_end) + { + *comment_end = 0; + scan = comment_end + 3; + if(!iface->processComment(comment)) + { + mError = "User aborted the parsing process"; + return false; + } + } + continue; + } + else if(scan[0] == '!') // Allow doctype + { + scan++; + + // DOCTYPE syntax differs from usual XML so we parse it here + + // Read DOCTYPE + const char* tag = "DOCTYPE"; + if(!strstr(scan, tag)) + { + mError = "Invalid DOCTYPE"; + return false; + } + + scan += strlen(tag); + + // Skip whites + while(CT_SOFT == getCharType(scan)) + ++scan; + + // Read rootElement + const char* rootElement = scan; + while(CT_DATA == getCharType(scan)) + ++scan; + + char* endRootElement = scan; + + // TODO: read remaining fields (fpi, uri, etc.) + while(CT_END_OF_ELEMENT != getCharType(scan++)) + ; + + *endRootElement = 0; + + if(!iface->processDoctype(rootElement, 0, 0, 0)) + { + mError = "User aborted the parsing process"; + return false; + } + + continue; // Restart loop + } + } + + if(*scan == '/') + { + bool isError; + scan = processClose(scan, iface, isError); + if(!scan) + { + if(isError) + { + mError = "User aborted the parsing process"; + } + return !isError; + } + } + else + { + if(*scan == '?') + scan++; + element = scan; + int32_t argc = 0; + const char* argv[MAX_ATTRIBUTE]; + bool close; + scan = nextSoftOrClose(scan, close); + if(close) + { + char c = *(scan - 1); + if(c != '?' && c != '/') + { + c = '>'; + } + *scan++ = 0; + bool isError; + scan = processClose(c, element, scan, argc, argv, iface, isError); + if(!scan) + { + if(isError) + { + mError = "User aborted the parsing process"; + } + return !isError; + } + } + else + { + if(*scan == 0) + { + return ret; + } + + *scan = 0; // place a zero byte to indicate the end of the element name... 
+ scan++; + + while(*scan) + { + scan = skipNextData(scan); // advance past any soft seperators (tab or space) + + if(getCharType(scan) == CT_END_OF_ELEMENT) + { + char c = *scan++; + if('?' == c) + { + if('>' != *scan) //?> + { + PX_ASSERT(0); + return false; + } + + scan++; + } + bool isError; + scan = processClose(c, element, scan, argc, argv, iface, isError); + if(!scan) + { + if(isError) + { + mError = "User aborted the parsing process"; + } + return !isError; + } + break; + } + else + { + if(argc >= MAX_ATTRIBUTE) + { + mError = "encountered too many attributes"; + return false; + } + argv[argc] = scan; + scan = nextSep(scan); // scan up to a space, or an equal + if(*scan) + { + if(*scan != '=') + { + *scan = 0; + scan++; + while(*scan && *scan != '=') + scan++; + if(*scan == '=') + scan++; + } + else + { + *scan = 0; + scan++; + } + + if(*scan) // if not eof... + { + scan = skipNextData(scan); + if(*scan == '"') + { + scan++; + argc++; + argv[argc] = scan; + argc++; + while(*scan && *scan != 34) + scan++; + if(*scan == '"') + { + *scan = 0; + scan++; + } + else + { + mError = "Failed to find closing quote for attribute"; + return false; + } + } + else + { + // mError = "Expected quote to begin attribute"; + // return false; + // PH: let's try to have a more graceful fallback + argc--; + while(*scan != '/' && *scan != '>' && *scan != 0) + scan++; + } + } + } // if( *scan ) + } // if ( mTypes[*scan] + } // if( close ) + } // if( *scan == '/' + } // while( *scan ) + } + + if(mStackIndex) + { + mError = "Invalid file format"; + return false; + } + + return ret; + } + + const char* getError(int32_t& lineno) + { + const char* ret = mError; + lineno = mLineNo; + mError = 0; + return ret; + } + + virtual void release(void) + { + Callback* c = mCallback; // get the user allocator interface + MyFastXml* f = this; // cast the this pointer + f->~MyFastXml(); // explicitely invoke the destructor for this class + c->deallocate(f); // now free up the memory associated with 
it. + } + + private: + virtual ~MyFastXml(void) + { + releaseMemory(); + } + + PX_INLINE void releaseMemory(void) + { + mFileBuf = NULL; + mCallback->deallocate(mReadBuffer); + mReadBuffer = NULL; + mStackIndex = 0; + mReadBufferEnd = NULL; + mOpenCount = 0; + mLastReadLoc = 0; + mError = NULL; + for(uint32_t i = 0; i < (mStackIndex + 1); i++) + { + if(mStackAllocated[i]) + { + mCallback->deallocate(const_cast<void*>(static_cast<const void*>(mStack[i]))); + mStackAllocated[i] = false; + } + mStack[i] = NULL; + } + } + + PX_INLINE CharType getCharType(char* scan) const + { + return mTypes[uint8_t(*scan)]; + } + + PX_INLINE char* nextSoftOrClose(char* scan, bool& close) + { + while(*scan && getCharType(scan) != CT_SOFT && *scan != '>') + scan++; + close = *scan == '>'; + return scan; + } + + PX_INLINE char* nextSep(char* scan) + { + while(*scan && getCharType(scan) != CT_SOFT && *scan != '=') + scan++; + return scan; + } + + PX_INLINE char* skipNextData(char* scan) + { + // while we have data, and we encounter soft seperators or line feeds... + while(*scan && (getCharType(scan) == CT_SOFT || getCharType(scan) == CT_END_OF_LINE)) + { + if(*scan == '\n') + mLineNo++; + scan++; + } + return scan; + } + + void pushElement(const char* element) + { + PX_ASSERT(mStackIndex < uint32_t(MAX_STACK)); + if(mStackIndex < uint32_t(MAX_STACK)) + { + if(mStackAllocated[mStackIndex]) + { + mCallback->deallocate(const_cast<void*>(static_cast<const void*>(mStack[mStackIndex]))); + mStackAllocated[mStackIndex] = false; + } + mStack[mStackIndex++] = element; + } + } + + const char* popElement(void) + { + PX_ASSERT(mStackIndex > 0); + if(mStackAllocated[mStackIndex]) + { + mCallback->deallocate(const_cast<void*>(static_cast<const void*>(mStack[mStackIndex]))); + mStackAllocated[mStackIndex] = false; + } + mStack[mStackIndex] = NULL; + return mStackIndex ? 
mStack[--mStackIndex] : NULL; + } + + static const int MAX_STACK = 2048; + + CharType mTypes[256]; + + physx::PxInputData* mFileBuf; + + char* mReadBuffer; + char* mReadBufferEnd; + + uint32_t mOpenCount; + uint32_t mReadBufferSize; + uint32_t mLastReadLoc; + + int32_t mLineNo; + const char* mError; + uint32_t mStackIndex; + const char* mStack[MAX_STACK + 1]; + bool mStreamFromMemory; + bool mStackAllocated[MAX_STACK + 1]; + Callback* mCallback; +}; +} + +namespace physx +{ +namespace shdfnd +{ + +FastXml* createFastXml(FastXml::Callback* iface) +{ + MyFastXml* m = static_cast<MyFastXml*>(iface->allocate(sizeof(MyFastXml))); + if(m) + { + new (m) MyFastXml(iface); + } + return static_cast<FastXml*>(m); +} +} +} diff --git a/PxShared/src/filebuf/include/PsAsciiConversion.h b/PxShared/src/filebuf/include/PsAsciiConversion.h new file mode 100644 index 0000000..7c4fa3a --- /dev/null +++ b/PxShared/src/filebuf/include/PsAsciiConversion.h @@ -0,0 +1,99 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PSFILEBUFFER_PSASCIICONVERSION_H +#define PSFILEBUFFER_PSASCIICONVERSION_H + +/*! +\file +\brief PxAsciiConversion namespace contains string/value helper functions +*/ + +#include "PxMath.h" +#include "PsString.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <float.h> + +namespace physx +{ +namespace general_string_parsing2 +{ +namespace PxAsc +{ + +const uint32_t PxF32StrLen = 24; +const uint32_t PxF64StrLen = 32; +const uint32_t IntStrLen = 32; + +PX_INLINE bool isWhiteSpace(char c); +PX_INLINE const char * skipNonWhiteSpace(const char *scan); +PX_INLINE const char * skipWhiteSpace(const char *scan); + +////////////////////////// +// str to value functions +////////////////////////// +PX_INLINE bool strToBool(const char *str, const char **endptr); +PX_INLINE int8_t strToI8(const char *str, const char **endptr); +PX_INLINE int16_t strToI16(const char *str, const char **endptr); +PX_INLINE int32_t strToI32(const char *str, const char **endptr); +PX_INLINE int64_t strToI64(const char *str, const char **endptr); +PX_INLINE uint8_t strToU8(const char *str, const char **endptr); +PX_INLINE uint16_t strToU16(const char *str, const char **endptr); +PX_INLINE uint32_t strToU32(const char *str, const char **endptr); +PX_INLINE 
uint64_t strToU64(const char *str, const char **endptr); +PX_INLINE float strToF32(const char *str, const char **endptr); +PX_INLINE double strToF64(const char *str, const char **endptr); +PX_INLINE void strToF32s(float *v,uint32_t count,const char *str, const char**endptr); + + +////////////////////////// +// value to str functions +////////////////////////// +PX_INLINE const char * valueToStr( bool val, char *buf, uint32_t n ); +PX_INLINE const char * valueToStr( int8_t val, char *buf, uint32_t n ); +PX_INLINE const char * valueToStr( int16_t val, char *buf, uint32_t n ); +PX_INLINE const char * valueToStr( int32_t val, char *buf, uint32_t n ); +PX_INLINE const char * valueToStr( int64_t val, char *buf, uint32_t n ); +PX_INLINE const char * valueToStr( uint8_t val, char *buf, uint32_t n ); +PX_INLINE const char * valueToStr( uint16_t val, char *buf, uint32_t n ); +PX_INLINE const char * valueToStr( uint32_t val, char *buf, uint32_t n ); +PX_INLINE const char * valueToStr( uint64_t val, char *buf, uint32_t n ); +PX_INLINE const char * valueToStr( float val, char *buf, uint32_t n ); +PX_INLINE const char * valueToStr( double val, char *buf, uint32_t n ); + +#include "PsAsciiConversion.inl" + +} // end of namespace +} // end of namespace +using namespace general_string_parsing2; +} // end of namespace + + +#endif // PSFILEBUFFER_PSASCIICONVERSION_H diff --git a/PxShared/src/filebuf/include/PsAsciiConversion.inl b/PxShared/src/filebuf/include/PsAsciiConversion.inl new file mode 100644 index 0000000..9e1ba14 --- /dev/null +++ b/PxShared/src/filebuf/include/PsAsciiConversion.inl @@ -0,0 +1,566 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +/*! 
+\file +\brief NvAsciiConversion namespace contains string/value helper functions +*/ + +#include <ctype.h> + +PX_INLINE bool isWhiteSpace(char c) +{ + bool ret = false; + if ( c == 32 || c == 9 || c == 13 || c == 10 || c == ',' ) ret = true; + return ret; +} + +PX_INLINE const char * skipNonWhiteSpace(const char *scan) +{ + while ( !isWhiteSpace(*scan) && *scan) scan++; + if ( *scan == 0 ) scan = NULL; + return scan; +} +PX_INLINE const char * skipWhiteSpace(const char *scan) +{ + while ( isWhiteSpace(*scan) && *scan ) scan++; + if ( *scan == 0 ) scan = NULL; + return scan; +} + +static double strtod_fast(const char * pString) +{ + //--- + // Find the start of the string + const char* pNumberStart = skipWhiteSpace(pString); + + //--- + // Find the end of the string + const char* pNumberEnd = pNumberStart; + + // skip optional sign + if( *pNumberEnd == '-' || *pNumberEnd == '+' ) + ++pNumberEnd; + + // skip optional digits + while( isdigit(*pNumberEnd) ) + ++pNumberEnd; + + // skip optional decimal and digits + if( *pNumberEnd == '.' 
) + { + ++pNumberEnd; + + while( isdigit(*pNumberEnd) ) + ++pNumberEnd; + } + + // skip optional exponent + if( *pNumberEnd == 'd' + || *pNumberEnd == 'D' + || *pNumberEnd == 'e' + || *pNumberEnd == 'E' ) + { + ++pNumberEnd; + + if( *pNumberEnd == '-' || *pNumberEnd == '+' ) + ++pNumberEnd; + + while( isdigit(*pNumberEnd) ) + ++pNumberEnd; + } + + //--- + // Process the string + const uint32_t numberLen = (const uint32_t)(pNumberEnd-pNumberStart); + char buffer[32]; + if( numberLen+1 < sizeof(buffer)/sizeof(buffer[0]) ) + { + // copy into buffer and terminate with NUL before calling the + // standard function + memcpy( buffer, pNumberStart, numberLen*sizeof(buffer[0]) ); + buffer[numberLen] = '\0'; + const double result = strtod( buffer, NULL ); + + return result; + } + else + { + // buffer was too small so just call the standard function on the + // source input to get a proper result + return strtod( pString, NULL ); + } +} + +static float strtof_fast(const char* pString) +{ + return (float)strtod_fast(pString); +} + + +////////////////////////// +// str to value functions +////////////////////////// +PX_INLINE bool strToBool(const char *str, const char **endptr) +{ + bool ret = false; + const char *begin = skipWhiteSpace(str); + const char *end = skipNonWhiteSpace(begin); + + if( !end ) + end = begin + strlen(str); + + size_t len = (size_t)(end - begin); + if ( physx::shdfnd::strnicmp(begin,"true", len) == 0 || physx::shdfnd::strnicmp(begin,"1", len) == 0 ) + ret = true; + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); + + return ret; +} + +PX_INLINE int8_t strToI8(const char *str, const char **endptr) +{ + int8_t ret; + const char *begin = skipWhiteSpace(str); + const char *end = skipNonWhiteSpace(begin); + + if( !end ) + end = begin + strlen(str); + + if( strncmp(begin, "INT8_MIN", (size_t)(end-begin)) == 0) + ret = INT8_MIN; + else if( strncmp(begin, "INT8_MAX", (size_t)(end-begin)) == 0) + ret = INT8_MAX; + else if( strncmp(begin, "PX_MIN_I8", 
(size_t)(end-begin)) == 0) + ret = INT8_MIN; + else if( strncmp(begin, "PX_MAX_I8", (size_t)(end-begin)) == 0) + ret = INT8_MAX; + else + ret = (int8_t)strtol(begin, 0, 0); //FIXME + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); + + return ret; +} + +PX_INLINE int16_t strToI16(const char *str, const char **endptr) +{ + int16_t ret; + const char *begin = skipWhiteSpace(str); + const char *end = skipNonWhiteSpace(begin); + + if( !end ) + end = begin + strlen(str); + + if( strncmp(begin, "INT16_MIN", (size_t)(end-begin)) == 0) + ret = INT16_MIN; + else if( strncmp(begin, "INT16_MAX", (size_t)(end-begin)) == 0) + ret = INT16_MAX; + else if( strncmp(begin, "PX_MIN_I16", (size_t)(end-begin)) == 0) + ret = INT16_MIN; + else if( strncmp(begin, "PX_MAX_I16", (size_t)(end-begin)) == 0) + ret = INT16_MAX; + else + ret = (int16_t)strtol(begin, 0, 0); //FIXME + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); + + return ret; +} + +PX_INLINE int32_t strToI32(const char *str, const char **endptr) +{ + int32_t ret; + const char *begin = skipWhiteSpace(str); + const char *end = skipNonWhiteSpace(begin); + + if( !end ) + end = begin + strlen(str); + + if( strncmp(begin, "INT32_MIN", (size_t)(end-begin)) == 0) + ret = INT32_MIN; + else if( strncmp(begin, "INT32_MAX", (size_t)(end-begin)) == 0) + ret = INT32_MAX; + else if( strncmp(begin, "PX_MIN_I32", (size_t)(end-begin)) == 0) + ret = INT32_MIN; + else if( strncmp(begin, "PX_MAX_I32", (size_t)(end-begin)) == 0) + ret = INT32_MAX; + else + ret = (int32_t)strtol(begin, 0, 0); //FIXME + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); + + return ret; +} + +PX_INLINE int64_t strToI64(const char *str, const char **endptr) +{ + int64_t ret; + const char *begin = skipWhiteSpace(str); + + //FIXME +#ifdef _WIN32 //NV_WINDOWS, NV_XBOX + ret = (int64_t)_strtoi64(begin,0,10); +#else + ret = (int64_t)strtoll(begin,0,10); +#endif + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); + + return ret; +} + +PX_INLINE uint8_t 
strToU8(const char *str, const char **endptr) +{ + uint8_t ret; + const char *begin = skipWhiteSpace(str); + + ret = (uint8_t)strtoul(begin, 0, 0); + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); + + return ret; +} + +PX_INLINE uint16_t strToU16(const char *str, const char **endptr) +{ + uint16_t ret; + const char *end; + const char *begin = skipWhiteSpace(str); + + end = skipNonWhiteSpace(begin); + if( !end ) + end = begin + strlen(str); + + if( strncmp(begin, "UINT16_MAX", (size_t)(end-begin)) == 0) + ret = UINT16_MAX; + else if( strncmp(begin, "PX_MAX_U16", (size_t)(end-begin)) == 0) + ret = UINT16_MAX; + else + ret = (uint16_t)strtoul(begin,0,0); + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); + + return ret; +} + +PX_INLINE uint32_t strToU32(const char *str, const char **endptr) +{ + uint32_t ret; + const char *begin = skipWhiteSpace(str); + const char *end = skipNonWhiteSpace(begin); + + if( !end ) + end = begin + strlen(str); + + if( strncmp(begin, "UINT32_MAX", (size_t)(end-begin)) == 0) + ret = UINT32_MAX; + else if( strncmp(begin, "PX_U32_MAX", (size_t)(end-begin)) == 0) + ret = UINT32_MAX; + else + ret = (uint32_t)strtoul(begin,0,0); + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); + + return ret; +} + +PX_INLINE uint64_t strToU64(const char *str, const char **endptr) +{ + uint64_t ret; + const char *begin; + begin = skipWhiteSpace(str); + + //FIXME +#ifdef _WIN32 //NV_WINDOWS, NV_XBOX + ret = (uint64_t)_strtoui64(begin,0,10); +#else + ret = (uint64_t)strtoull(begin,0,10); +#endif + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); + + return ret; +} + +#ifndef DEBUGGING_MISMATCHES +#define DEBUGGING_MISMATCHES 0 +#endif + +PX_INLINE float strToF32(const char *str, const char **endptr) +{ + float ret; + const char *begin = skipWhiteSpace(str); + const char *end = skipNonWhiteSpace(begin); + + if( !end ) + end = begin + strlen(str); + + const uint32_t len = (uint32_t)(end - begin); + + const char F32_MIN[] = "NV_MIN_F32"; + const 
char F32_MAX[] = "NV_MAX_F32"; + const char PX_F32_MIN[] = "PX_MIN_F32"; + const char PX_F32_MAX[] = "PX_MAX_F32"; + + if( strncmp(begin, PX_F32_MIN, physx::PxMin(len, (uint32_t)(sizeof(PX_F32_MIN) - 1))) == 0) + ret = -PX_MAX_F32; + else if( strncmp(begin, PX_F32_MAX, physx::PxMin(len, (uint32_t)(sizeof(PX_F32_MAX) - 1))) == 0) + ret = PX_MAX_F32; + else if( strncmp(begin, F32_MIN, physx::PxMin(len, (uint32_t)(sizeof(F32_MIN) - 1))) == 0) + ret = -PX_MAX_F32; + else if( strncmp(begin, F32_MAX, physx::PxMin(len, (uint32_t)(sizeof(F32_MAX) - 1))) == 0) + ret = PX_MAX_F32; + else + { + ret = (float)strtof_fast(begin); + } + +#if DEBUGGING_MISMATCHES + float testRet = (float)atof(begin); + if( ret != testRet ) + { + PX_ASSERT(0 && "Inaccurate float string"); + } +#endif + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); + + return ret; +} + + +PX_INLINE double strToF64(const char *str, const char **endptr) +{ + double ret; + const char *begin = skipWhiteSpace(str); + const char *end = skipNonWhiteSpace(begin); + + end = skipNonWhiteSpace(begin); + + if( !end ) + end = begin + strlen(str); + + const uint32_t len = (const uint32_t)(end - begin); + + const char F64_MIN[] = "PX_MIN_F364"; + const char F64_MAX[] = "PX_MAX_F64"; + const char PX_F64_MIN[] = "PX_MIN_F64"; + const char PX_F64_MAX[] = "PX_MAX_F64"; + + if( strncmp(begin, F64_MIN, physx::PxMin(len, (uint32_t)(sizeof(F64_MIN) - 1))) == 0) + ret = -PX_MAX_F64; + else if( strncmp(begin, F64_MAX, physx::PxMin(len, (uint32_t)(sizeof(F64_MAX) - 1))) == 0) + ret = PX_MAX_F64; + else if( strncmp(begin, PX_F64_MIN, physx::PxMin(len, (uint32_t)(sizeof(PX_F64_MIN) - 1))) == 0) + ret = -PX_MAX_F64; + else if( strncmp(begin, PX_F64_MAX, physx::PxMin(len, (uint32_t)(sizeof(PX_F64_MAX) - 1))) == 0) + ret = PX_MAX_F64; + else + ret = (double)strtod_fast(begin); + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); + + return ret; +} + +PX_INLINE void strToF32s(float *v,uint32_t count,const char *str, const char**endptr) +{ 
+ const char *begin = skipWhiteSpace(str); + + if ( *begin == '(' ) begin++; + for (uint32_t i=0; i<count && *begin; i++) + { + v[i] = (float)strToF32(begin, &begin); + } + + if( endptr ) + *endptr = skipNonWhiteSpace(begin); +} + + +////////////////////////// +// value to str functions +////////////////////////// +PX_INLINE const char * valueToStr( bool val, char *buf, uint32_t n ) +{ + physx::shdfnd::snprintf(buf, n,"%s",val ? "true" : "false"); + return buf; +} + +PX_INLINE const char * valueToStr( int8_t val, char *buf, uint32_t n ) +{ + if( val == INT8_MIN ) + physx::shdfnd::snprintf(buf, n,"%s","INT8_MIN" ); + else if( val == INT8_MAX ) + physx::shdfnd::snprintf(buf, n,"%s","INT8_MAX" ); + else + physx::shdfnd::snprintf(buf, n, "%d", val); + return buf; +} + +PX_INLINE const char * valueToStr( int16_t val, char *buf, uint32_t n ) +{ + if( val == INT16_MIN ) + physx::shdfnd::snprintf(buf, n,"%s","INT16_MIN" ); + else if( val == INT16_MAX ) + physx::shdfnd::snprintf(buf, n,"%s","INT16_MAX" ); + else + physx::shdfnd::snprintf(buf, n,"%d",val ); + return buf; +} + +PX_INLINE const char * valueToStr( int32_t val, char *buf, uint32_t n ) +{ + if( val == INT32_MIN ) + physx::shdfnd::snprintf(buf, n,"%s","INT32_MIN" ); + else if( val == INT32_MAX ) + physx::shdfnd::snprintf(buf, n,"%s","INT32_MAX" ); + else + physx::shdfnd::snprintf(buf, n,"%d",val ); + return buf; +} + +PX_INLINE const char * valueToStr( int64_t val, char *buf, uint32_t n ) +{ + physx::shdfnd::snprintf(buf, n,"%lld",val ); + return buf; +} + +PX_INLINE const char * valueToStr( uint8_t val, char *buf, uint32_t n ) +{ + physx::shdfnd::snprintf(buf, n, "%u", val); + return buf; +} + +PX_INLINE const char * valueToStr( uint16_t val, char *buf, uint32_t n ) +{ + if( val == UINT16_MAX ) + physx::shdfnd::snprintf(buf, n,"%s","UINT16_MAX" ); + else + physx::shdfnd::snprintf(buf, n,"%u",val ); + return buf; +} + +PX_INLINE const char * valueToStr( uint32_t val, char *buf, uint32_t n ) +{ + if( val == 
UINT32_MAX ) + physx::shdfnd::snprintf(buf, n,"%s","UINT32_MAX" ); + else + physx::shdfnd::snprintf(buf, n,"%u",val ); + return buf; +} + +PX_INLINE const char * valueToStr( uint64_t val, char *buf, uint32_t n ) +{ + physx::shdfnd::snprintf(buf, n,"%llu",val ); + return buf; +} + +PX_INLINE const char * valueToStr( float val, char *buf, uint32_t n ) +{ + if( !physx::PxIsFinite(val) ) + { + PX_ASSERT( 0 && "invalid floating point" ); + physx::shdfnd::snprintf(buf, n,"%s","0" ); + } + else if( val == -PX_MAX_F32 ) + physx::shdfnd::snprintf(buf, n,"%s","PX_MIN_F32" ); + else if( val == PX_MAX_F32 ) + physx::shdfnd::snprintf(buf, n,"%s","PX_MAX_F32" ); + else if ( val == 1 ) + physx::shdfnd::strlcpy(buf, n, "1"); + else if ( val == 0 ) + physx::shdfnd::strlcpy(buf, n, "0"); + else if ( val == - 1 ) + physx::shdfnd::strlcpy(buf, n, "-1"); + else + { + physx::shdfnd::snprintf(buf,n,"%.9g", (double)val ); // %g expects double + const char *dot = strchr(buf,'.'); + const char *e = strchr(buf,'e'); + if ( dot && !e ) + { + int32_t len = (int32_t)strlen(buf); + char *foo = &buf[len-1]; + while ( *foo == '0' ) foo--; + if ( *foo == '.' 
) + *foo = 0; + else + foo[1] = 0; + } + } + return buf; +} + +PX_INLINE const char * valueToStr( double val, char *buf, uint32_t n ) +{ + if( !physx::PxIsFinite(val) ) + { + PX_ASSERT( 0 && "invalid floating point" ); + physx::shdfnd::snprintf(buf, n,"%s","0" ); + } + else if( val == -PX_MAX_F64 ) + physx::shdfnd::snprintf(buf, n,"%s","PX_MIN_F64" ); + else if( val == PX_MAX_F64 ) + physx::shdfnd::snprintf(buf, n,"%s","PX_MAX_F64" ); + else if ( val == 1 ) + physx::shdfnd::strlcpy(buf, n, "1"); + else if ( val == 0 ) + physx::shdfnd::strlcpy(buf, n, "0"); + else if ( val == - 1 ) + physx::shdfnd::strlcpy(buf, n, "-1"); + else + { + physx::shdfnd::snprintf(buf,n,"%.18g", val ); + const char *dot = strchr(buf,'.'); + const char *e = strchr(buf,'e'); + if ( dot && !e ) + { + int32_t len = (int32_t)strlen(buf); + char *foo = &buf[len-1]; + while ( *foo == '0' ) foo--; + if ( *foo == '.' ) + *foo = 0; + else + foo[1] = 0; + } + } + return buf; +} diff --git a/PxShared/src/filebuf/include/PsFileBuffer.h b/PxShared/src/filebuf/include/PsFileBuffer.h new file mode 100644 index 0000000..d768968 --- /dev/null +++ b/PxShared/src/filebuf/include/PsFileBuffer.h @@ -0,0 +1,250 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#ifndef PSFILEBUFFER_PSFILEBUFFER_H +#define PSFILEBUFFER_PSFILEBUFFER_H + +#include "filebuf/PxFileBuf.h" + +#include "Ps.h" +#include "PsUserAllocated.h" +#include <stdio.h> + +namespace physx +{ +namespace general_PxIOStream2 +{ + using namespace shdfnd; + +//Use this class if you want to use your own allocator +class PxFileBufferBase : public PxFileBuf +{ +public: + PxFileBufferBase(const char *fileName,OpenMode mode) + { + mOpenMode = mode; + mFph = NULL; + mFileLength = 0; + mSeekRead = 0; + mSeekWrite = 0; + mSeekCurrent = 0; + switch ( mode ) + { + case OPEN_READ_ONLY: + mFph = fopen(fileName,"rb"); + break; + case OPEN_WRITE_ONLY: + mFph = fopen(fileName,"wb"); + break; + case OPEN_READ_WRITE_NEW: + mFph = fopen(fileName,"wb+"); + break; + case OPEN_READ_WRITE_EXISTING: + mFph = fopen(fileName,"rb+"); + break; + case OPEN_FILE_NOT_FOUND: + break; + } + if ( mFph ) + { + fseek(mFph,0L,SEEK_END); + mFileLength = static_cast<uint32_t>(ftell(mFph)); + fseek(mFph,0L,SEEK_SET); + } + else + { + mOpenMode = OPEN_FILE_NOT_FOUND; + } + } + + virtual ~PxFileBufferBase() + { + close(); + } + + virtual void close() + { + if( mFph ) + { + fclose(mFph); + mFph = 0; + } + } + + virtual SeekType isSeekable(void) const + { + return mSeekType; + } + + virtual uint32_t read(void* buffer, uint32_t size) + { + uint32_t ret = 0; + if ( mFph ) + { + setSeekRead(); + ret = static_cast<uint32_t>(::fread(buffer,1,size,mFph)); + mSeekRead+=ret; + mSeekCurrent+=ret; + } + return ret; + } + + virtual uint32_t peek(void* buffer, uint32_t size) + { + uint32_t ret = 0; + if ( mFph ) + { + uint32_t loc = tellRead(); + setSeekRead(); + ret = static_cast<uint32_t>(::fread(buffer,1,size,mFph)); + mSeekCurrent+=ret; + seekRead(loc); + } + return ret; + } + + virtual uint32_t write(const void* buffer, uint32_t size) + { + uint32_t ret = 0; + if ( mFph ) + { + setSeekWrite(); + ret = static_cast<uint32_t>(::fwrite(buffer,1,size,mFph)); + mSeekWrite+=ret; + mSeekCurrent+=ret; + if ( 
mSeekWrite > mFileLength ) + { + mFileLength = mSeekWrite; + } + } + return ret; + } + + virtual uint32_t tellRead(void) const + { + return mSeekRead; + } + + virtual uint32_t tellWrite(void) const + { + return mSeekWrite; + } + + virtual uint32_t seekRead(uint32_t loc) + { + mSeekRead = loc; + if ( mSeekRead > mFileLength ) + { + mSeekRead = mFileLength; + } + return mSeekRead; + } + + virtual uint32_t seekWrite(uint32_t loc) + { + mSeekWrite = loc; + if ( mSeekWrite > mFileLength ) + { + mSeekWrite = mFileLength; + } + return mSeekWrite; + } + + virtual void flush(void) + { + if ( mFph ) + { + ::fflush(mFph); + } + } + + virtual OpenMode getOpenMode(void) const + { + return mOpenMode; + } + + virtual uint32_t getFileLength(void) const + { + return mFileLength; + } + +private: + // Moves the actual file pointer to the current read location + void setSeekRead(void) + { + if ( mSeekRead != mSeekCurrent && mFph ) + { + if ( mSeekRead >= mFileLength ) + { + fseek(mFph,0L,SEEK_END); + } + else + { + fseek(mFph,static_cast<long>(mSeekRead),SEEK_SET); + } + mSeekCurrent = mSeekRead = static_cast<uint32_t>(ftell(mFph)); + } + } + // Moves the actual file pointer to the current write location + void setSeekWrite(void) + { + if ( mSeekWrite != mSeekCurrent && mFph ) + { + if ( mSeekWrite >= mFileLength ) + { + fseek(mFph,0L,SEEK_END); + } + else + { + fseek(mFph,static_cast<long>(mSeekWrite),SEEK_SET); + } + mSeekCurrent = mSeekWrite = static_cast<uint32_t>(ftell(mFph)); + } + } + + + FILE *mFph; + uint32_t mSeekRead; + uint32_t mSeekWrite; + uint32_t mSeekCurrent; + uint32_t mFileLength; + SeekType mSeekType; + OpenMode mOpenMode; +}; + +//Use this class if you want to use PhysX memory allocator +class PsFileBuffer: public PxFileBufferBase, public UserAllocated +{ +public: + PsFileBuffer(const char *fileName,OpenMode mode): PxFileBufferBase(fileName, mode) {} +}; + +} +using namespace general_PxIOStream2; +} + +#endif // PSFILEBUFFER_PSFILEBUFFER_H diff --git 
a/PxShared/src/filebuf/include/PsIOStream.h b/PxShared/src/filebuf/include/PsIOStream.h new file mode 100644 index 0000000..07c73f9 --- /dev/null +++ b/PxShared/src/filebuf/include/PsIOStream.h @@ -0,0 +1,137 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PSFILEBUFFER_PSIOSTREAM_H +#define PSFILEBUFFER_PSIOSTREAM_H + +/*! 
+\file +\brief PsIOStream class +*/ +#include "filebuf/PxFileBuf.h" + +#include "Ps.h" +#include "PsString.h" +#include <string.h> +#include <stdlib.h> +#include "PsAsciiConversion.h" + +#define safePrintf physx::shdfnd::snprintf + +PX_PUSH_PACK_DEFAULT + +namespace physx +{ + namespace general_PxIOStream2 + { + +/** +\brief A wrapper class for physx::PxFileBuf that provides both binary and ASCII streaming capabilities +*/ +class PsIOStream +{ + static const uint32_t MAX_STREAM_STRING = 1024; +public: + /** + \param [in] stream the physx::PxFileBuf through which all reads and writes will be performed + \param [in] streamLen the length of the input data stream when de-serializing + */ + PsIOStream(physx::PxFileBuf &stream,uint32_t streamLen) : mBinary(true), mStreamLen(streamLen), mStream(stream) { } + ~PsIOStream(void) { } + + /** + \brief Set the stream to binary or ASCII + + \param [in] state if true, stream is binary, if false, stream is ASCII + + If the stream is binary, stream access is passed straight through to the respecitve + physx::PxFileBuf methods. If the stream is ASCII, all stream reads and writes are converted to + human readable ASCII. 
+ */ + PX_INLINE void setBinary(bool state) { mBinary = state; } + PX_INLINE bool getBinary() { return mBinary; } + + PX_INLINE PsIOStream& operator<<(bool v); + PX_INLINE PsIOStream& operator<<(char c); + PX_INLINE PsIOStream& operator<<(uint8_t v); + PX_INLINE PsIOStream& operator<<(int8_t v); + + PX_INLINE PsIOStream& operator<<(const char *c); + PX_INLINE PsIOStream& operator<<(int64_t v); + PX_INLINE PsIOStream& operator<<(uint64_t v); + PX_INLINE PsIOStream& operator<<(double v); + PX_INLINE PsIOStream& operator<<(float v); + PX_INLINE PsIOStream& operator<<(uint32_t v); + PX_INLINE PsIOStream& operator<<(int32_t v); + PX_INLINE PsIOStream& operator<<(uint16_t v); + PX_INLINE PsIOStream& operator<<(int16_t v); + PX_INLINE PsIOStream& operator<<(const physx::PxVec3 &v); + PX_INLINE PsIOStream& operator<<(const physx::PxQuat &v); + PX_INLINE PsIOStream& operator<<(const physx::PxBounds3 &v); + + PX_INLINE PsIOStream& operator>>(const char *&c); + PX_INLINE PsIOStream& operator>>(bool &v); + PX_INLINE PsIOStream& operator>>(char &c); + PX_INLINE PsIOStream& operator>>(uint8_t &v); + PX_INLINE PsIOStream& operator>>(int8_t &v); + PX_INLINE PsIOStream& operator>>(int64_t &v); + PX_INLINE PsIOStream& operator>>(uint64_t &v); + PX_INLINE PsIOStream& operator>>(double &v); + PX_INLINE PsIOStream& operator>>(float &v); + PX_INLINE PsIOStream& operator>>(uint32_t &v); + PX_INLINE PsIOStream& operator>>(int32_t &v); + PX_INLINE PsIOStream& operator>>(uint16_t &v); + PX_INLINE PsIOStream& operator>>(int16_t &v); + PX_INLINE PsIOStream& operator>>(physx::PxVec3 &v); + PX_INLINE PsIOStream& operator>>(physx::PxQuat &v); + PX_INLINE PsIOStream& operator>>(physx::PxBounds3 &v); + + uint32_t getStreamLen(void) const { return mStreamLen; } + + physx::PxFileBuf& getStream(void) { return mStream; } + + PX_INLINE void storeString(const char *c,bool zeroTerminate=false); + +private: + PsIOStream& operator=( const PsIOStream& ); + + + bool mBinary; // true if we are serializing 
binary data. Otherwise, everything is assumed converted to ASCII + uint32_t mStreamLen; // the length of the input data stream when de-serializing. + physx::PxFileBuf &mStream; + char mReadString[MAX_STREAM_STRING]; // a temp buffer for streaming strings on input. +}; + +#include "PsIOStream.inl" // inline methods... + + } // end of namespace + using namespace general_PxIOStream2; +} // end of physx namespace + +PX_POP_PACK + +#endif // PSFILEBUFFER_PSIOSTREAM_H diff --git a/PxShared/src/filebuf/include/PsIOStream.inl b/PxShared/src/filebuf/include/PsIOStream.inl new file mode 100644 index 0000000..e821e6a --- /dev/null +++ b/PxShared/src/filebuf/include/PsIOStream.inl @@ -0,0 +1,451 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. 
Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + + +/* + * Copyright 2009-2011 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO USER: + * + * This source code is subject to NVIDIA ownership rights under U.S. and + * international Copyright laws. Users and possessors of this source code + * are hereby granted a nonexclusive, royalty-free license to use this code + * in individual and commercial software. + * + * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE + * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR + * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, + * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE + * OR PERFORMANCE OF THIS SOURCE CODE. + * + * U.S. Government End Users. This source code is a "commercial item" as + * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of + * "commercial computer software" and "commercial computer software + * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) + * and is provided to the U.S. Government only as a commercial end item. + * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through + * 227.7202-4 (JUNE 1995), all U.S. 
Government End Users acquire the + * source code with only those rights set forth herein. + * + * Any use of this source code in individual and commercial software must + * include, in the user documentation and internal comments to the code, + * the above Disclaimer and U.S. Government End Users Notice. + */ + +/*! +\file +\brief PsIOStream inline implementation +*/ + +PX_INLINE PsIOStream& PsIOStream::operator<<(bool v) +{ + if ( mBinary ) + { + mStream.storeByte((uint8_t)v); + } + else + { + char scratch[6]; + storeString( physx::PxAsc::valueToStr(v, scratch, 6) ); + } + return *this; +} + + +PX_INLINE PsIOStream& PsIOStream::operator<<(char c) +{ + mStream.storeByte((uint8_t)c); + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(uint8_t c) +{ + if ( mBinary ) + { + mStream.storeByte((uint8_t)c); + } + else + { + char scratch[physx::PxAsc::IntStrLen]; + storeString( physx::PxAsc::valueToStr(c, scratch, physx::PxAsc::IntStrLen) ); + } + + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(int8_t c) +{ + if ( mBinary ) + { + mStream.storeByte((uint8_t)c); + } + else + { + char scratch[physx::PxAsc::IntStrLen]; + storeString( physx::PxAsc::valueToStr(c, scratch, physx::PxAsc::IntStrLen) ); + } + + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(const char *c) +{ + if ( mBinary ) + { + c = c ? c : ""; // it it is a null pointer, assign it to an empty string. 
+ uint32_t len = (uint32_t)strlen(c); + PX_ASSERT( len < (MAX_STREAM_STRING-1)); + if ( len > (MAX_STREAM_STRING-1) ) + { + len = MAX_STREAM_STRING-1; + } + mStream.storeDword(len); + if ( len ) + mStream.write(c,len); + } + else + { + storeString(c); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(uint64_t v) +{ + if ( mBinary ) + { + mStream.storeDouble( (double) v ); + } + else + { + char scratch[physx::PxAsc::IntStrLen]; + storeString( physx::PxAsc::valueToStr(v, scratch, physx::PxAsc::IntStrLen) ); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(int64_t v) +{ + if ( mBinary ) + { + mStream.storeDouble( (double) v ); + } + else + { + char scratch[physx::PxAsc::IntStrLen]; + storeString( physx::PxAsc::valueToStr(v, scratch, physx::PxAsc::IntStrLen) ); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(double v) +{ + if ( mBinary ) + { + mStream.storeDouble( (double) v ); + } + else + { + char scratch[physx::PxAsc::PxF64StrLen]; + storeString( physx::PxAsc::valueToStr(v, scratch, physx::PxAsc::PxF64StrLen) ); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(float v) +{ + if ( mBinary ) + { + mStream.storeFloat(v); + } + else + { + char scratch[physx::PxAsc::PxF32StrLen]; + storeString( physx::PxAsc::valueToStr(v, scratch, physx::PxAsc::PxF32StrLen) ); + + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(uint32_t v) +{ + if ( mBinary ) + { + mStream.storeDword(v); + } + else + { + char scratch[physx::PxAsc::IntStrLen]; + storeString( physx::PxAsc::valueToStr(v, scratch, physx::PxAsc::IntStrLen) ); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(int32_t v) +{ + if ( mBinary ) + { + mStream.storeDword( (uint32_t) v ); + } + else + { + char scratch[physx::PxAsc::IntStrLen]; + storeString( physx::PxAsc::valueToStr(v, scratch, physx::PxAsc::IntStrLen) ); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(uint16_t v) +{ + 
if ( mBinary ) + { + mStream.storeWord(v); + } + else + { + char scratch[physx::PxAsc::IntStrLen]; + storeString( physx::PxAsc::valueToStr(v, scratch, physx::PxAsc::IntStrLen) ); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(int16_t v) +{ + if ( mBinary ) + { + mStream.storeWord( (uint16_t) v ); + } + else + { + char scratch[physx::PxAsc::IntStrLen]; + storeString( physx::PxAsc::valueToStr(v, scratch, physx::PxAsc::IntStrLen) ); + } + return *this; +} + + +PX_INLINE PsIOStream& PsIOStream::operator>>(uint32_t &v) +{ + if ( mBinary ) + { + v = mStream.readDword(); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(char &v) +{ + if ( mBinary ) + { + v = (char)mStream.readByte(); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(uint8_t &v) +{ + if ( mBinary ) + { + v = mStream.readByte(); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(int8_t &v) +{ + if ( mBinary ) + { + v = (int8_t)mStream.readByte(); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(int64_t &v) +{ + if ( mBinary ) + { + v = mStream.readDword(); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(uint64_t &v) +{ + if ( mBinary ) + { + v = (uint64_t)mStream.readDouble(); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(double &v) +{ + if ( mBinary ) + { + v = mStream.readDouble(); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(float &v) +{ + if ( mBinary ) + { + v = mStream.readFloat(); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(int32_t &v) +{ + if ( mBinary ) + { + v = (int32_t)mStream.readDword(); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(uint16_t &v) +{ + if ( mBinary ) + { + v = mStream.readWord(); + } + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(int16_t &v) +{ + if ( mBinary ) + { + v = (int16_t)mStream.readWord(); + } + return *this; +} + 
+PX_INLINE PsIOStream& PsIOStream::operator>>(bool &v) +{ + int8_t iv; + iv = (int8_t)mStream.readByte(); + v = iv ? true : false; + return *this; +} + +#define NX_IOSTREAM_COMMA_SEPARATOR if(!mBinary) *this << ' '; + +PX_INLINE PsIOStream& PsIOStream::operator<<(const physx::PxVec3 &v) +{ + *this << v.x; + NX_IOSTREAM_COMMA_SEPARATOR; + *this << v.y; + NX_IOSTREAM_COMMA_SEPARATOR; + *this << v.z; + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator<<(const physx::PxQuat &v) +{ + *this << v.x; + NX_IOSTREAM_COMMA_SEPARATOR; + *this << v.y; + NX_IOSTREAM_COMMA_SEPARATOR; + *this << v.z; + NX_IOSTREAM_COMMA_SEPARATOR; + *this << v.w; + return *this; +} + + +PX_INLINE PsIOStream& PsIOStream::operator<<(const physx::PxBounds3 &v) +{ + *this << v.minimum; + NX_IOSTREAM_COMMA_SEPARATOR; + *this << v.maximum; + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(physx::PxVec3 &v) +{ + *this >> v.x; + *this >> v.y; + *this >> v.z; + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(physx::PxQuat &v) +{ + *this>>v.x; + *this>>v.y; + *this>>v.z; + *this>>v.w; + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(physx::PxBounds3 &v) +{ + *this >> v.minimum; + *this >> v.maximum; + return *this; +} + +PX_INLINE PsIOStream& PsIOStream::operator>>(const char *&str) +{ + str = NULL; // by default no string streamed... 
+ if ( mBinary ) + { + uint32_t len=0; + *this >> len; + + PX_ASSERT( len < (MAX_STREAM_STRING-1) ); + if ( len < (MAX_STREAM_STRING-1) ) + { + mStream.read(mReadString,len); + mReadString[len] = 0; + str = mReadString; + } + } + return *this; +} + + +PX_INLINE void PsIOStream::storeString(const char *c,bool zeroTerminate) +{ + while ( *c ) + { + mStream.storeByte((uint8_t)*c); + c++; + } + if ( zeroTerminate ) + { + mStream.storeByte(0); + } +} diff --git a/PxShared/src/filebuf/include/PsMemoryBuffer.h b/PxShared/src/filebuf/include/PsMemoryBuffer.h new file mode 100644 index 0000000..5b59386 --- /dev/null +++ b/PxShared/src/filebuf/include/PsMemoryBuffer.h @@ -0,0 +1,449 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PSFILEBUFFER_PSMEMORYBUFFER_H +#define PSFILEBUFFER_PSMEMORYBUFFER_H + +#include "Ps.h" +#include "PsUserAllocated.h" +#include "PsAlignedMalloc.h" +#include "filebuf/PxFileBuf.h" +#include "foundation/PxAssert.h" + +namespace physx +{ +namespace general_PxIOStream2 +{ + using namespace shdfnd; + + const uint32_t BUFFER_SIZE_DEFAULT = 4096; + +//Use this class if you want to use your own allocator +template<class Allocator> +class PxMemoryBufferBase : public PxFileBuf, public Allocator +{ + PX_NOCOPY(PxMemoryBufferBase) + void init(const void *readMem, uint32_t readLen) + { + mAllocator = this; + + mReadBuffer = mReadLoc = static_cast<const uint8_t *>(readMem); + mReadStop = &mReadLoc[readLen]; + + mWriteBuffer = mWriteLoc = mWriteStop = NULL; + mWriteBufferSize = 0; + mDefaultWriteBufferSize = BUFFER_SIZE_DEFAULT; + + mOpenMode = OPEN_READ_ONLY; + mSeekType = SEEKABLE_READ; + } + + void init(uint32_t defaultWriteBufferSize) + { + mAllocator = this; + + mReadBuffer = mReadLoc = mReadStop = NULL; + + mWriteBuffer = mWriteLoc = mWriteStop = NULL; + mWriteBufferSize = 0; + mDefaultWriteBufferSize = defaultWriteBufferSize; + + mOpenMode = OPEN_READ_WRITE_NEW; + mSeekType = SEEKABLE_READWRITE; + } + +public: + PxMemoryBufferBase(const void *readMem,uint32_t readLen) + { + init(readMem, readLen); + } + + PxMemoryBufferBase(const void *readMem,uint32_t readLen, const Allocator &alloc): Allocator(alloc) + { + init(readMem, readLen); + } + + PxMemoryBufferBase(uint32_t defaultWriteBufferSize = BUFFER_SIZE_DEFAULT) + { + init(defaultWriteBufferSize); + } + + PxMemoryBufferBase(uint32_t defaultWriteBufferSize, const Allocator &alloc): 
Allocator(alloc) + { + init(defaultWriteBufferSize); + } + + virtual ~PxMemoryBufferBase(void) + { + reset(); + } + + void setAllocator(Allocator *allocator) + { + mAllocator = allocator; + } + + void initWriteBuffer(uint32_t size) + { + if ( mWriteBuffer == NULL ) + { + if ( size < mDefaultWriteBufferSize ) size = mDefaultWriteBufferSize; + mWriteBuffer = static_cast<uint8_t *>(mAllocator->allocate(size)); + PX_ASSERT( mWriteBuffer ); + mWriteLoc = mWriteBuffer; + mWriteStop = &mWriteBuffer[size]; + mWriteBufferSize = size; + mReadBuffer = mWriteBuffer; + mReadStop = &mWriteBuffer[size]; + mReadLoc = mWriteBuffer; + } + } + + void reset(void) + { + mAllocator->deallocate(mWriteBuffer); + mWriteBuffer = NULL; + mWriteBufferSize = 0; + mWriteLoc = NULL; + mWriteStop = NULL; + mReadBuffer = NULL; + mReadStop = NULL; + mReadLoc = NULL; + } + + virtual OpenMode getOpenMode(void) const + { + return mOpenMode; + } + + + SeekType isSeekable(void) const + { + return mSeekType; + } + + virtual uint32_t read(void* buffer, uint32_t size) + { + if ( (mReadLoc+size) > mReadStop ) + { + size = uint32_t(mReadStop - mReadLoc); + } + if ( size != 0 ) + { + memmove(buffer,mReadLoc,size); + mReadLoc+=size; + } + return size; + } + + virtual uint32_t peek(void* buffer, uint32_t size) + { + if ( (mReadLoc+size) > mReadStop ) + { + size = uint32_t(mReadStop - mReadLoc); + } + if ( size != 0 ) + { + memmove(buffer,mReadLoc,size); + } + return size; + } + + virtual uint32_t write(const void* buffer, uint32_t size) + { + PX_ASSERT( mOpenMode == OPEN_READ_WRITE_NEW ); + if ( mOpenMode == OPEN_READ_WRITE_NEW ) + { + if ( (mWriteLoc+size) > mWriteStop ) + growWriteBuffer(size); + memmove(mWriteLoc,buffer,size); + mWriteLoc+=size; + mReadStop = mWriteLoc; + } + else + { + size = 0; + } + return size; + } + + PX_INLINE const uint8_t * getReadLoc(void) const { return mReadLoc; } + PX_INLINE void advanceReadLoc(uint32_t len) + { + PX_ASSERT(mReadBuffer); + if ( mReadBuffer ) + { + mReadLoc+=len; 
+ if ( mReadLoc >= mReadStop ) + { + mReadLoc = mReadStop; + } + } + } + + virtual uint32_t tellRead(void) const + { + uint32_t ret=0; + + if ( mReadBuffer ) + { + ret = uint32_t(mReadLoc-mReadBuffer); + } + return ret; + } + + virtual uint32_t tellWrite(void) const + { + return uint32_t(mWriteLoc-mWriteBuffer); + } + + virtual uint32_t seekRead(uint32_t loc) + { + uint32_t ret = 0; + PX_ASSERT(mReadBuffer); + if ( mReadBuffer ) + { + mReadLoc = &mReadBuffer[loc]; + if ( mReadLoc >= mReadStop ) + { + mReadLoc = mReadStop; + } + ret = uint32_t(mReadLoc-mReadBuffer); + } + return ret; + } + + virtual uint32_t seekWrite(uint32_t loc) + { + uint32_t ret = 0; + PX_ASSERT( mOpenMode == OPEN_READ_WRITE_NEW ); + if ( mWriteBuffer ) + { + if ( loc > mWriteBufferSize ) + { + mWriteLoc = mWriteStop; + growWriteBuffer(loc - mWriteBufferSize); + } + mWriteLoc = &mWriteBuffer[loc]; + ret = uint32_t(mWriteLoc-mWriteBuffer); + } + return ret; + } + + virtual void flush(void) + { + + } + + virtual uint32_t getFileLength(void) const + { + uint32_t ret = 0; + if ( mReadBuffer ) + { + ret = uint32_t(mReadStop-mReadBuffer); + } + else if ( mWriteBuffer ) + { + ret = uint32_t(mWriteLoc-mWriteBuffer); + } + return ret; + } + + uint32_t getWriteBufferSize(void) const + { + return uint32_t(mWriteLoc-mWriteBuffer); + } + + void setWriteLoc(uint8_t *writeLoc) + { + PX_ASSERT(writeLoc >= mWriteBuffer && writeLoc < mWriteStop ); + mWriteLoc = writeLoc; + mReadStop = mWriteLoc; + } + + const uint8_t * getWriteBuffer(void) const + { + return mWriteBuffer; + } + + /** + * Attention: if you use aligned allocator you cannot free memory with PX_FREE macros instead use deallocate method from base + */ + uint8_t * getWriteBufferOwnership(uint32_t &dataLen) // return the write buffer, and zero it out, the caller is taking ownership of the memory + { + uint8_t *ret = mWriteBuffer; + dataLen = uint32_t(mWriteLoc-mWriteBuffer); + mWriteBuffer = NULL; + mWriteLoc = NULL; + mWriteStop = NULL; + 
mWriteBufferSize = 0; + return ret; + } + + + void alignRead(uint32_t a) + { + uint32_t loc = tellRead(); + uint32_t aloc = ((loc+(a-1))/a)*a; + if ( aloc != loc ) + { + seekRead(aloc); + } + } + + void alignWrite(uint32_t a) + { + uint32_t loc = tellWrite(); + uint32_t aloc = ((loc+(a-1))/a)*a; + if ( aloc != loc ) + { + seekWrite(aloc); + } + } + +private: + + + // double the size of the write buffer or at least as large as the 'size' value passed in. + void growWriteBuffer(uint32_t size) + { + if ( mWriteBuffer == NULL ) + { + if ( size < mDefaultWriteBufferSize ) size = mDefaultWriteBufferSize; + initWriteBuffer(size); + } + else + { + uint32_t oldWriteIndex = uint32_t(mWriteLoc - mWriteBuffer); + uint32_t newSize = mWriteBufferSize*2; + uint32_t avail = newSize-oldWriteIndex; + if ( size >= avail ) newSize = newSize+size; + uint8_t *writeBuffer = static_cast<uint8_t *>(mAllocator->allocate(newSize)); + PX_ASSERT( writeBuffer ); + memmove(writeBuffer,mWriteBuffer,mWriteBufferSize); + mAllocator->deallocate(mWriteBuffer); + mWriteBuffer = writeBuffer; + mWriteBufferSize = newSize; + mWriteLoc = &mWriteBuffer[oldWriteIndex]; + mWriteStop = &mWriteBuffer[mWriteBufferSize]; + uint32_t oldReadLoc = uint32_t(mReadLoc-mReadBuffer); + mReadBuffer = mWriteBuffer; + mReadStop = mWriteLoc; + mReadLoc = &mReadBuffer[oldReadLoc]; + } + } + + const uint8_t *mReadBuffer; + const uint8_t *mReadLoc; + const uint8_t *mReadStop; + + uint8_t *mWriteBuffer; + uint8_t *mWriteLoc; + uint8_t *mWriteStop; + + uint32_t mWriteBufferSize; + uint32_t mDefaultWriteBufferSize; + Allocator *mAllocator; + OpenMode mOpenMode; + SeekType mSeekType; + +}; + +class PxMemoryBufferAllocator +{ +public: + PxMemoryBufferAllocator(uint32_t a = 0) : alignment(a) {} + + virtual void * allocate(uint32_t size) + { + switch(alignment) + { + case 0: + return PX_ALLOC(size, PX_DEBUG_EXP("PxMemoryBufferAllocator")); + case 16 : + return physx::AlignedAllocator<16>().allocate(size, __FILE__, __LINE__); + case 
32 : + return physx::AlignedAllocator<32>().allocate(size, __FILE__, __LINE__); + case 64 : + return physx::AlignedAllocator<64>().allocate(size, __FILE__, __LINE__); + case 128 : + return physx::AlignedAllocator<128>().allocate(size, __FILE__, __LINE__); + default : + PX_ASSERT(0); + } + return NULL; + } + virtual void deallocate(void *mem) + { + switch(alignment) + { + case 0: + PX_FREE(mem); + break; + case 16 : + physx::AlignedAllocator<16>().deallocate(mem); + break; + case 32 : + physx::AlignedAllocator<32>().deallocate(mem); + break; + case 64 : + physx::AlignedAllocator<64>().deallocate(mem); + break; + case 128 : + physx::AlignedAllocator<128>().deallocate(mem); + break; + default : + PX_ASSERT(0); + } + } + virtual ~PxMemoryBufferAllocator(void) {} +private: + PxMemoryBufferAllocator& operator=(const PxMemoryBufferAllocator&); + + const uint32_t alignment; +}; + +//Use this class if you want to use PhysX memory allocator +class PsMemoryBuffer: public PxMemoryBufferBase<PxMemoryBufferAllocator>, public UserAllocated +{ + PX_NOCOPY(PsMemoryBuffer) + typedef PxMemoryBufferBase<PxMemoryBufferAllocator> BaseClass; + +public: + PsMemoryBuffer(const void *readMem,uint32_t readLen): BaseClass(readMem, readLen) {} + PsMemoryBuffer(const void *readMem,uint32_t readLen, uint32_t alignment): BaseClass(readMem, readLen, PxMemoryBufferAllocator(alignment)) {} + + PsMemoryBuffer(uint32_t defaultWriteBufferSize=BUFFER_SIZE_DEFAULT): BaseClass(defaultWriteBufferSize) {} + PsMemoryBuffer(uint32_t defaultWriteBufferSize,uint32_t alignment): BaseClass(defaultWriteBufferSize, PxMemoryBufferAllocator(alignment)) {} +}; + +} +using namespace general_PxIOStream2; +} + +#endif // PSFILEBUFFER_PSMEMORYBUFFER_H + diff --git a/PxShared/src/foundation/doc/PsFoundation.chm b/PxShared/src/foundation/doc/PsFoundation.chm Binary files differnew file mode 100644 index 0000000..df3c07f --- /dev/null +++ b/PxShared/src/foundation/doc/PsFoundation.chm diff --git 
a/PxShared/src/foundation/doc/Readme.txt b/PxShared/src/foundation/doc/Readme.txt new file mode 100644 index 0000000..88e1189 --- /dev/null +++ b/PxShared/src/foundation/doc/Readme.txt @@ -0,0 +1,18 @@ +This is the 'NVIDIA Shared' foundation library. + +This code should not ever appear in any public headers or interfaces. + +This library is primarily a platform abstraction layer. + +It contains code to handle mutexes, atomic operations, etc. + +It also handles some SIMD data types. + +It provides math utility functions. + +It implements a number of common container classes. + +It manages trapping all memory allocations. + +All projects should leverage against this foundation library to +perform these common functions. diff --git a/PxShared/src/foundation/doc/create_docs.cmd b/PxShared/src/foundation/doc/create_docs.cmd new file mode 100644 index 0000000..4691d7d --- /dev/null +++ b/PxShared/src/foundation/doc/create_docs.cmd @@ -0,0 +1,7 @@ +set DOXYGEN_DIR=..\..\..\..\..\..\..\devrel\GameWorks\BuildTools\doxygen-win\bin +set HTMLHELP_DIR=..\..\..\..\..\..\..\devrel\GameWorks\BuildTools\HTMLHelpWorkshop + +%DOXYGEN_DIR%\doxygen.exe docs.doxyfile +cd html +..\%HTMLHELP_DIR%\hhc.exe index.hhp +cd .. 
diff --git a/PxShared/src/foundation/doc/docs.doxyfile b/PxShared/src/foundation/doc/docs.doxyfile new file mode 100644 index 0000000..624a44f --- /dev/null +++ b/PxShared/src/foundation/doc/docs.doxyfile @@ -0,0 +1,13 @@ +# Doxyfile 1.5.8 + +PROJECT_NAME = "NVIDIA(R) PsFoundation Reference" +#ENABLED_SECTIONS = PHYSICS_SDK_PAGES +WARN_LOGFILE = PsFoundation.err +INPUT = ../include ../include/windows ../src ../src/windows +EXTRACT_ALL = YES +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = YES +GENERATE_HTMLHELP = YES +HTML_OUTPUT = html/ +CHM_FILE = ../PsFoundation.chm +TOC_EXPAND = YES diff --git a/PxShared/src/foundation/externals/src/android/cpu-features.c b/PxShared/src/foundation/externals/src/android/cpu-features.c new file mode 100644 index 0000000..4754c46 --- /dev/null +++ b/PxShared/src/foundation/externals/src/android/cpu-features.c @@ -0,0 +1,1082 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* ChangeLog for this library: + * + * NDK r8d: Add android_setCpu(). + * + * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16, + * VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt. + * + * Rewrite the code to parse /proc/self/auxv instead of + * the "Features" field in /proc/cpuinfo. + * + * Dynamically allocate the buffer that holds the content + * of /proc/cpuinfo to deal with newer hardware. + * + * NDK r7c: Fix CPU count computation. The old method only reported the + * number of _active_ CPUs when the library was initialized, + * which could be less than the real total. + * + * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7 + * for an ARMv6 CPU (see below). + * + * Handle kernels that only report 'neon', and not 'vfpv3' + * (VFPv3 is mandated by the ARM architecture if Neon is implemented) + * + * Handle kernels that only report 'vfpv3d16', and not 'vfpv3' + * + * Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in + * android_getCpuFamily(). + * + * NDK r4: Initial release + */ + +#if defined(__le32__) + +// When users enter this, we should only provide interface and +// libportable will give the implementations.
+ +#else // !__le32__ + +#include <sys/system_properties.h> +#include <pthread.h> +#include "cpu-features.h" +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <errno.h> + +static pthread_once_t g_once; +static int g_inited; +static AndroidCpuFamily g_cpuFamily; +static uint64_t g_cpuFeatures; +static int g_cpuCount; + +#ifdef __arm__ +static uint32_t g_cpuIdArm; +#endif + +static const int android_cpufeatures_debug = 0; + +#ifdef __arm__ +# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_ARM +#elif defined __i386__ +# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_X86 +#else +# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_UNKNOWN +#endif + +#define D(...) \ + do { \ + if (android_cpufeatures_debug) { \ + printf(__VA_ARGS__); fflush(stdout); \ + } \ + } while (0) + +#ifdef __i386__ +static __inline__ void x86_cpuid(int func, int values[4]) +{ + int a, b, c, d; + /* We need to preserve ebx since we're compiling PIC code */ + /* this means we can't use "=b" for the second output register */ + __asm__ __volatile__ ( \ + "push %%ebx\n" + "cpuid\n" \ + "mov %%ebx, %1\n" + "pop %%ebx\n" + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "a" (func) \ + ); + values[0] = a; + values[1] = b; + values[2] = c; + values[3] = d; +} +#endif + +/* Get the size of a file by reading it until the end. This is needed + * because files under /proc do not always return a valid size when + * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed. 
+ */ +static int +get_file_size(const char* pathname) +{ + int fd, ret, result = 0; + char buffer[256]; + + fd = open(pathname, O_RDONLY); + if (fd < 0) { + D("Can't open %s: %s\n", pathname, strerror(errno)); + return -1; + } + + for (;;) { + int ret = read(fd, buffer, sizeof buffer); + if (ret < 0) { + if (errno == EINTR) + continue; + D("Error while reading %s: %s\n", pathname, strerror(errno)); + break; + } + if (ret == 0) + break; + + result += ret; + } + close(fd); + return result; +} + +/* Read the content of /proc/cpuinfo into a user-provided buffer. + * Return the length of the data, or -1 on error. Does *not* + * zero-terminate the content. Will not read more + * than 'buffsize' bytes. + */ +static int +read_file(const char* pathname, char* buffer, size_t buffsize) +{ + int fd, count; + + fd = open(pathname, O_RDONLY); + if (fd < 0) { + D("Could not open %s: %s\n", pathname, strerror(errno)); + return -1; + } + count = 0; + while (count < (int)buffsize) { + int ret = read(fd, buffer + count, buffsize - count); + if (ret < 0) { + if (errno == EINTR) + continue; + D("Error while reading from %s: %s\n", pathname, strerror(errno)); + if (count == 0) + count = -1; + break; + } + if (ret == 0) + break; + count += ret; + } + close(fd); + return count; +} + +/* Extract the content of a the first occurence of a given field in + * the content of /proc/cpuinfo and return it as a heap-allocated + * string that must be freed by the caller. + * + * Return NULL if not found + */ +static char* +extract_cpuinfo_field(const char* buffer, int buflen, const char* field) +{ + int fieldlen = strlen(field); + const char* bufend = buffer + buflen; + char* result = NULL; + int len, ignore; + const char *p, *q; + + /* Look for first field occurence, and ensures it starts the line. 
*/ + p = buffer; + for (;;) { + p = memmem(p, bufend-p, field, fieldlen); + if (p == NULL) + goto EXIT; + + if (p == buffer || p[-1] == '\n') + break; + + p += fieldlen; + } + + /* Skip to the first column followed by a space */ + p += fieldlen; + p = memchr(p, ':', bufend-p); + if (p == NULL || p[1] != ' ') + goto EXIT; + + /* Find the end of the line */ + p += 2; + q = memchr(p, '\n', bufend-p); + if (q == NULL) + q = bufend; + + /* Copy the line into a heap-allocated buffer */ + len = q-p; + result = malloc(len+1); + if (result == NULL) + goto EXIT; + + memcpy(result, p, len); + result[len] = '\0'; + +EXIT: + return result; +} + +/* Checks that a space-separated list of items contains one given 'item'. + * Returns 1 if found, 0 otherwise. + */ +static int +has_list_item(const char* list, const char* item) +{ + const char* p = list; + int itemlen = strlen(item); + + if (list == NULL) + return 0; + + while (*p) { + const char* q; + + /* skip spaces */ + while (*p == ' ' || *p == '\t') + p++; + + /* find end of current list item */ + q = p; + while (*q && *q != ' ' && *q != '\t') + q++; + + if (itemlen == q-p && !memcmp(p, item, itemlen)) + return 1; + + /* skip to next item */ + p = q; + } + return 0; +} + +/* Parse a number starting from 'input', but not going further + * than 'limit'. Return the value into '*result'. + * + * NOTE: Does not skip over leading spaces, or deal with sign characters. + * NOTE: Ignores overflows. + * + * The function returns NULL in case of error (bad format), or the new + * position after the decimal number in case of success (which will always + * be <= 'limit'). 
+ */ +static const char* +parse_number(const char* input, const char* limit, int base, int* result) +{ + const char* p = input; + int val = 0; + while (p < limit) { + int d = (*p - '0'); + if ((unsigned)d >= 10U) { + d = (*p - 'a'); + if ((unsigned)d >= 6U) + d = (*p - 'A'); + if ((unsigned)d >= 6U) + break; + d += 10; + } + if (d >= base) + break; + val = val*base + d; + p++; + } + if (p == input) + return NULL; + + *result = val; + return p; +} + +static const char* +parse_decimal(const char* input, const char* limit, int* result) +{ + return parse_number(input, limit, 10, result); +} + +static const char* +parse_hexadecimal(const char* input, const char* limit, int* result) +{ + return parse_number(input, limit, 16, result); +} + +/* This small data type is used to represent a CPU list / mask, as read + * from sysfs on Linux. See http://www.kernel.org/doc/Documentation/cputopology.txt + * + * For now, we don't expect more than 32 cores on mobile devices, so keep + * everything simple. + */ +typedef struct { + uint32_t mask; +} CpuList; + +static __inline__ void +cpulist_init(CpuList* list) { + list->mask = 0; +} + +static __inline__ void +cpulist_and(CpuList* list1, CpuList* list2) { + list1->mask &= list2->mask; +} + +static __inline__ void +cpulist_set(CpuList* list, int index) { + if ((unsigned)index < 32) { + list->mask |= (uint32_t)(1U << index); + } +} + +static __inline__ int +cpulist_count(CpuList* list) { + return __builtin_popcount(list->mask); +} + +/* Parse a textual list of cpus and store the result inside a CpuList object. + * Input format is the following: + * - comma-separated list of items (no spaces) + * - each item is either a single decimal number (cpu index), or a range made + * of two numbers separated by a single dash (-). Ranges are inclusive. 
+ * + * Examples: 0 + * 2,4-127,128-143 + * 0-1 + */ +static void +cpulist_parse(CpuList* list, const char* line, int line_len) +{ + const char* p = line; + const char* end = p + line_len; + const char* q; + + /* NOTE: the input line coming from sysfs typically contains a + * trailing newline, so take care of it in the code below + */ + while (p < end && *p != '\n') + { + int val, start_value, end_value; + + /* Find the end of current item, and put it into 'q' */ + q = memchr(p, ',', end-p); + if (q == NULL) { + q = end; + } + + /* Get first value */ + p = parse_decimal(p, q, &start_value); + if (p == NULL) + goto BAD_FORMAT; + + end_value = start_value; + + /* If we're not at the end of the item, expect a dash and + * and integer; extract end value. + */ + if (p < q && *p == '-') { + p = parse_decimal(p+1, q, &end_value); + if (p == NULL) + goto BAD_FORMAT; + } + + /* Set bits CPU list bits */ + for (val = start_value; val <= end_value; val++) { + cpulist_set(list, val); + } + + /* Jump to next item */ + p = q; + if (p < end) + p++; + } + +BAD_FORMAT: + ; +} + +/* Read a CPU list from one sysfs file */ +static void +cpulist_read_from(CpuList* list, const char* filename) +{ + char file[64]; + int filelen; + + cpulist_init(list); + + filelen = read_file(filename, file, sizeof file); + if (filelen < 0) { + D("Could not read %s: %s\n", filename, strerror(errno)); + return; + } + + cpulist_parse(list, file, filelen); +} + +// See <asm/hwcap.h> kernel header. +#define HWCAP_VFP (1 << 6) +#define HWCAP_IWMMXT (1 << 9) +#define HWCAP_NEON (1 << 12) +#define HWCAP_VFPv3 (1 << 13) +#define HWCAP_VFPv3D16 (1 << 14) +#define HWCAP_VFPv4 (1 << 16) +#define HWCAP_IDIVA (1 << 17) +#define HWCAP_IDIVT (1 << 18) + +#define AT_HWCAP 16 + +#if defined(__arm__) +/* Compute the ELF HWCAP flags. + */ +static uint32_t +get_elf_hwcap(const char* cpuinfo, int cpuinfo_len) +{ + /* IMPORTANT: + * Accessing /proc/self/auxv doesn't work anymore on all + * platform versions. 
More specifically, when running inside + * a regular application process, most of /proc/self/ will be + * non-readable, including /proc/self/auxv. This doesn't + * happen however if the application is debuggable, or when + * running under the "shell" UID, which is why this was not + * detected appropriately. + */ +#if 0 + uint32_t result = 0; + const char filepath[] = "/proc/self/auxv"; + int fd = open(filepath, O_RDONLY); + if (fd < 0) { + D("Could not open %s: %s\n", filepath, strerror(errno)); + return 0; + } + + struct { uint32_t tag; uint32_t value; } entry; + + for (;;) { + int ret = read(fd, (char*)&entry, sizeof entry); + if (ret < 0) { + if (errno == EINTR) + continue; + D("Error while reading %s: %s\n", filepath, strerror(errno)); + break; + } + // Detect end of list. + if (ret == 0 || (entry.tag == 0 && entry.value == 0)) + break; + if (entry.tag == AT_HWCAP) { + result = entry.value; + break; + } + } + close(fd); + return result; +#else + // Recreate ELF hwcaps by parsing /proc/cpuinfo Features tag. + uint32_t hwcaps = 0; + + char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features"); + + if (cpuFeatures != NULL) { + D("Found cpuFeatures = '%s'\n", cpuFeatures); + + if (has_list_item(cpuFeatures, "vfp")) + hwcaps |= HWCAP_VFP; + if (has_list_item(cpuFeatures, "vfpv3")) + hwcaps |= HWCAP_VFPv3; + if (has_list_item(cpuFeatures, "vfpv3d16")) + hwcaps |= HWCAP_VFPv3D16; + if (has_list_item(cpuFeatures, "vfpv4")) + hwcaps |= HWCAP_VFPv4; + if (has_list_item(cpuFeatures, "neon")) + hwcaps |= HWCAP_NEON; + if (has_list_item(cpuFeatures, "idiva")) + hwcaps |= HWCAP_IDIVA; + if (has_list_item(cpuFeatures, "idivt")) + hwcaps |= HWCAP_IDIVT; + if (has_list_item(cpuFeatures, "idiv")) + hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT; + if (has_list_item(cpuFeatures, "iwmmxt")) + hwcaps |= HWCAP_IWMMXT; + + free(cpuFeatures); + } + return hwcaps; +#endif +} +#endif /* __arm__ */ + +/* Return the number of cpus present on a given device. 
+ * + * To handle all weird kernel configurations, we need to compute the + * intersection of the 'present' and 'possible' CPU lists and count + * the result. + */ +static int +get_cpu_count(void) +{ + CpuList cpus_present[1]; + CpuList cpus_possible[1]; + + cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present"); + cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible"); + + /* Compute the intersection of both sets to get the actual number of + * CPU cores that can be used on this device by the kernel. + */ + cpulist_and(cpus_present, cpus_possible); + + return cpulist_count(cpus_present); +} + +static void +android_cpuInitFamily(void) +{ +#if defined(__arm__) + g_cpuFamily = ANDROID_CPU_FAMILY_ARM; +#elif defined(__i386__) + g_cpuFamily = ANDROID_CPU_FAMILY_X86; +#elif defined(__mips__) + g_cpuFamily = ANDROID_CPU_FAMILY_MIPS; +#else + g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN; +#endif +} + +static void +android_cpuInit(void) +{ + char* cpuinfo = NULL; + int cpuinfo_len; + + android_cpuInitFamily(); + + g_cpuFeatures = 0; + g_cpuCount = 1; + g_inited = 1; + + cpuinfo_len = get_file_size("/proc/cpuinfo"); + if (cpuinfo_len < 0) { + D("cpuinfo_len cannot be computed!"); + return; + } + cpuinfo = malloc(cpuinfo_len); + if (cpuinfo == NULL) { + D("cpuinfo buffer could not be allocated"); + return; + } + cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len); + D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len, + cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo); + + if (cpuinfo_len < 0) /* should not happen */ { + free(cpuinfo); + return; + } + + /* Count the CPU cores, the value may be 0 for single-core CPUs */ + g_cpuCount = get_cpu_count(); + if (g_cpuCount == 0) { + g_cpuCount = 1; + } + + D("found cpuCount = %d\n", g_cpuCount); + +#ifdef __arm__ + { + char* features = NULL; + char* architecture = NULL; + + /* Extract architecture from the "CPU Architecture" field. 
+ * The list is well-known, unlike the the output of + * the 'Processor' field which can vary greatly. + * + * See the definition of the 'proc_arch' array in + * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in + * same file. + */ + char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture"); + + if (cpuArch != NULL) { + char* end; + long archNumber; + int hasARMv7 = 0; + + D("found cpuArch = '%s'\n", cpuArch); + + /* read the initial decimal number, ignore the rest */ + archNumber = strtol(cpuArch, &end, 10); + + /* Here we assume that ARMv8 will be upwards compatible with v7 + * in the future. Unfortunately, there is no 'Features' field to + * indicate that Thumb-2 is supported. + */ + if (end > cpuArch && archNumber >= 7) { + hasARMv7 = 1; + } + + /* Unfortunately, it seems that certain ARMv6-based CPUs + * report an incorrect architecture number of 7! + * + * See http://code.google.com/p/android/issues/detail?id=10812 + * + * We try to correct this by looking at the 'elf_format' + * field reported by the 'Processor' field, which is of the + * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for + * an ARMv6-one. 
+ */ + if (hasARMv7) { + char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len, + "Processor"); + if (cpuProc != NULL) { + D("found cpuProc = '%s'\n", cpuProc); + if (has_list_item(cpuProc, "(v6l)")) { + D("CPU processor and architecture mismatch!!\n"); + hasARMv7 = 0; + } + free(cpuProc); + } + } + + if (hasARMv7) { + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7; + } + + /* The LDREX / STREX instructions are available from ARMv6 */ + if (archNumber >= 6) { + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX; + } + + free(cpuArch); + } + + /* Extract the list of CPU features from ELF hwcaps */ + uint32_t hwcaps = get_elf_hwcap(cpuinfo, cpuinfo_len); + + if (hwcaps != 0) { + int has_vfp = (hwcaps & HWCAP_VFP); + int has_vfpv3 = (hwcaps & HWCAP_VFPv3); + int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16); + int has_vfpv4 = (hwcaps & HWCAP_VFPv4); + int has_neon = (hwcaps & HWCAP_NEON); + int has_idiva = (hwcaps & HWCAP_IDIVA); + int has_idivt = (hwcaps & HWCAP_IDIVT); + int has_iwmmxt = (hwcaps & HWCAP_IWMMXT); + + // The kernel does a poor job at ensuring consistency when + // describing CPU features. So lots of guessing is needed. + + // 'vfpv4' implies VFPv3|VFP_FMA|FP16 + if (has_vfpv4) + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | + ANDROID_CPU_ARM_FEATURE_VFP_FP16 | + ANDROID_CPU_ARM_FEATURE_VFP_FMA; + + // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC, + // a value of 'vfpv3' doesn't necessarily mean that the D32 + // feature is present, so be conservative. All CPUs in the + // field that support D32 also support NEON, so this should + // not be a problem in practice. + if (has_vfpv3 || has_vfpv3d16) + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; + + // 'vfp' is super ambiguous. Depending on the kernel, it can + // either mean VFPv2 or VFPv3. Make it depend on ARMv7. 
+ if (has_vfp) { + if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; + else + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2; + } + + // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA + if (has_neon) { + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | + ANDROID_CPU_ARM_FEATURE_NEON | + ANDROID_CPU_ARM_FEATURE_VFP_D32; + if (has_vfpv4) + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA; + } + + // VFPv3 implies VFPv2 and ARMv7 + if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 | + ANDROID_CPU_ARM_FEATURE_ARMv7; + + if (has_idiva) + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM; + if (has_idivt) + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2; + + if (has_iwmmxt) + g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt; + } + + /* Extract the cpuid value from various fields */ + // The CPUID value is broken up in several entries in /proc/cpuinfo. + // This table is used to rebuild it from the entries. 
+ static const struct CpuIdEntry { + const char* field; + char format; + char bit_lshift; + char bit_length; + } cpu_id_entries[] = { + { "CPU implementer", 'x', 24, 8 }, + { "CPU variant", 'x', 20, 4 }, + { "CPU part", 'x', 4, 12 }, + { "CPU revision", 'd', 0, 4 }, + }; + size_t i; + D("Parsing /proc/cpuinfo to recover CPUID\n"); + for (i = 0; + i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]); + ++i) { + const struct CpuIdEntry* entry = &cpu_id_entries[i]; + char* value = extract_cpuinfo_field(cpuinfo, + cpuinfo_len, + entry->field); + if (value == NULL) + continue; + + D("field=%s value='%s'\n", entry->field, value); + char* value_end = value + strlen(value); + int val = 0; + const char* start = value; + const char* p; + if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) { + start += 2; + p = parse_hexadecimal(start, value_end, &val); + } else if (entry->format == 'x') + p = parse_hexadecimal(value, value_end, &val); + else + p = parse_decimal(value, value_end, &val); + + if (p > (const char*)start) { + val &= ((1 << entry->bit_length)-1); + val <<= entry->bit_lshift; + g_cpuIdArm |= (uint32_t) val; + } + + free(value); + } + + // Handle kernel configuration bugs that prevent the correct + // reporting of CPU features. + static const struct CpuFix { + uint32_t cpuid; + uint64_t or_flags; + } cpu_fixes[] = { + /* The Nexus 4 (Qualcomm Krait) kernel configuration + * forgets to report IDIV support. 
*/ + { 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM | + ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 }, + { 0x510006f3, ANDROID_CPU_ARM_FEATURE_IDIV_ARM | + ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 }, + }; + size_t n; + for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) { + const struct CpuFix* entry = &cpu_fixes[n]; + + if (g_cpuIdArm == entry->cpuid) + g_cpuFeatures |= entry->or_flags; + } + + } +#endif /* __arm__ */ + +#ifdef __i386__ + int regs[4]; + +/* According to http://en.wikipedia.org/wiki/CPUID */ +#define VENDOR_INTEL_b 0x756e6547 +#define VENDOR_INTEL_c 0x6c65746e +#define VENDOR_INTEL_d 0x49656e69 + + x86_cpuid(0, regs); + int vendorIsIntel = (regs[1] == VENDOR_INTEL_b && + regs[2] == VENDOR_INTEL_c && + regs[3] == VENDOR_INTEL_d); + + x86_cpuid(1, regs); + if ((regs[2] & (1 << 9)) != 0) { + g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3; + } + if ((regs[2] & (1 << 23)) != 0) { + g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT; + } + if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) { + g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE; + } +#endif + + free(cpuinfo); +} + + +AndroidCpuFamily +android_getCpuFamily(void) +{ + pthread_once(&g_once, android_cpuInit); + return g_cpuFamily; +} + + +uint64_t +android_getCpuFeatures(void) +{ + pthread_once(&g_once, android_cpuInit); + return g_cpuFeatures; +} + + +int +android_getCpuCount(void) +{ + pthread_once(&g_once, android_cpuInit); + return g_cpuCount; +} + +static void +android_cpuInitDummy(void) +{ + g_inited = 1; +} + +int +android_setCpu(int cpu_count, uint64_t cpu_features) +{ + /* Fail if the library was already initialized. */ + if (g_inited) + return 0; + + android_cpuInitFamily(); + g_cpuCount = (cpu_count <= 0 ? 
1 : cpu_count); + g_cpuFeatures = cpu_features; + pthread_once(&g_once, android_cpuInitDummy); + + return 1; +} + +#ifdef __arm__ +uint32_t +android_getCpuIdArm(void) +{ + pthread_once(&g_once, android_cpuInit); + return g_cpuIdArm; +} + +int +android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id) +{ + if (!android_setCpu(cpu_count, cpu_features)) + return 0; + + g_cpuIdArm = cpu_id; + return 1; +} +#endif /* __arm__ */ + +/* + * Technical note: Making sense of ARM's FPU architecture versions. + * + * FPA was ARM's first attempt at an FPU architecture. There is no Android + * device that actually uses it since this technology was already obsolete + * when the project started. If you see references to FPA instructions + * somewhere, you can be sure that this doesn't apply to Android at all. + * + * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of + * new versions / additions to it. ARM considers this obsolete right now, + * and no known Android device implements it either. + * + * VFPv2 added a few instructions to VFPv1, and is an *optional* extension + * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device + * supporting the 'armeabi' ABI doesn't necessarily support these. + * + * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used + * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated + * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means + * that it provides 16 double-precision FPU registers (d0-d15) and 32 + * single-precision ones (s0-s31) which happen to be mapped to the same + * register banks. + * + * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16 + * additional double precision registers (d16-d31). Note that there are + * still only 32 single precision registers. + * + * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision + * registers. It is only used on ARMv7-M (i.e. 
on micro-controllers) which + * are not supported by Android. Note that it is not compatible with VFPv2. + * + * NOTE: The term 'VFPv3' usually designates either VFPv3-D16 or VFPv3-D32 + * depending on context. For example GCC uses it for VFPv3-D32, but + * the Linux kernel code uses it for VFPv3-D16 (especially in + * /proc/cpuinfo). Always try to use the full designation when + * possible. + * + * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides + * instructions to perform parallel computations on vectors of 8, 16, + * 32, 64 and 128 bit quantities. NEON requires VFPv3-D32 since all + * NEON registers are also mapped to the same register banks. + * + * VFPv4-D16 adds a few instructions on top of VFPv3-D16 in order to + * perform fused multiply-accumulate on VFP registers, as well as + * half-precision (16-bit) conversion operations. + * + * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision + * registers. + * + * VFPv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused + * multiply-accumulate instructions that work on the NEON registers. + * + * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32 + * depending on context. + * + * The following information was determined by scanning the binutils-2.22 + * sources: + * + * Basic VFP instruction subsets: + * + * #define FPU_VFP_EXT_V1xD 0x08000000 // Base VFP instruction set. + * #define FPU_VFP_EXT_V1 0x04000000 // Double-precision insns. + * #define FPU_VFP_EXT_V2 0x02000000 // ARM10E VFPr1. + * #define FPU_VFP_EXT_V3xD 0x01000000 // VFPv3 single-precision. + * #define FPU_VFP_EXT_V3 0x00800000 // VFPv3 double-precision. + * #define FPU_NEON_EXT_V1 0x00400000 // Neon (SIMD) insns. + * #define FPU_VFP_EXT_D32 0x00200000 // Registers D16-D31. + * #define FPU_VFP_EXT_FP16 0x00100000 // Half-precision extensions.
+ * #define FPU_NEON_EXT_FMA 0x00080000 // Neon fused multiply-add + * #define FPU_VFP_EXT_FMA 0x00040000 // VFP fused multiply-add + * + * FPU types (excluding NEON) + * + * FPU_VFP_V1xD (EXT_V1xD) + * | + * +--------------------------+ + * | | + * FPU_VFP_V1 (+EXT_V1) FPU_VFP_V3xD (+EXT_V2+EXT_V3xD) + * | | + * | | + * FPU_VFP_V2 (+EXT_V2) FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA) + * | + * FPU_VFP_V3D16 (+EXT_V3xD+EXT_V3) + * | + * +--------------------------+ + * | | + * FPU_VFP_V3 (+EXT_D32) FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA) + * | | + * | FPU_VFP_V4 (+EXT_D32) + * | + * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA) + * + * VFP architectures: + * + * ARCH_VFP_V1xD (EXT_V1xD) + * | + * +------------------+ + * | | + * | ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD) + * | | + * | ARCH_VFP_V3xD_FP16 (+EXT_FP16) + * | | + * | ARCH_VFP_V4_SP_D16 (+EXT_FMA) + * | + * ARCH_VFP_V1 (+EXT_V1) + * | + * ARCH_VFP_V2 (+EXT_V2) + * | + * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3) + * | + * +-------------------+ + * | | + * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) + * | + * +-------------------+ + * | | + * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) + * | | + * | ARCH_VFP_V4 (+EXT_D32) + * | | + * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) + * | + * ARCH_VFP_V3 (+EXT_D32) + * | + * +-------------------+ + * | | + * | ARCH_VFP_V3_FP16 (+EXT_FP16) + * | + * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON) + * | + * ARCH_NEON_FP16 (+EXT_FP16) + * + * -fpu=<name> values and their correspondence with FPU architectures above: + * + * {"vfp", FPU_ARCH_VFP_V2}, + * {"vfp9", FPU_ARCH_VFP_V2}, + * {"vfp3", FPU_ARCH_VFP_V3}, // For backwards compatibility.
+ * {"vfp10", FPU_ARCH_VFP_V2}, + * {"vfp10-r0", FPU_ARCH_VFP_V1}, + * {"vfpxd", FPU_ARCH_VFP_V1xD}, + * {"vfpv2", FPU_ARCH_VFP_V2}, + * {"vfpv3", FPU_ARCH_VFP_V3}, + * {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16}, + * {"vfpv3-d16", FPU_ARCH_VFP_V3D16}, + * {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16}, + * {"vfpv3xd", FPU_ARCH_VFP_V3xD}, + * {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16}, + * {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1}, + * {"neon-fp16", FPU_ARCH_NEON_FP16}, + * {"vfpv4", FPU_ARCH_VFP_V4}, + * {"vfpv4-d16", FPU_ARCH_VFP_V4D16}, + * {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16}, + * {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4}, + * + * + * Simplified diagram that only includes FPUs supported by Android: + * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI, + * all others are optional and must be probed at runtime. + * + * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3) + * | + * +-------------------+ + * | | + * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) + * | + * +-------------------+ + * | | + * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) + * | | + * | ARCH_VFP_V4 (+EXT_D32) + * | | + * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) + * | + * ARCH_VFP_V3 (+EXT_D32) + * | + * +-------------------+ + * | | + * | ARCH_VFP_V3_FP16 (+EXT_FP16) + * | + * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON) + * | + * ARCH_NEON_FP16 (+EXT_FP16) + * + */ + +#endif // defined(__le32__) diff --git a/PxShared/src/foundation/externals/src/android/cpu-features.h b/PxShared/src/foundation/externals/src/android/cpu-features.h new file mode 100644 index 0000000..89f7666 --- /dev/null +++ b/PxShared/src/foundation/externals/src/android/cpu-features.h @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#ifndef CPU_FEATURES_H +#define CPU_FEATURES_H + +#include <sys/cdefs.h> +#include <stdint.h> + +__BEGIN_DECLS + +typedef enum +{ + ANDROID_CPU_FAMILY_UNKNOWN = 0, + ANDROID_CPU_FAMILY_ARM, + ANDROID_CPU_FAMILY_X86, + ANDROID_CPU_FAMILY_MIPS, + ANDROID_CPU_FAMILY_MAX /* do not remove */ +} AndroidCpuFamily; + +/* Return family of the device's CPU */ +extern AndroidCpuFamily android_getCpuFamily(void); + +/* The list of feature flags for ARM CPUs that can be recognized by the + * library. Value details are: + * + * VFPv2: + * CPU supports the VFPv2 instruction set. Many, but not all, ARMv6 CPUs + * support these instructions. 
VFPv2 is a subset of VFPv3 so this will + * be set whenever VFPv3 is set too. + * + * ARMv7: + * CPU supports the ARMv7-A basic instruction set. + * This feature is mandated by the 'armeabi-v7a' ABI. + * + * VFPv3: + * CPU supports the VFPv3-D16 instruction set, providing hardware FPU + * support for single and double precision floating point registers. + * Note that only 16 FPU registers are available by default, unless + * the D32 bit is set too. This feature is also mandated by the + * 'armeabi-v7a' ABI. + * + * VFP_D32: + * CPU VFP optional extension that provides 32 FPU registers, + * instead of 16. Note that ARM mandates this feature if the 'NEON' + * feature is implemented by the CPU. + * + * NEON: + * CPU FPU supports "ARM Advanced SIMD" instructions, also known as + * NEON. Note that this mandates the VFP_D32 feature as well, per the + * ARM Architecture specification. + * + * VFP_FP16: + * Half-width floating precision VFP extension. If set, the CPU + * supports instructions to perform floating-point operations on + * 16-bit registers. This is part of the VFPv4 specification, but + * not mandated by any Android ABI. + * + * VFP_FMA: + * Fused multiply-accumulate VFP instructions extension. Also part of + * the VFPv4 specification, but not mandated by any Android ABI. + * + * NEON_FMA: + * Fused multiply-accumulate NEON instructions extension. Optional + * extension from the VFPv4 specification, but not mandated by any + * Android ABI. + * + * IDIV_ARM: + * Integer division available in ARM mode. Only available + * on recent CPUs (e.g. Cortex-A15). + * + * IDIV_THUMB2: + * Integer division available in Thumb-2 mode. Only available + * on recent CPUs (e.g. Cortex-A15). + * + * iWMMXt: + * Optional extension that adds MMX registers and operations to an + * ARM CPU. This is only available on a few XScale-based CPU designs + * sold by Marvell. Pretty rare in practice.
+ * + * If you want to tell the compiler to generate code that targets one of + * the feature sets above, you should probably use one of the following + * flags (for more details, see technical note at the end of this file): + * + * -mfpu=vfp + * -mfpu=vfpv2 + * These are equivalent and tell GCC to use VFPv2 instructions for + * floating-point operations. Use this if you want your code to + * run on *some* ARMv6 devices, and any ARMv7-A device supported + * by Android. + * + * Generated code requires VFPv2 feature. + * + * -mfpu=vfpv3-d16 + * Tell GCC to use VFPv3 instructions (using only 16 FPU registers). + * This should be generic code that runs on any CPU that supports the + * 'armeabi-v7a' Android ABI. Note that no ARMv6 CPU supports this. + * + * Generated code requires VFPv3 feature. + * + * -mfpu=vfpv3 + * Tell GCC to use VFPv3 instructions with 32 FPU registers. + * Generated code requires VFPv3|VFP_D32 features. + * + * -mfpu=neon + * Tell GCC to use VFPv3 instructions with 32 FPU registers, and + * also support NEON intrinsics (see <arm_neon.h>). + * Generated code requires VFPv3|VFP_D32|NEON features. + * + * -mfpu=vfpv4-d16 + * Generated code requires VFPv3|VFP_FP16|VFP_FMA features. + * + * -mfpu=vfpv4 + * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32 features. + * + * -mfpu=neon-vfpv4 + * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|NEON|NEON_FMA + * features. + * + * -mcpu=cortex-a7 + * -mcpu=cortex-a15 + * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32| + * NEON|NEON_FMA|IDIV_ARM|IDIV_THUMB2 + * This flag implies -mfpu=neon-vfpv4. + * + * -mcpu=iwmmxt + * Allows the use of iWMMXt intrinsics with GCC.
+ */
+enum
+{
+    ANDROID_CPU_ARM_FEATURE_ARMv7 = (1 << 0),
+    ANDROID_CPU_ARM_FEATURE_VFPv3 = (1 << 1),
+    ANDROID_CPU_ARM_FEATURE_NEON = (1 << 2),
+    ANDROID_CPU_ARM_FEATURE_LDREX_STREX = (1 << 3),
+    ANDROID_CPU_ARM_FEATURE_VFPv2 = (1 << 4),
+    ANDROID_CPU_ARM_FEATURE_VFP_D32 = (1 << 5),
+    ANDROID_CPU_ARM_FEATURE_VFP_FP16 = (1 << 6),
+    ANDROID_CPU_ARM_FEATURE_VFP_FMA = (1 << 7),
+    ANDROID_CPU_ARM_FEATURE_NEON_FMA = (1 << 8),
+    ANDROID_CPU_ARM_FEATURE_IDIV_ARM = (1 << 9),
+    ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 = (1 << 10),
+    ANDROID_CPU_ARM_FEATURE_iWMMXt = (1 << 11),
+};
+
+/* Feature flags for x86 CPUs. Like the ARM flags above, every flag is a
+ * distinct bit, so several of them can be combined into a single mask.
+ */
+enum
+{
+    ANDROID_CPU_X86_FEATURE_SSSE3 = (1 << 0),
+    ANDROID_CPU_X86_FEATURE_POPCNT = (1 << 1),
+    ANDROID_CPU_X86_FEATURE_MOVBE = (1 << 2),
+};
+
+/* Return the CPU feature mask as a 64-bit value.
+ * NOTE(review): presumably a bitwise OR of the ANDROID_CPU_<family>_FEATURE_*
+ * flags above for the family reported by android_getCpuFamily() -- confirm
+ * against the implementation in cpu-features.c.
+ */
+extern uint64_t android_getCpuFeatures(void);
+
+/* Return the number of CPU cores detected on this device. */
+extern int android_getCpuCount(void);
+
+/* The following is used to force the CPU count and features
+ * mask in sandboxed processes. Under 4.1 and higher, these processes
+ * cannot access /proc, which is the only way to get information from
+ * the kernel about the current hardware (at least on ARM).
+ *
+ * It _must_ be called only once, and before any android_getCpuXXX
+ * function, any other case will fail.
+ *
+ * This function returns 1 on success, and 0 on failure.
+ */
+extern int android_setCpu(int cpu_count, uint64_t cpu_features);
+
+#ifdef __arm__
+/* Retrieve the ARM 32-bit CPUID value from the kernel.
+ * Note that this cannot work on sandboxed processes under 4.1 and
+ * higher, unless you called android_setCpuArm() before.
+ */
+extern uint32_t android_getCpuIdArm(void);
+
+/* An ARM-specific variant of android_setCpu() that also allows you
+ * to set the ARM CPUID field.
+ */ +extern int android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id); +#endif + +__END_DECLS + +#endif /* CPU_FEATURES_H */ diff --git a/PxShared/src/foundation/include/Ps.h b/PxShared/src/foundation/include/Ps.h new file mode 100644 index 0000000..89fc9c7 --- /dev/null +++ b/PxShared/src/foundation/include/Ps.h @@ -0,0 +1,70 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PS_H +#define PSFOUNDATION_PS_H + +/*! \file top level include file for shared foundation */ + +#include "foundation/Px.h" + +/** +Platform specific defines +*/ +#if PX_WINDOWS_FAMILY || PX_XBOXONE +#pragma intrinsic(memcmp) +#pragma intrinsic(memcpy) +#pragma intrinsic(memset) +#pragma intrinsic(abs) +#pragma intrinsic(labs) +#endif + +// An expression that should expand to nothing in non PX_CHECKED builds. +// We currently use this only for tagging the purpose of containers for memory use tracking. +#if PX_CHECKED +#define PX_DEBUG_EXP(x) (x) +#else +#define PX_DEBUG_EXP(x) +#endif + +#define PX_SIGN_BITMASK 0x80000000 + +namespace physx +{ +namespace shdfnd +{ +// Int-as-bool type - has some uses for efficiency and with SIMD +typedef int IntBool; +static const IntBool IntFalse = 0; +static const IntBool IntTrue = 1; +} + +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PS_H diff --git a/PxShared/src/foundation/include/PsAlignedMalloc.h b/PxShared/src/foundation/include/PsAlignedMalloc.h new file mode 100644 index 0000000..4be8409 --- /dev/null +++ b/PxShared/src/foundation/include/PsAlignedMalloc.h @@ -0,0 +1,88 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSALIGNEDMALLOC_H +#define PSFOUNDATION_PSALIGNEDMALLOC_H + +#include "PsUserAllocated.h" + +/*! +Allocate aligned memory. +Alignment must be a power of 2! 
+-- should be templated by a base allocator
+*/
+
+namespace physx
+{
+namespace shdfnd
+{
+/**
+Allocator, which is used to access the global PxAllocatorCallback instance
+(used for dynamic data types template instantiation), which can align memory
+
+allocate() over-allocates by N - 1 + sizeof(size_t) bytes, rounds the end of that padding down to a
+multiple of N and stores the distance back to the start of the underlying block in the size_t slot
+directly in front of the returned pointer. deallocate() reads that slot to recover the underlying block.
+*/
+
+// SCS: AlignedMalloc with 3 params not found, seems not used on PC either
+// disabled for now to avoid GCC error
+
+template <uint32_t N, typename BaseAllocator = NonTrackingAllocator>
+class AlignedAllocator : public BaseAllocator
+{
+  public:
+    AlignedAllocator(const BaseAllocator& base = BaseAllocator()) : BaseAllocator(base)
+    {
+    }
+
+    /**
+    \param size number of usable bytes requested
+    \param file source file forwarded to the base allocator
+    \param line source line forwarded to the base allocator
+    \return pointer aligned to N bytes, or NULL if the padded size overflows size_t or the base
+    allocator returns NULL
+    */
+    void* allocate(size_t size, const char* file, int line)
+    {
+        // worst-case alignment slack plus the slot that stores the offset for deallocate()
+        const size_t pad = N - 1 + sizeof(size_t);
+        const size_t total = size + pad;
+        if(total < size) // size + pad wrapped around: refuse instead of under-allocating
+            return NULL;
+
+        uint8_t* base = reinterpret_cast<uint8_t*>(BaseAllocator::allocate(total, file, line));
+        if(!base)
+            return NULL;
+
+        // aligned pointer; size_t(N) ensures the mask is as wide as a size_t
+        uint8_t* ptr = reinterpret_cast<uint8_t*>(size_t(base + pad) & ~(size_t(N) - 1));
+        reinterpret_cast<size_t*>(ptr)[-1] = size_t(ptr - base); // store offset
+
+        return ptr;
+    }
+
+    /**
+    \param ptr pointer previously returned by allocate(), or NULL (ignored)
+    */
+    void deallocate(void* ptr)
+    {
+        if(ptr == NULL)
+            return;
+
+        // step back by the offset stored in front of ptr to reach the base allocator's block
+        uint8_t* base = reinterpret_cast<uint8_t*>(ptr) - reinterpret_cast<size_t*>(ptr)[-1];
+        BaseAllocator::deallocate(base);
+    }
+};
+
+} // namespace shdfnd
+} // namespace physx
+
+#endif // #ifndef PSFOUNDATION_PSALIGNEDMALLOC_H
diff --git a/PxShared/src/foundation/include/PsAlloca.h b/PxShared/src/foundation/include/PsAlloca.h
new file mode 100644
index 0000000..add64c4
--- /dev/null
+++ b/PxShared/src/foundation/include/PsAlloca.h
@@ -0,0 +1,76 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto.
Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PSFOUNDATION_PSALLOCA_H
+#define PSFOUNDATION_PSALLOCA_H
+
+#include "PsTempAllocator.h"
+
+namespace physx
+{
+namespace shdfnd
+{
+/**
+Holder for a buffer pointer together with a flag saying whether the destructor has to hand the buffer
+back to Alloc. Both members are public and are filled in by the code that obtains the buffer.
+
+NOTE(review): the implicitly generated copy operations duplicate mOwned, so copying an owning instance
+would deallocate the same buffer twice - confirm no caller copies these objects.
+*/
+template <typename T, typename Alloc = TempAllocator>
+class ScopedPointer : private Alloc
+{
+  public:
+    // Start out empty and non-owning so that the destructor never reads indeterminate members.
+    ScopedPointer() : mPointer(NULL), mOwned(false)
+    {
+    }
+
+    // Releases the buffer through Alloc only when this object was marked as its owner.
+    ~ScopedPointer()
+    {
+        if(mOwned)
+            Alloc::deallocate(mPointer);
+    }
+
+    // Lets the holder be used wherever a plain T* is expected.
+    operator T*() const
+    {
+        return mPointer;
+    }
+
+    T* mPointer; // buffer start
+    bool mOwned; // true if mPointer must be passed to Alloc::deallocate on destruction
+};
+
+} // namespace shdfnd
+} // namespace physx
+
+/*! Stack allocation for \c count instances of \c type. Falling back to temp allocator if using more than 1kB.
*/
+#ifdef __SPU__
+#define PX_ALLOCA(var, type, count) type* var = reinterpret_cast<type*>(PxAlloca(sizeof(type) * (count)))
+#else
+// Declares a ScopedPointer<type> named var. Requests of up to 1024 bytes are served by PxAlloca,
+// larger ones by TempAllocator, in which case var owns the buffer and frees it on scope exit.
+// The byte count is kept in a local whose name is derived from var and whose type is size_t:
+//  - a fixed name such as "size" would shadow a caller variable of that name used inside count,
+//    so PX_ALLOCA(buf, T, size) would read its own uninitialised local;
+//  - a 32 bit local would silently truncate the byte count where size_t is wider.
+#define PX_ALLOCA(var, type, count)                                                                                    \
+    physx::shdfnd::ScopedPointer<type> var;                                                                            \
+    {                                                                                                                  \
+        const size_t var##ByteCount = sizeof(type) * (count);                                                          \
+        var.mOwned = var##ByteCount > 1024;                                                                            \
+        if(var.mOwned)                                                                                                 \
+            var.mPointer = reinterpret_cast<type*>(                                                                    \
+                physx::shdfnd::TempAllocator().allocate(var##ByteCount, __FILE__, __LINE__));                          \
+        else                                                                                                           \
+            var.mPointer = reinterpret_cast<type*>(PxAlloca(var##ByteCount));                                          \
+    }
+#endif
+#endif // #ifndef PSFOUNDATION_PSALLOCA_H
diff --git a/PxShared/src/foundation/include/PsAllocator.h b/PxShared/src/foundation/include/PsAllocator.h
new file mode 100644
index 0000000..cbf32d3
--- /dev/null
+++ b/PxShared/src/foundation/include/PsAllocator.h
@@ -0,0 +1,367 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use.
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSALLOCATOR_H +#define PSFOUNDATION_PSALLOCATOR_H + +#include "foundation/PxAllocatorCallback.h" +#include "foundation/PxFoundation.h" +#include "Ps.h" +#include "foundation/PxAssert.h" + +#if(PX_WINDOWS_FAMILY || PX_XBOXONE) +#include <exception> +#include <typeinfo.h> +#endif +#if(PX_APPLE_FAMILY) +#include <typeinfo> +#endif + +#include <new> + +// Allocation macros going through user allocator +#if PX_CHECKED +#define PX_ALLOC(n, name) physx::shdfnd::NamedAllocator(name).allocate(n, __FILE__, __LINE__) +#else +#define PX_ALLOC(n, name) physx::shdfnd::NonTrackingAllocator().allocate(n, __FILE__, __LINE__) +#endif +#define PX_ALLOC_TEMP(n, name) PX_ALLOC(n, name) +#define PX_FREE(x) physx::shdfnd::NonTrackingAllocator().deallocate(x) +#define PX_FREE_AND_RESET(x) \ + { \ + PX_FREE(x); \ + x = 0; \ + } + +// The following macros support plain-old-types and classes derived from UserAllocated. 
+#define PX_NEW(T) new (physx::shdfnd::ReflectionAllocator<T>(), __FILE__, __LINE__) T
+#define PX_NEW_TEMP(T) PX_NEW(T)
+#define PX_DELETE(x) delete x
+#define PX_DELETE_AND_RESET(x)                                                                                         \
+    {                                                                                                                  \
+        PX_DELETE(x);                                                                                                  \
+        x = 0;                                                                                                         \
+    }
+#define PX_DELETE_POD(x)                                                                                               \
+    {                                                                                                                  \
+        PX_FREE(x);                                                                                                    \
+        x = 0;                                                                                                         \
+    }
+#define PX_DELETE_ARRAY(x)                                                                                             \
+    {                                                                                                                  \
+        PX_DELETE([] x);                                                                                               \
+        x = 0;                                                                                                         \
+    }
+
+// aligned allocation
+#define PX_ALIGNED16_ALLOC(n) physx::shdfnd::AlignedAllocator<16>().allocate(n, __FILE__, __LINE__)
+#define PX_ALIGNED16_FREE(x) physx::shdfnd::AlignedAllocator<16>().deallocate(x)
+
+//! placement new macro to make it easy to spot bad use of 'new'
+#define PX_PLACEMENT_NEW(p, T) new (p) T
+
+#if PX_DEBUG || PX_CHECKED
+#define PX_USE_NAMED_ALLOCATOR 1
+#else
+#define PX_USE_NAMED_ALLOCATOR 0
+#endif
+
+// Don't use inline for alloca !!!
+#if PX_WINDOWS_FAMILY
+#include <malloc.h>
+#define PxAlloca(x) _alloca(x)
+#elif PX_LINUX || PX_ANDROID
+#include <malloc.h>
+#define PxAlloca(x) alloca(x)
+#elif PX_APPLE_FAMILY
+#include <alloca.h>
+#define PxAlloca(x) alloca(x)
+#elif PX_PS4
+#include <memory.h>
+#define PxAlloca(x) alloca(x)
+#elif PX_XBOXONE
+#include <malloc.h>
+#define PxAlloca(x) alloca(x)
+#elif PX_NX
+#include <malloc.h>
+#define PxAlloca(x) alloca(x)
+#endif
+
+// Stack-allocates x + alignment bytes and yields the first address inside that block that is a
+// multiple of alignment, as a size_t (the caller casts it to the pointer type it needs).
+// alignment must be a power of 2 for the mask to be valid. Both arguments are parenthesised so
+// that expression arguments (e.g. a conditional or a shift) keep their intended grouping.
+#define PxAllocaAligned(x, alignment)                                                                                  \
+    ((size_t(PxAlloca((x) + (alignment))) + ((alignment) - 1)) & ~size_t((alignment) - 1))
+
+namespace physx
+{
+namespace shdfnd
+{
+
+PX_FOUNDATION_API PxAllocatorCallback& getAllocator();
+
+/**
+Allocator used to access the global PxAllocatorCallback instance without providing additional information.
+*/
+
+class PX_FOUNDATION_API Allocator
+{
+  public:
+    Allocator(const char* = 0)
+    {
+    }
+    void* allocate(size_t size, const char* file, int line);
+    void deallocate(void* ptr);
+};
+
+/*
+ * Bootstrap allocator using malloc/free.
+ * Don't use unless your objects get allocated before foundation is initialized.
+ */
+class RawAllocator
+{
+  public:
+    RawAllocator(const char* = 0)
+    {
+    }
+    void* allocate(size_t size, const char*, int)
+    {
+        // size is forwarded unchanged (zero included); the file and line arguments are ignored
+        void* const block = ::malloc(size);
+        return block;
+    }
+    void deallocate(void* ptr)
+    {
+        // ::free accepts a null pointer, so ptr is handed over unchecked
+        ::free(ptr);
+    }
+};
+
+/*
+ * Allocator that forwards straight to the application's allocator callback and records nothing
+ * about the allocation.
+ * This is used by the heap (Foundation::mNamedAllocMap) that tracks allocations
+ * because it needs to be able to grow as a result of an allocation.
+ * Making the hash table re-entrant to deal with this may not make sense.
+ */
+class NonTrackingAllocator
+{
+  public:
+    PX_FORCE_INLINE NonTrackingAllocator(const char* = 0)
+    {
+    }
+    PX_FORCE_INLINE void* allocate(size_t size, const char* file, int line)
+    {
+        // zero-byte requests yield null and never reach the callback
+        if(size == 0)
+            return 0;
+        return getAllocator().allocate(size, "NonTrackedAlloc", file, line);
+    }
+    PX_FORCE_INLINE void deallocate(void* ptr)
+    {
+        // null is dropped here instead of being forwarded
+        if(!ptr)
+            return;
+        getAllocator().deallocate(ptr);
+    }
+};
+
+/*
+\brief Abstract allocate/deallocate interface through which foundation types like Array or Bitmap can be
+       given an allocator chosen at run time.
+       This is used by VirtualAllocator
+*/
+class VirtualAllocatorCallback
+{
+  public:
+    VirtualAllocatorCallback()
+    {
+    }
+    virtual ~VirtualAllocatorCallback()
+    {
+    }
+    virtual void* allocate(const size_t size, const char* file, const int line) = 0;
+    virtual void deallocate(void* ptr) = 0;
+};
+
+/*
+\brief Virtual allocator to be used by foundation types to provide run-time defined allocators.
+Due to the fact that Array extends its allocator, rather than contains a reference/pointer to it, the VirtualAllocator
+must
+be a concrete type containing a pointer to a virtual callback. The callback may not be available at instantiation time,
+therefore
+methods are provided to set the callback later.
+*/
+class VirtualAllocator
+{
+  public:
+    VirtualAllocator(VirtualAllocatorCallback* callback = NULL) : mCallback(callback)
+    {
+    }
+
+    // Forwards non-zero requests to the callback; a zero-byte request returns NULL without calling it.
+    // The callback must have been set (asserted).
+    void* allocate(const size_t size, const char* file, const int line)
+    {
+        PX_ASSERT(mCallback);
+        if(size)
+            return mCallback->allocate(size, file, line);
+        return NULL;
+    }
+    // Forwards non-null pointers to the callback; NULL is ignored. The callback must have been set (asserted).
+    void deallocate(void* ptr)
+    {
+        PX_ASSERT(mCallback);
+        if(ptr)
+            mCallback->deallocate(ptr);
+    }
+
+    void setCallback(VirtualAllocatorCallback* callback)
+    {
+        mCallback = callback;
+    }
+    VirtualAllocatorCallback* getCallback()
+    {
+        return mCallback;
+    }
+
+  private:
+    VirtualAllocatorCallback* mCallback;
+    // declared private and not defined in this header, so assignment is unavailable to users
+    VirtualAllocator& operator=(const VirtualAllocator&);
+};
+
+#if PX_USE_NAMED_ALLOCATOR // can be slow, so only use in debug/checked
+class PX_FOUNDATION_API NamedAllocator
+{
+  public:
+    NamedAllocator(const PxEMPTY);
+    NamedAllocator(const char* name = 0); // todo: should not have default argument!
+    NamedAllocator(const NamedAllocator&);
+    ~NamedAllocator();
+    NamedAllocator& operator=(const NamedAllocator&);
+    void* allocate(size_t size, const char* filename, int line);
+    void deallocate(void* ptr);
+};
+#else
+class NamedAllocator;
+#endif // PX_USE_NAMED_ALLOCATOR
+
+/**
+Allocator used to access the global PxAllocatorCallback instance using a static name derived from T.
+*/
+
+template <typename T>
+class ReflectionAllocator
+{
+    // Name handed to the allocator callback: a fixed placeholder when the foundation reports that
+    // allocation names are disabled, otherwise __PRETTY_FUNCTION__ on GCC-family compilers or
+    // typeid(T).name() elsewhere.
+    static const char* getName()
+    {
+        if(!PxGetFoundation().getReportAllocationNames())
+            return "<allocation names disabled>";
+#if PX_GCC_FAMILY
+        return __PRETTY_FUNCTION__;
+#else
+        // name() calls malloc(), raw_name() wouldn't
+        return typeid(T).name();
+#endif
+    }
+
+  public:
+    ReflectionAllocator(const PxEMPTY)
+    {
+    }
+    ReflectionAllocator(const char* = 0)
+    {
+    }
+    inline ReflectionAllocator(const ReflectionAllocator&)
+    {
+    }
+    // Zero-byte requests return 0 without reaching the callback.
+    void* allocate(size_t size, const char* filename, int line)
+    {
+        return size ? getAllocator().allocate(size, getName(), filename, line) : 0;
+    }
+    // Null pointers are ignored.
+    void deallocate(void* ptr)
+    {
+        if(ptr)
+            getAllocator().deallocate(ptr);
+    }
+};
+
+// Selects the default allocator type for T: NamedAllocator when PX_USE_NAMED_ALLOCATOR is set,
+// ReflectionAllocator<T> otherwise.
+template <typename T>
+struct AllocatorTraits
+{
+#if PX_USE_NAMED_ALLOCATOR
+    typedef NamedAllocator Type;
+#else
+    typedef ReflectionAllocator<T> Type;
+#endif
+};
+
+// if you get a build error here, you are trying to PX_NEW a class
+// that is neither plain-old-type nor derived from UserAllocated
+// NOTE(review): presumably this relies on T being rejected as a union member when it is not a
+// plain-old-type - confirm it still triggers on compilers that accept unrestricted unions.
+template <typename T, typename X>
+union EnableIfPod
+{
+    int i;
+    T t;
+    typedef X Type;
+};
+
+} // namespace shdfnd
+} // namespace physx
+
+// Global placement new for ReflectionAllocator templated by
+// plain-old-type. Allows using PX_NEW for pointers and built-in-types.
+//
+// ATTENTION: You need to use PX_DELETE_POD or PX_FREE to deallocate
+// memory, not PX_DELETE. PX_DELETE_POD redirects to PX_FREE.
+//
+// Rationale: PX_DELETE uses global operator delete(void*), which we don't want to overload.
+// Any other definition of PX_DELETE couldn't support array syntax 'PX_DELETE([]a);'.
+// PX_DELETE_POD was preferred over PX_DELETE_ARRAY because it is used
+// less often and applies to both single instances and arrays.
+template <typename T>
+PX_INLINE void* operator new(size_t size, physx::shdfnd::ReflectionAllocator<T> alloc, const char* fileName,
+                             typename physx::shdfnd::EnableIfPod<T, int>::Type line)
+{
+    return alloc.allocate(size, fileName, line);
+}
+
+template <typename T>
+PX_INLINE void* operator new [](size_t size, physx::shdfnd::ReflectionAllocator<T> alloc, const char* fileName,
+                                typename physx::shdfnd::EnableIfPod<T, int>::Type line)
+{ return alloc.allocate(size, fileName, line); }
+
+// If construction after placement new throws, this placement delete is being called.
+template <typename T>
+PX_INLINE void operator delete(void* ptr, physx::shdfnd::ReflectionAllocator<T> alloc, const char* fileName,
+                               typename physx::shdfnd::EnableIfPod<T, int>::Type line)
+{
+    // fileName and line are not needed to release the memory; they are only marked as unused here
+    PX_UNUSED(fileName);
+    PX_UNUSED(line);
+
+    alloc.deallocate(ptr);
+}
+
+// If construction after placement new throws, this placement delete is being called.
+template <typename T>
+PX_INLINE void operator delete [](void* ptr, physx::shdfnd::ReflectionAllocator<T> alloc, const char* fileName,
+                                  typename physx::shdfnd::EnableIfPod<T, int>::Type line)
+{
+    // fileName and line are not needed to release the memory; they are only marked as unused here
+    PX_UNUSED(fileName);
+    PX_UNUSED(line);
+
+    alloc.deallocate(ptr);
+}
+
+#endif // #ifndef PSFOUNDATION_PSALLOCATOR_H
diff --git a/PxShared/src/foundation/include/PsAoS.h b/PxShared/src/foundation/include/PsAoS.h
new file mode 100644
index 0000000..641a40a
--- /dev/null
+++ b/PxShared/src/foundation/include/PsAoS.h
@@ -0,0 +1,45 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSAOS_H +#define PSFOUNDATION_PSAOS_H + +#include "foundation/Px.h" + +#if PX_WINDOWS && !PX_NEON +#include "windows/PsWindowsAoS.h" +#elif(PX_UNIX_FAMILY || PX_PS4 || PX_NX) +#include "unix/PsUnixAoS.h" +#elif PX_XBOXONE +#include "XboxOne/PsXboxOneAoS.h" +#else +#error "Platform not supported!" +#endif + +#endif diff --git a/PxShared/src/foundation/include/PsArray.h b/PxShared/src/foundation/include/PsArray.h new file mode 100644 index 0000000..8433fbe --- /dev/null +++ b/PxShared/src/foundation/include/PsArray.h @@ -0,0 +1,806 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSARRAY_H +#define PSFOUNDATION_PSARRAY_H + +#include "foundation/PxAssert.h" +#include "foundation/PxIntrinsics.h" +#include "PsAllocator.h" +#include "PsBasicTemplates.h" + +#if PX_LIBCPP +#include <type_traits> +#else +#include <tr1/type_traits> +#endif + +#if PX_VC == 9 || PX_VC == 10 +#pragma warning(push) +#pragma warning(disable : 4347) // behavior change: 'function template' is called instead of 'function' +#endif + +namespace physx +{ +namespace shdfnd +{ +template <class Serializer> +void exportArray(Serializer& stream, const void* data, uint32_t size, uint32_t sizeOfElement, uint32_t capacity); +char* importArray(char* address, void** data, uint32_t size, uint32_t sizeOfElement, uint32_t capacity); + +/*! 
+An array is a sequential container. + +Implementation note +* entries between 0 and size are valid objects +* we use inheritance to build this because the array is included inline in a lot + of objects and we want the allocator to take no space if it's not stateful, which + aggregation doesn't allow. Also, we want the metadata at the front for the inline + case where the allocator contains some inline storage space +*/ +template <class T, class Alloc = typename AllocatorTraits<T>::Type> +class Array : protected Alloc +{ + public: + typedef T* Iterator; + typedef const T* ConstIterator; + + explicit Array(const PxEMPTY v) : Alloc(v) + { + if(mData) + mCapacity |= PX_SIGN_BITMASK; + } + + /*! + Default array constructor. Initialize an empty array + */ + PX_INLINE explicit Array(const Alloc& alloc = Alloc()) : Alloc(alloc), mData(0), mSize(0), mCapacity(0) + { + } + + /*! + Initialize array with given capacity + */ + PX_INLINE explicit Array(uint32_t size, const T& a = T(), const Alloc& alloc = Alloc()) + : Alloc(alloc), mData(0), mSize(0), mCapacity(0) + { + resize(size, a); + } + + /*! + Copy-constructor. Copy all entries from other array + */ + template <class A> + PX_INLINE explicit Array(const Array<T, A>& other, const Alloc& alloc = Alloc()) + : Alloc(alloc) + { + copy(other); + } + + // This is necessary else the basic default copy constructor is used in the case of both arrays being of the same + // template instance + // The C++ standard clearly states that a template constructor is never a copy constructor [2]. In other words, + // the presence of a template constructor does not suppress the implicit declaration of the copy constructor. + // Also never make a copy constructor explicit, or copy-initialization* will no longer work. 
This is because + // 'binding an rvalue to a const reference requires an accessible copy constructor' (http://gcc.gnu.org/bugs/) + // *http://stackoverflow.com/questions/1051379/is-there-a-difference-in-c-between-copy-initialization-and-assignment-initializ + PX_INLINE Array(const Array& other, const Alloc& alloc = Alloc()) : Alloc(alloc) + { + copy(other); + } + + /*! + Initialize array with given length + */ + PX_INLINE explicit Array(const T* first, const T* last, const Alloc& alloc = Alloc()) + : Alloc(alloc), mSize(last < first ? 0 : uint32_t(last - first)), mCapacity(mSize) + { + mData = allocate(mSize); + copy(mData, mData + mSize, first); + } + + /*! + Destructor + */ + PX_INLINE ~Array() + { + destroy(mData, mData + mSize); + + if(capacity() && !isInUserMemory()) + deallocate(mData); + } + + /*! + Assignment operator. Copy content (deep-copy) + */ + template <class A> + PX_INLINE Array& operator=(const Array<T, A>& rhs) + { + if(&rhs == this) + return *this; + + clear(); + reserve(rhs.mSize); + copy(mData, mData + rhs.mSize, rhs.mData); + + mSize = rhs.mSize; + return *this; + } + + PX_INLINE Array& operator=(const Array& t) // Needs to be declared, see comment at copy-constructor + { + return operator=<Alloc>(t); + } + + PX_FORCE_INLINE static bool isArrayOfPOD() + { +#if PX_LIBCPP + return std::is_trivially_copyable<T>::value; +#else + return std::tr1::is_pod<T>::value; +#endif + } + + /*! + Array indexing operator. + \param i + The index of the element that will be returned. + \return + The element i in the array. + */ + PX_FORCE_INLINE const T& operator[](uint32_t i) const + { + PX_ASSERT(i < mSize); + return mData[i]; + } + + /*! + Array indexing operator. + \param i + The index of the element that will be returned. + \return + The element i in the array. + */ + PX_FORCE_INLINE T& operator[](uint32_t i) + { + PX_ASSERT(i < mSize); + return mData[i]; + } + + /*! + Returns a pointer to the initial element of the array. 
+ \return + a pointer to the initial element of the array. + */ + PX_FORCE_INLINE ConstIterator begin() const + { + return mData; + } + + PX_FORCE_INLINE Iterator begin() + { + return mData; + } + + /*! + Returns an iterator beyond the last element of the array. Do not dereference. + \return + a pointer to the element beyond the last element of the array. + */ + + PX_FORCE_INLINE ConstIterator end() const + { + return mData + mSize; + } + + PX_FORCE_INLINE Iterator end() + { + return mData + mSize; + } + + /*! + Returns a reference to the first element of the array. Undefined if the array is empty. + \return a reference to the first element of the array + */ + + PX_FORCE_INLINE const T& front() const + { + PX_ASSERT(mSize); + return mData[0]; + } + + PX_FORCE_INLINE T& front() + { + PX_ASSERT(mSize); + return mData[0]; + } + + /*! + Returns a reference to the last element of the array. Undefined if the array is empty + \return a reference to the last element of the array + */ + + PX_FORCE_INLINE const T& back() const + { + PX_ASSERT(mSize); + return mData[mSize - 1]; + } + + PX_FORCE_INLINE T& back() + { + PX_ASSERT(mSize); + return mData[mSize - 1]; + } + + /*! + Returns the number of entries in the array. This can, and probably will, + differ from the array capacity. + \return + The number of of entries in the array. + */ + PX_FORCE_INLINE uint32_t size() const + { + return mSize; + } + + /*! + Clears the array. + */ + PX_INLINE void clear() + { + destroy(mData, mData + mSize); + mSize = 0; + } + + /*! + Returns whether the array is empty (i.e. whether its size is 0). + \return + true if the array is empty + */ + PX_FORCE_INLINE bool empty() const + { + return mSize == 0; + } + + /*! + Finds the first occurrence of an element in the array. + \param a + The element to find. 
+ */ + + PX_INLINE Iterator find(const T& a) + { + uint32_t index; + for(index = 0; index < mSize && mData[index] != a; index++) + ; + return mData + index; + } + + PX_INLINE ConstIterator find(const T& a) const + { + uint32_t index; + for(index = 0; index < mSize && mData[index] != a; index++) + ; + return mData + index; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Adds one element to the end of the array. Operation is O(1). + \param a + The element that will be added to this array. + */ + ///////////////////////////////////////////////////////////////////////// + + PX_FORCE_INLINE T& pushBack(const T& a) + { + if(capacity() <= mSize) + return growAndPushBack(a); + + PX_PLACEMENT_NEW(reinterpret_cast<void*>(mData + mSize), T)(a); + + return mData[mSize++]; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Returns the element at the end of the array. Only legal if the array is non-empty. + */ + ///////////////////////////////////////////////////////////////////////// + PX_INLINE T popBack() + { + PX_ASSERT(mSize); + T t = mData[mSize - 1]; + + if(!isArrayOfPOD()) + { + mData[--mSize].~T(); + } + else + { + --mSize; + } + + return t; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Construct one element at the end of the array. Operation is O(1). + */ + ///////////////////////////////////////////////////////////////////////// + PX_INLINE T& insert() + { + if(capacity() <= mSize) + grow(capacityIncrement()); + + T* ptr = mData + mSize++; + new (ptr) T; // not 'T()' because PODs should not get default-initialized. + return *ptr; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Subtracts the element on position i from the array and replace it with + the last element. + Operation is O(1) + \param i + The position of the element that will be subtracted from this array. 
+ */ + ///////////////////////////////////////////////////////////////////////// + PX_INLINE void replaceWithLast(uint32_t i) + { + PX_ASSERT(i < mSize); + mData[i] = mData[--mSize]; + + if(!isArrayOfPOD()) + { + mData[mSize].~T(); + } + } + + PX_INLINE void replaceWithLast(Iterator i) + { + replaceWithLast(static_cast<uint32_t>(i - mData)); + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Replaces the first occurrence of the element a with the last element + Operation is O(n) + \param a + The position of the element that will be subtracted from this array. + \return true if the element has been removed. + */ + ///////////////////////////////////////////////////////////////////////// + + PX_INLINE bool findAndReplaceWithLast(const T& a) + { + uint32_t index = 0; + while(index < mSize && mData[index] != a) + ++index; + if(index == mSize) + return false; + replaceWithLast(index); + return true; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Subtracts the element on position i from the array. Shift the entire + array one step. + Operation is O(n) + \param i + The position of the element that will be subtracted from this array. + */ + ///////////////////////////////////////////////////////////////////////// + PX_INLINE void remove(uint32_t i) + { + PX_ASSERT(i < mSize); + + if(isArrayOfPOD()) + { + if(i + 1 != mSize) + { + physx::intrinsics::memMove(mData + i, mData + i + 1, (mSize - i - 1) * sizeof(T)); + } + } + else + { + T* it = mData + i; + it->~T(); + while (++i < mSize) + { + new (it) T(mData[i]); + ++it; + it->~T(); + } + } + --mSize; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Removes a range from the array. Shifts the array so order is maintained. + Operation is O(n) + \param begin + The starting position of the element that will be subtracted from this array. 
+ \param count + The number of elments that will be subtracted from this array. + */ + ///////////////////////////////////////////////////////////////////////// + PX_INLINE void removeRange(uint32_t begin, uint32_t count) + { + PX_ASSERT(begin < mSize); + PX_ASSERT((begin + count) <= mSize); + + if(!isArrayOfPOD()) + { + for(uint32_t i = 0; i < count; i++) + { + mData[begin + i].~T(); // call the destructor on the ones being removed first. + } + } + + T* dest = &mData[begin]; // location we are copying the tail end objects to + T* src = &mData[begin + count]; // start of tail objects + uint32_t move_count = mSize - (begin + count); // compute remainder that needs to be copied down + + if(isArrayOfPOD()) + { + physx::intrinsics::memMove(dest, src, move_count * sizeof(T)); + } + else + { + for(uint32_t i = 0; i < move_count; i++) + { + new (dest) T(*src); // copy the old one to the new location + src->~T(); // call the destructor on the old location + dest++; + src++; + } + } + mSize -= count; + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Resize array + */ + ////////////////////////////////////////////////////////////////////////// + PX_NOINLINE void resize(const uint32_t size, const T& a = T()); + + PX_NOINLINE void resizeUninitialized(const uint32_t size); + + ////////////////////////////////////////////////////////////////////////// + /*! + Resize array such that only as much memory is allocated to hold the + existing elements + */ + ////////////////////////////////////////////////////////////////////////// + PX_INLINE void shrink() + { + recreate(mSize); + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Deletes all array elements and frees memory. + */ + ////////////////////////////////////////////////////////////////////////// + PX_INLINE void reset() + { + resize(0); + shrink(); + } + + ////////////////////////////////////////////////////////////////////////// + /*! 
+ Ensure that the array has at least size capacity. + */ + ////////////////////////////////////////////////////////////////////////// + PX_INLINE void reserve(const uint32_t capacity) + { + if(capacity > this->capacity()) + grow(capacity); + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Query the capacity(allocated mem) for the array. + */ + ////////////////////////////////////////////////////////////////////////// + PX_FORCE_INLINE uint32_t capacity() const + { + return mCapacity & ~PX_SIGN_BITMASK; + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Unsafe function to force the size of the array + */ + ////////////////////////////////////////////////////////////////////////// + PX_FORCE_INLINE void forceSize_Unsafe(uint32_t size) + { + PX_ASSERT(size <= mCapacity); + mSize = size; + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Swap contents of an array without allocating temporary storage + */ + ////////////////////////////////////////////////////////////////////////// + PX_INLINE void swap(Array<T, Alloc>& other) + { + shdfnd::swap(mData, other.mData); + shdfnd::swap(mSize, other.mSize); + shdfnd::swap(mCapacity, other.mCapacity); + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Assign a range of values to this vector (resizes to length of range) + */ + ////////////////////////////////////////////////////////////////////////// + PX_INLINE void assign(const T* first, const T* last) + { + resizeUninitialized(uint32_t(last - first)); + copy(begin(), end(), first); + } + + // We need one bit to mark arrays that have been deserialized from a user-provided memory block. + // For alignment & memory saving purpose we store that bit in the rarely used capacity member. 
+ PX_FORCE_INLINE uint32_t isInUserMemory() const + { + return mCapacity & PX_SIGN_BITMASK; + } + + /// return reference to allocator + PX_INLINE Alloc& getAllocator() + { + return *this; + } + + protected: + // constructor for where we don't own the memory + Array(T* memory, uint32_t size, uint32_t capacity, const Alloc& alloc = Alloc()) + : Alloc(alloc), mData(memory), mSize(size), mCapacity(capacity | PX_SIGN_BITMASK) + { + } + + template <class A> + PX_NOINLINE void copy(const Array<T, A>& other); + + PX_INLINE T* allocate(uint32_t size) + { + if(size > 0) + { + T* p = reinterpret_cast<T*>(Alloc::allocate(sizeof(T) * size, __FILE__, __LINE__)); +/** +Mark a specified amount of memory with 0xcd pattern. This is used to check that the meta data +definition for serialized classes is complete in checked builds. +*/ +#if PX_CHECKED + if(p) + { + for(uint32_t i = 0; i < (sizeof(T) * size); ++i) + reinterpret_cast<uint8_t*>(p)[i] = 0xcd; + } +#endif + return p; + } + return 0; + } + + PX_INLINE void deallocate(void* mem) + { + Alloc::deallocate(mem); + } + + static PX_INLINE bool isZeroInit(const T& object) + { + if (!isArrayOfPOD()) + return false; + char ZeroBuffOnStack[sizeof(object)] = {}; + // bgaldrikian - casting to void* to avoid compiler error: + // error : first operand of this 'memcmp' call is a pointer to dynamic class [...]; vtable pointer will be compared [-Werror,-Wdynamic-class-memaccess] + // even though POD check prevents memcmp from being used on a dynamic class + return memcmp(reinterpret_cast<const void*>(&object), ZeroBuffOnStack, sizeof(object)) == 0; + } + + static PX_INLINE void create(T* first, T* last, const T& a) + { + if(isZeroInit(a)) + { + if(last > first) + physx::intrinsics::memZero(first, uint32_t((last - first) * sizeof(T))); + } + else + { + for(; first < last; ++first) + ::new (first) T(a); + } + } + + static PX_INLINE void copy(T* first, T* last, const T* src) + { + if(last <= first) + return; + + if(isArrayOfPOD()) + { + 
physx::intrinsics::memCopy(first, src, uint32_t((last - first) * sizeof(T))); + } + else + { + for(; first < last; ++first, ++src) + ::new (first) T(*src); + } + } + + static PX_INLINE void destroy(T* first, T* last) + { + if(!isArrayOfPOD()) + { + for(; first < last; ++first) + first->~T(); + } + } + + /*! + Called when pushBack() needs to grow the array. + \param a The element that will be added to this array. + */ + PX_NOINLINE T& growAndPushBack(const T& a); + + /*! + Resizes the available memory for the array. + + \param capacity + The number of entries that the set should be able to hold. + */ + PX_INLINE void grow(uint32_t capacity) + { + PX_ASSERT(this->capacity() < capacity); + recreate(capacity); + } + + /*! + Creates a new memory block, copies all entries to the new block and destroys old entries. + + \param capacity + The number of entries that the set should be able to hold. + */ + PX_NOINLINE void recreate(uint32_t capacity); + + // The idea here is to prevent accidental bugs with pushBack or insert. Unfortunately + // it interacts badly with InlineArrays with smaller inline allocations. + // TODO(dsequeira): policy template arg, this is exactly what they're for. + PX_INLINE uint32_t capacityIncrement() const + { + const uint32_t capacity = this->capacity(); + return capacity == 0 ? 
1 : capacity * 2; + } + + T* mData; + uint32_t mSize; + uint32_t mCapacity; +}; + +template <class T, class Alloc> +PX_NOINLINE void Array<T, Alloc>::resize(const uint32_t size, const T& a) +{ + reserve(size); + create(mData + mSize, mData + size, a); + destroy(mData + size, mData + mSize); + mSize = size; +} + +template <class T, class Alloc> +template <class A> +PX_NOINLINE void Array<T, Alloc>::copy(const Array<T, A>& other) +{ + if(!other.empty()) + { + mData = allocate(mSize = mCapacity = other.size()); + copy(mData, mData + mSize, other.begin()); + } + else + { + mData = NULL; + mSize = 0; + mCapacity = 0; + } + + // mData = allocate(other.mSize); + // mSize = other.mSize; + // mCapacity = other.mSize; + // copy(mData, mData + mSize, other.mData); +} + +template <class T, class Alloc> +PX_NOINLINE void Array<T, Alloc>::resizeUninitialized(const uint32_t size) +{ + reserve(size); + mSize = size; +} + +template <class T, class Alloc> +PX_NOINLINE T& Array<T, Alloc>::growAndPushBack(const T& a) +{ + uint32_t capacity = capacityIncrement(); + + T* newData = allocate(capacity); + PX_ASSERT((!capacity) || (newData && (newData != mData))); + copy(newData, newData + mSize, mData); + + // inserting element before destroying old array + // avoids referencing destroyed object when duplicating array element. 
+ PX_PLACEMENT_NEW(reinterpret_cast<void*>(newData + mSize), T)(a); + + destroy(mData, mData + mSize); + if(!isInUserMemory()) + deallocate(mData); + + mData = newData; + mCapacity = capacity; + + return mData[mSize++]; +} + +template <class T, class Alloc> +PX_NOINLINE void Array<T, Alloc>::recreate(uint32_t capacity) +{ + T* newData = allocate(capacity); + PX_ASSERT((!capacity) || (newData && (newData != mData))); + + copy(newData, newData + mSize, mData); + destroy(mData, mData + mSize); + if(!isInUserMemory()) + deallocate(mData); + + mData = newData; + mCapacity = capacity; +} + +template <class T, class Alloc> +PX_INLINE void swap(Array<T, Alloc>& x, Array<T, Alloc>& y) +{ + x.swap(y); +} + +} // namespace shdfnd +} // namespace physx + +#if PX_VC == 9 || PX_VC == 10 +#pragma warning(pop) +#endif + +#endif // #ifndef PSFOUNDATION_PSARRAY_H diff --git a/PxShared/src/foundation/include/PsAtomic.h b/PxShared/src/foundation/include/PsAtomic.h new file mode 100644 index 0000000..23df190 --- /dev/null +++ b/PxShared/src/foundation/include/PsAtomic.h @@ -0,0 +1,63 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. 
#ifndef PSFOUNDATION_PSATOMIC_H
#define PSFOUNDATION_PSATOMIC_H

#include "Ps.h"

// Declarations of the foundation's atomic primitives on 32-bit integers and pointers.
// Only the declarations are visible here; the implementations live elsewhere.

namespace physx
{
namespace shdfnd
{
/* set *dest equal to val. Return the old value of *dest */
PX_FOUNDATION_API int32_t atomicExchange(volatile int32_t* dest, int32_t val);

/* if *dest == comp, replace with exch. Return original value of *dest */
PX_FOUNDATION_API int32_t atomicCompareExchange(volatile int32_t* dest, int32_t exch, int32_t comp);

/* pointer variant: if *dest == comp, replace with exch. Return original value of *dest */
PX_FOUNDATION_API void* atomicCompareExchangePointer(volatile void** dest, void* exch, void* comp);

/* increment the specified location. Return the incremented value */
PX_FOUNDATION_API int32_t atomicIncrement(volatile int32_t* val);

/* decrement the specified location. Return the decremented value */
PX_FOUNDATION_API int32_t atomicDecrement(volatile int32_t* val);

/* add delta to *val. Return the new value */
PX_FOUNDATION_API int32_t atomicAdd(volatile int32_t* val, int32_t delta);

/* compute the maximum of *val and val2 (named "dest" and "val" in the original comment).
   Return the new value.
   NOTE(review): presumably the maximum is also stored back into *val - confirm against the implementation */
PX_FOUNDATION_API int32_t atomicMax(volatile int32_t* val, int32_t val2);

} // namespace shdfnd
} // namespace physx

#endif // #ifndef PSFOUNDATION_PSATOMIC_H
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSBASICTEMPLATES_H +#define PSFOUNDATION_PSBASICTEMPLATES_H + +#include "Ps.h" + +namespace physx +{ +namespace shdfnd +{ +template <typename A> +struct Equal +{ + bool operator()(const A& a, const A& b) const + { + return a == b; + } +}; + +template <typename A> +struct Less +{ + bool operator()(const A& a, const A& b) const + { + return a < b; + } +}; + +template <typename A> +struct Greater +{ + bool operator()(const A& a, const A& b) const + { + return a > b; + } +}; + +template <class F, class S> +class Pair +{ + public: + F first; + S second; + Pair() : first(F()), second(S()) + { + } + Pair(const F& f, const S& s) : first(f), second(s) + { + } + Pair(const Pair& p) : first(p.first), second(p.second) + { + } + // CN - fix for /.../PsBasicTemplates.h(61) : warning C4512: 'physx::shdfnd::Pair<F,S>' : assignment operator could + // not be generated + Pair& operator=(const Pair& p) + { + first = p.first; + second = p.second; + return *this; + } + bool operator==(const Pair& p) const + { + return first == p.first && second == p.second; + } + bool operator<(const Pair& p) const + { + if(first < p.first) + return true; + else + return !(p.first < first) && (second < p.second); + } +}; + +template <unsigned int A> +struct LogTwo +{ + static const unsigned int value = LogTwo<(A >> 1)>::value + 1; +}; +template <> +struct LogTwo<1> +{ + static const unsigned int value = 0; +}; + +template <typename T> +struct UnConst +{ + typedef T Type; +}; +template <typename T> +struct UnConst<const T> +{ + typedef T Type; +}; + +template <typename T> +T pointerOffset(void* p, 
ptrdiff_t offset) +{ + return reinterpret_cast<T>(reinterpret_cast<char*>(p) + offset); +} +template <typename T> +T pointerOffset(const void* p, ptrdiff_t offset) +{ + return reinterpret_cast<T>(reinterpret_cast<const char*>(p) + offset); +} + +template <class T> +PX_CUDA_CALLABLE PX_INLINE void swap(T& x, T& y) +{ + const T tmp = x; + x = y; + y = tmp; +} + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSBASICTEMPLATES_H diff --git a/PxShared/src/foundation/include/PsBitUtils.h b/PxShared/src/foundation/include/PsBitUtils.h new file mode 100644 index 0000000..f69f47a --- /dev/null +++ b/PxShared/src/foundation/include/PsBitUtils.h @@ -0,0 +1,109 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSBITUTILS_H +#define PSFOUNDATION_PSBITUTILS_H + +#include "foundation/PxIntrinsics.h" +#include "foundation/PxAssert.h" +#include "PsIntrinsics.h" +#include "Ps.h" + +namespace physx +{ +namespace shdfnd +{ +PX_INLINE uint32_t bitCount(uint32_t v) +{ + // from http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + uint32_t const w = v - ((v >> 1) & 0x55555555); + uint32_t const x = (w & 0x33333333) + ((w >> 2) & 0x33333333); + return (((x + (x >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; +} + +PX_INLINE bool isPowerOfTwo(uint32_t x) +{ + return x != 0 && (x & (x - 1)) == 0; +} + +// "Next Largest Power of 2 +// Given a binary integer value x, the next largest power of 2 can be computed by a SWAR algorithm +// that recursively "folds" the upper bits into the lower bits. This process yields a bit vector with +// the same most significant 1 as x, but all 1's below it. Adding 1 to that value yields the next +// largest power of 2. For a 32-bit value:" +PX_INLINE uint32_t nextPowerOfTwo(uint32_t x) +{ + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + return x + 1; +} + +/*! +Return the index of the highest set bit. Not valid for zero arg. +*/ + +PX_INLINE uint32_t lowestSetBit(uint32_t x) +{ + PX_ASSERT(x); + return lowestSetBitUnsafe(x); +} + +/*! +Return the index of the highest set bit. Not valid for zero arg. 
+*/ + +PX_INLINE uint32_t highestSetBit(uint32_t x) +{ + PX_ASSERT(x); + return highestSetBitUnsafe(x); +} + +// Helper function to approximate log2 of an integer value +// assumes that the input is actually power of two. +// todo: replace 2 usages with 'highestSetBit' +PX_INLINE uint32_t ilog2(uint32_t num) +{ + for(uint32_t i = 0; i < 32; i++) + { + num >>= 1; + if(num == 0) + return i; + } + + PX_ASSERT(0); + return uint32_t(-1); +} + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSBITUTILS_H diff --git a/PxShared/src/foundation/include/PsBroadcast.h b/PxShared/src/foundation/include/PsBroadcast.h new file mode 100644 index 0000000..3eb1cba --- /dev/null +++ b/PxShared/src/foundation/include/PsBroadcast.h @@ -0,0 +1,277 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PXPVDSDK_PXBROADCAST_H +#define PXPVDSDK_PXBROADCAST_H + +#include "Ps.h" +#include "PsInlineArray.h" + +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxErrorCallback.h" + +namespace physx +{ +namespace shdfnd +{ + +/** +\brief Abstract listener class that listens to allocation and deallocation events from the + foundation memory system. + +<b>Threading:</b> All methods of this class should be thread safe as it can be called from the user thread +or the physics processing thread(s). +*/ +class AllocationListener +{ + public: + /** + \brief callback when memory is allocated. + \param size Size of the allocation in bytes. + \param typeName Type this data is being allocated for. + \param filename File the allocation came from. + \param line the allocation came from. + \param allocatedMemory memory that will be returned from the allocation. + */ + virtual void onAllocation(size_t size, const char* typeName, const char* filename, int line, + void* allocatedMemory) = 0; + + /** + \brief callback when memory is deallocated. + \param allocatedMemory memory just before allocation. + */ + virtual void onDeallocation(void* allocatedMemory) = 0; + + protected: + virtual ~AllocationListener() + { + } +}; + +/** +\brief Broadcast class implementation, registering listeners. 
+ +<b>Threading:</b> All methods of this class should be thread safe as it can be called from the user thread +or the physics processing thread(s). There is not internal locking +*/ +template <class Listener, class Base> +class Broadcast : public Base +{ + public: + static const uint32_t MAX_NB_LISTENERS = 16; + + /** + \brief The default constructor. + */ + Broadcast() + { + } + + /** + \brief Register new listener. + + \note It is NOT SAFE to register and deregister listeners while allocations may be taking place. + moreover, there is no thread safety to registration/deregistration. + + \param listener Listener to register. + */ + void registerListener(Listener& listener) + { + if(mListeners.size() < MAX_NB_LISTENERS) + mListeners.pushBack(&listener); + } + + /** + \brief Deregister an existing listener. + + \note It is NOT SAFE to register and deregister listeners while allocations may be taking place. + moreover, there is no thread safety to registration/deregistration. + + \param listener Listener to deregister. + */ + void deregisterListener(Listener& listener) + { + mListeners.findAndReplaceWithLast(&listener); + } + + /** + \brief Get number of registered listeners. + + \return Number of listeners. + */ + uint32_t getNbListeners() const + { + return mListeners.size(); + } + + /** + \brief Get an existing listener from given index. + + \param index Index of the listener. + \return Listener on given index. + */ + Listener& getListener(uint32_t index) + { + PX_ASSERT(index <= mListeners.size()); + return *mListeners[index]; + } + + protected: + virtual ~Broadcast() + { + } + + physx::shdfnd::InlineArray<Listener*, MAX_NB_LISTENERS, physx::shdfnd::NonTrackingAllocator> mListeners; +}; + +/** +\brief Abstract base class for an application defined memory allocator that allows an external listener +to audit the memory allocations. 
+*/ +class BroadcastingAllocator : public Broadcast<AllocationListener, PxAllocatorCallback> +{ + PX_NOCOPY(BroadcastingAllocator) + + public: + /** + \brief The default constructor. + */ + BroadcastingAllocator(PxAllocatorCallback& allocator, PxErrorCallback& error) : mAllocator(allocator), mError(error) + { + mListeners.clear(); + } + + /** + \brief The default constructor. + */ + virtual ~BroadcastingAllocator() + { + mListeners.clear(); + } + + /** + \brief Allocates size bytes of memory, which must be 16-byte aligned. + + This method should never return NULL. If you run out of memory, then + you should terminate the app or take some other appropriate action. + + <b>Threading:</b> This function should be thread safe as it can be called in the context of the user thread + and physics processing thread(s). + + \param size Number of bytes to allocate. + \param typeName Name of the datatype that is being allocated + \param filename The source file which allocated the memory + \param line The source line which allocated the memory + \return The allocated block of memory. + */ + void* allocate(size_t size, const char* typeName, const char* filename, int line) + { + void* mem = mAllocator.allocate(size, typeName, filename, line); + + if(!mem) + { + mError.reportError(PxErrorCode::eABORT, "User allocator returned NULL.", __FILE__, __LINE__); + return NULL; + } + + if((reinterpret_cast<size_t>(mem) & 15)) + { + mError.reportError(PxErrorCode::eABORT, "Allocations must be 16-byte aligned.", __FILE__, __LINE__); + return NULL; + } + + for(uint32_t i = 0; i < mListeners.size(); i++) + mListeners[i]->onAllocation(size, typeName, filename, line, mem); + + return mem; + } + + /** + \brief Frees memory previously allocated by allocate(). + + <b>Threading:</b> This function should be thread safe as it can be called in the context of the user thread + and physics processing thread(s). + + \param ptr Memory to free. 
+ */ + void deallocate(void* ptr) + { + for(uint32_t i = 0; i < mListeners.size(); i++) + { + mListeners[i]->onDeallocation(ptr); + } + mAllocator.deallocate(ptr); + } + + private: + PxAllocatorCallback& mAllocator; + PxErrorCallback& mError; +}; + +/** +\brief Abstract base class for an application defined error callback that allows an external listener +to report errors. +*/ +class BroadcastingErrorCallback : public Broadcast<PxErrorCallback, PxErrorCallback> +{ + PX_NOCOPY(BroadcastingErrorCallback) + public: + /** + \brief The default constructor. + */ + BroadcastingErrorCallback(PxErrorCallback& errorCallback) + { + registerListener(errorCallback); + } + + /** + \brief The default destructor. + */ + virtual ~BroadcastingErrorCallback() + { + mListeners.clear(); + } + + /** + \brief Reports an error code. + \param code Error code, see #PxErrorCode + \param message Message to display. + \param file File error occured in. + \param line Line number error occured on. + */ + void reportError(PxErrorCode::Enum code, const char* message, const char* file, int line) + { + for(uint32_t i = 0; i < mListeners.size(); i++) + mListeners[i]->reportError(code, message, file, line); + } +}; +} +} // namespace physx + +#endif // PXPVDSDK_PXBROADCAST_H diff --git a/PxShared/src/foundation/include/PsCpu.h b/PxShared/src/foundation/include/PsCpu.h new file mode 100644 index 0000000..2dcc5c1 --- /dev/null +++ b/PxShared/src/foundation/include/PsCpu.h @@ -0,0 +1,47 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef PSFOUNDATION_PSCPU_H
#define PSFOUNDATION_PSCPU_H

#include "Ps.h"

namespace physx
{
namespace shdfnd
{
// Static-only helper for CPU queries; this header declares a single accessor and no state.
class Cpu
{
  public:
    // Returns an 8-bit CPU identifier.
    // NOTE(review): presumably the id of the CPU/core the calling thread is running on; the
    // definition is not part of this header - confirm against the platform implementation.
    static uint8_t getCpuId();
};
}
}

#endif // #ifndef PSFOUNDATION_PSCPU_H
diff --git a/PxShared/src/foundation/include/PsFPU.h b/PxShared/src/foundation/include/PsFPU.h
new file mode 100644
index 0000000..fd990a8
--- /dev/null
+++ b/PxShared/src/foundation/include/PsFPU.h
@@ -0,0 +1,103 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PSFOUNDATION_PSFPU_H
#define PSFOUNDATION_PSFPU_H

#include "Ps.h"
#include "PsIntrinsics.h"

// The three macros below reinterpret the storage of x through a reference cast; no value
// conversion takes place, so x must be an lvalue.

// Unsigned integer representation of a floating-point value.
#define PX_IR(x) ((uint32_t&)(x))
// Signed integer representation of a floating-point value.
#define PX_SIR(x) ((int32_t&)(x))
// Floating-point representation of an integer value.
#define PX_FR(x) ((float&)(x))

// Mask with only the top bit of a 32-bit word set.
#define PX_SIGN_BITMASK 0x80000000

// Scoped guards. Both macros declare a local named scopedFpGuard, so only one of them can be
// used per scope.
#define PX_FPU_GUARD shdfnd::FPUGuard scopedFpGuard;
#define PX_SIMD_GUARD shdfnd::SIMDGuard scopedFpGuard;

// Platforms on which SIMDGuard carries a saved control word (see the #if inside SIMDGuard).
// NOTE(review): the Linux term here (PX_X86 || PX_X64) is not the same condition as the one that
// selects unix/PsUnixFPU.h at the bottom of this file (PX_SSE2) - confirm this is intended.
#define PX_SUPPORT_GUARDS (PX_WINDOWS_FAMILY || PX_XBOXONE || (PX_LINUX && (PX_X86 || PX_X64)) || PX_PS4 || PX_OSX)

namespace physx
{
namespace shdfnd
{
// sets the default SDK state for scalar and SIMD units
class PX_FOUNDATION_API FPUGuard
{
  public:
    FPUGuard();  // set fpu control word for PhysX
    ~FPUGuard(); // restore fpu control word
  private:
    // storage for the state saved by the constructor; layout is defined by the implementation
    uint32_t mControlWords[8];
};

// sets default SDK state for simd unit only, lighter weight than FPUGuard
class SIMDGuard
{
  public:
    PX_INLINE SIMDGuard();  // set simd control word for PhysX
    PX_INLINE ~SIMDGuard(); // restore simd control word
  private:
#if PX_SUPPORT_GUARDS
    // only present on platforms that support guards; elsewhere the class is empty
    uint32_t mControlWord;
#endif
};

/**
\brief Enables floating point exceptions for the scalar and SIMD unit
*/
PX_FOUNDATION_API void enableFPExceptions();

/**
\brief Disables floating point exceptions for the scalar and SIMD unit
*/
PX_FOUNDATION_API void disableFPExceptions();

} // namespace shdfnd
} // namespace physx

// Pull in the platform implementation of SIMDGuard; platforms without one get the empty
// constructor/destructor defined in the #else branch.
#if PX_WINDOWS_FAMILY || PX_XBOXONE
#include "windows/PsWindowsFPU.h"
#elif (PX_LINUX && PX_SSE2) || PX_PS4 || PX_OSX
#include "unix/PsUnixFPU.h"
#else
PX_INLINE physx::shdfnd::SIMDGuard::SIMDGuard()
{
}
PX_INLINE physx::shdfnd::SIMDGuard::~SIMDGuard()
{
}
#endif

#endif // #ifndef PSFOUNDATION_PSFPU_H
diff --git a/PxShared/src/foundation/include/PsFoundation.h b/PxShared/src/foundation/include/PsFoundation.h
new file mode 100644
index 0000000..68f2cc7
--- /dev/null
+++ b/PxShared/src/foundation/include/PsFoundation.h
@@ -0,0 +1,216 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PX_FOUNDATION_PSFOUNDATION_H
#define PX_FOUNDATION_PSFOUNDATION_H

#include "foundation/PxFoundation.h"
#include "foundation/PxErrors.h"
#include "foundation/PxProfiler.h"

#include "PsBroadcast.h"
#include "PsAllocator.h"
#include "PsTempAllocator.h"
#include "PsMutex.h"
#include "PsHashMap.h"
#include "PsUserAllocated.h"

#include <stdarg.h>

namespace physx
{
namespace shdfnd
{

#if PX_VC
#pragma warning(push)
#pragma warning(disable : 4251) // class needs to have dll-interface to be used by clients of class
#endif

// Implementation of PxFoundation. Holds the user's error and allocator callbacks, the broadcasting
// wrappers built around them, and the bookkeeping shared by the named and temp allocators.
// A single instance is reachable through getInstance() / getFoundation().
class PX_FOUNDATION_API Foundation : public PxFoundation, public UserAllocated
{
    PX_NOCOPY(Foundation)

  public:
    typedef MutexT<Allocator> Mutex;

    // maps a NamedAllocator to the name string reported for it
    typedef HashMap<const NamedAllocator*, const char*, Hash<const NamedAllocator*>, NonTrackingAllocator> AllocNameMap;
    typedef Array<TempAllocatorChunk*, Allocator> AllocFreeTable;

  public:
    // factory
    // note, you MUST eventually call release if createInstance succeeded
    // (createInstance returns a pointer, not a bool)
    static Foundation* createInstance(PxU32 version, PxErrorCallback& errc, PxAllocatorCallback& alloc);
    static Foundation& getInstance();
    void release();
    static void incRefCount(); // this call requires a foundation object to exist already
    static void decRefCount(); // this call requires a foundation object to exist already

    // Begin Errors
    virtual PxErrorCallback& getErrorCallback()
    {
        return mErrorCallback;
    } // Return the user's error callback
    PxErrorCallback& getInternalErrorCallback()
    {
        return mBroadcastingError;
    } // Return the broadcasting error callback

    void registerErrorCallback(PxErrorCallback& listener);
    void deregisterErrorCallback(PxErrorCallback& listener);

    virtual void setErrorLevel(PxErrorCode::Enum mask)
    {
        mErrorMask = mask;
    }
    virtual PxErrorCode::Enum getErrorLevel() const
    {
        return mErrorMask;
    }

    // error() and errorImpl() report errors with the broadcasting error callback; error() takes
    // printf-style variadic arguments, errorImpl() takes the same arguments as a va_list.
    void error(PxErrorCode::Enum, const char* file, int line, const char* messageFmt, ...);
    void errorImpl(PxErrorCode::Enum, const char* file, int line, const char* messageFmt, va_list);
    // value compared against by PX_WARN_ONCE to decide whether a warning was already emitted
    static PxU32 getWarnOnceTimestamp();

    // End errors

    // Begin Allocations
    virtual PxAllocatorCallback& getAllocatorCallback()
    {
        return mAllocatorCallback;
    } // Return the user's allocator callback
    PxAllocatorCallback& getAllocator()
    {
        return mBroadcastingAllocator;
    } // Return the broadcasting allocator

    void registerAllocationListener(physx::shdfnd::AllocationListener& listener);
    void deregisterAllocationListener(physx::shdfnd::AllocationListener& listener);

    virtual bool getReportAllocationNames() const
    {
        return mReportAllocationNames;
    }
    virtual void setReportAllocationNames(bool value)
    {
        mReportAllocationNames = value;
    }

    // The getters below expose the containers and the mutexes separately; they do not lock.
    // NOTE(review): presumably callers are expected to hold the matching mutex while touching the
    // container - confirm against the allocator implementations.
    PX_INLINE AllocNameMap& getNamedAllocMap()
    {
        return mNamedAllocMap;
    }
    PX_INLINE Mutex& getNamedAllocMutex()
    {
        return mNamedAllocMutex;
    }

    PX_INLINE AllocFreeTable& getTempAllocFreeTable()
    {
        return mTempAllocFreeTable;
    }
    PX_INLINE Mutex& getTempAllocMutex()
    {
        return mTempAllocMutex;
    }
    // End allocations

  private:
    static void destroyInstance();

    Foundation(PxErrorCallback& errc, PxAllocatorCallback& alloc);
    ~Foundation();

    // init order is tricky here: the mutexes require the allocator, the allocator may require the error stream
    // (members are initialized in declaration order, so do not reorder the declarations below)
    PxAllocatorCallback& mAllocatorCallback;
    PxErrorCallback& mErrorCallback;

    BroadcastingAllocator mBroadcastingAllocator;
    BroadcastingErrorCallback mBroadcastingError;

    bool mReportAllocationNames;

    PxErrorCode::Enum mErrorMask;
    Mutex mErrorMutex;

    AllocNameMap mNamedAllocMap;
    Mutex mNamedAllocMutex;

    AllocFreeTable mTempAllocFreeTable;
    Mutex mTempAllocMutex;

    Mutex mListenerMutex;

    static Foundation* mInstance;
    static PxU32 mRefCount;
    // "Timestap" (sic): the spelling must match the out-of-class definition, which is not in this header
    static PxU32 mWarnOnceTimestap;
};
#if PX_VC
#pragma warning(pop)
#endif

// Convenience accessor for the Foundation singleton.
PX_INLINE Foundation& getFoundation()
{
    return Foundation::getInstance();
}

} // namespace shdfnd
} // namespace physx

// shortcut macros:
// usage: Foundation::error(PX_WARN, "static friction %f is lower than dynamic friction %f", sfr, dfr);
// Each macro expands to three arguments: the error code, __FILE__ and __LINE__.
#define PX_WARN ::physx::PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__
#define PX_INFO ::physx::PxErrorCode::eDEBUG_INFO, __FILE__, __LINE__

// PX_WARN_ONCE emits the warning only when the call site's cached timestamp differs from
// getWarnOnceTimestamp(), then caches the current value. It expands to a braced block, not a
// do/while(0), and PX_WARN_ONCE_IF invokes it without a trailing semicolon - keep that shape.
// Both macros compile to a no-op unless PX_DEBUG or PX_CHECKED is set.
#if PX_DEBUG || PX_CHECKED
#define PX_WARN_ONCE(string)                                                                                           \
    {                                                                                                                  \
        static PxU32 timestamp = 0;                                                                                    \
        if(timestamp != Ps::getFoundation().getWarnOnceTimestamp())                                                    \
        {                                                                                                              \
            timestamp = Ps::getFoundation().getWarnOnceTimestamp();                                                    \
            Ps::getFoundation().error(PX_WARN, string);                                                                \
        }                                                                                                              \
                                                                                                                       \
}
#define PX_WARN_ONCE_IF(condition, string)                                                                             \
    {                                                                                                                  \
        if(condition)                                                                                                  \
        {                                                                                                              \
            PX_WARN_ONCE(string)                                                                                       \
        }                                                                                                              \
                                                                                                                       \
}
#else
#define PX_WARN_ONCE(string) ((void)0)
#define PX_WARN_ONCE_IF(condition, string) ((void)0)
#endif

#endif
diff --git a/PxShared/src/foundation/include/PsHash.h b/PxShared/src/foundation/include/PsHash.h
new file mode 100644
index 0000000..6b74fb2 --- /dev/null +++ b/PxShared/src/foundation/include/PsHash.h @@ -0,0 +1,162 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PSFOUNDATION_PSHASH_H +#define PSFOUNDATION_PSHASH_H + +#include "Ps.h" +#include "PsBasicTemplates.h" + +#if PX_VC +#pragma warning(push) +#pragma warning(disable : 4302) +#endif + +#if PX_LINUX +#include "foundation/PxSimpleTypes.h" +#endif + +/*! +Central definition of hash functions +*/ + +namespace physx +{ +namespace shdfnd +{ +// Hash functions + +// Thomas Wang's 32 bit mix +// http://www.cris.com/~Ttwang/tech/inthash.htm +PX_FORCE_INLINE uint32_t hash(const uint32_t key) +{ + uint32_t k = key; + k += ~(k << 15); + k ^= (k >> 10); + k += (k << 3); + k ^= (k >> 6); + k += ~(k << 11); + k ^= (k >> 16); + return uint32_t(k); +} + +PX_FORCE_INLINE uint32_t hash(const int32_t key) +{ + return hash(uint32_t(key)); +} + +// Thomas Wang's 64 bit mix +// http://www.cris.com/~Ttwang/tech/inthash.htm +PX_FORCE_INLINE uint32_t hash(const uint64_t key) +{ + uint64_t k = key; + k += ~(k << 32); + k ^= (k >> 22); + k += ~(k << 13); + k ^= (k >> 8); + k += (k << 3); + k ^= (k >> 15); + k += ~(k << 27); + k ^= (k >> 31); + return uint32_t(UINT32_MAX & k); +} + +#if PX_APPLE_FAMILY +// hash for size_t, to make gcc happy +PX_INLINE uint32_t hash(const size_t key) +{ +#if PX_P64_FAMILY + return hash(uint64_t(key)); +#else + return hash(uint32_t(key)); +#endif +} +#endif + +// Hash function for pointers +PX_INLINE uint32_t hash(const void* ptr) +{ +#if PX_P64_FAMILY + return hash(uint64_t(ptr)); +#else + return hash(uint32_t(UINT32_MAX & size_t(ptr))); +#endif +} + +// Hash function for pairs +template <typename F, typename S> +PX_INLINE uint32_t hash(const Pair<F, S>& p) +{ + uint32_t seed = 0x876543; + uint32_t m = 1000007; + return hash(p.second) ^ (m * (hash(p.first) ^ (m * seed))); +} + +// hash object for hash map template parameter +template <class Key> +struct Hash +{ + uint32_t operator()(const Key& k) const + { + return hash(k); + } + bool equal(const Key& k0, const Key& k1) const + { + return k0 == k1; + } +}; + +// specialization for strings +template <> 
+struct Hash<const char*> +{ + public: + uint32_t operator()(const char* _string) const + { + // "DJB" string hash + const uint8_t* string = reinterpret_cast<const uint8_t*>(_string); + uint32_t h = 5381; + for(const uint8_t* ptr = string; *ptr; ptr++) + h = ((h << 5) + h) ^ uint32_t(*ptr); + return h; + } + bool equal(const char* string0, const char* string1) const + { + return !strcmp(string0, string1); + } +}; + +} // namespace shdfnd +} // namespace physx + +#if PX_VC +#pragma warning(pop) +#endif + +#endif // #ifndef PSFOUNDATION_PSHASH_H diff --git a/PxShared/src/foundation/include/PsHashInternals.h b/PxShared/src/foundation/include/PsHashInternals.h new file mode 100644 index 0000000..809baa3 --- /dev/null +++ b/PxShared/src/foundation/include/PsHashInternals.h @@ -0,0 +1,795 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSHASHINTERNALS_H +#define PSFOUNDATION_PSHASHINTERNALS_H + +#include "PsBasicTemplates.h" +#include "PsArray.h" +#include "PsBitUtils.h" +#include "PsHash.h" +#include "foundation/PxIntrinsics.h" + +#if PX_VC +#pragma warning(push) +#pragma warning(disable : 4127) // conditional expression is constant +#endif +namespace physx +{ +namespace shdfnd +{ +namespace internal +{ +template <class Entry, class Key, class HashFn, class GetKey, class Allocator, bool compacting> +class HashBase : private Allocator +{ + void init(uint32_t initialTableSize, float loadFactor) + { + mBuffer = NULL; + mEntries = NULL; + mEntriesNext = NULL; + mHash = NULL; + mEntriesCapacity = 0; + mHashSize = 0; + mLoadFactor = loadFactor; + mFreeList = uint32_t(EOL); + mTimestamp = 0; + mEntriesCount = 0; + + if(initialTableSize) + reserveInternal(initialTableSize); + } + + public: + typedef Entry EntryType; + + HashBase(uint32_t initialTableSize = 64, float loadFactor = 0.75f) : Allocator(PX_DEBUG_EXP("hashBase")) + { + init(initialTableSize, loadFactor); + } + + HashBase(uint32_t initialTableSize, float loadFactor, const Allocator& alloc) : Allocator(alloc) + { + init(initialTableSize, loadFactor); + } + + HashBase(const Allocator& alloc) : Allocator(alloc) + { + init(64, 0.75f); + } + + ~HashBase() + { + destroy(); // No need to clear() + + if(mBuffer) + 
Allocator::deallocate(mBuffer); + } + + static const uint32_t EOL = 0xffffffff; + + PX_INLINE Entry* create(const Key& k, bool& exists) + { + uint32_t h = 0; + if(mHashSize) + { + h = hash(k); + uint32_t index = mHash[h]; + while(index != EOL && !HashFn().equal(GetKey()(mEntries[index]), k)) + index = mEntriesNext[index]; + exists = index != EOL; + if(exists) + return mEntries + index; + } + else + exists = false; + + if(freeListEmpty()) + { + grow(); + h = hash(k); + } + + uint32_t entryIndex = freeListGetNext(); + + mEntriesNext[entryIndex] = mHash[h]; + mHash[h] = entryIndex; + + mEntriesCount++; + mTimestamp++; + + return mEntries + entryIndex; + } + + PX_INLINE const Entry* find(const Key& k) const + { + if(!mEntriesCount) + return NULL; + + const uint32_t h = hash(k); + uint32_t index = mHash[h]; + while(index != EOL && !HashFn().equal(GetKey()(mEntries[index]), k)) + index = mEntriesNext[index]; + return index != EOL ? mEntries + index : NULL; + } + + PX_INLINE bool erase(const Key& k, Entry& e) + { + if(!mEntriesCount) + return false; + + const uint32_t h = hash(k); + uint32_t* ptr = mHash + h; + while(*ptr != EOL && !HashFn().equal(GetKey()(mEntries[*ptr]), k)) + ptr = mEntriesNext + *ptr; + + if(*ptr == EOL) + return false; + + PX_PLACEMENT_NEW(&e, Entry)(mEntries[*ptr]); + + return eraseInternal(ptr); + } + + PX_INLINE bool erase(const Key& k) + { + if(!mEntriesCount) + return false; + + const uint32_t h = hash(k); + uint32_t* ptr = mHash + h; + while(*ptr != EOL && !HashFn().equal(GetKey()(mEntries[*ptr]), k)) + ptr = mEntriesNext + *ptr; + + if(*ptr == EOL) + return false; + + return eraseInternal(ptr); + } + + PX_INLINE uint32_t size() const + { + return mEntriesCount; + } + + PX_INLINE uint32_t capacity() const + { + return mHashSize; + } + + void clear() + { + if(!mHashSize || mEntriesCount == 0) + return; + + destroy(); + + intrinsics::memSet(mHash, EOL, mHashSize * sizeof(uint32_t)); + + const uint32_t sizeMinus1 = mEntriesCapacity - 1; + 
for(uint32_t i = 0; i < sizeMinus1; i++) + { + prefetchLine(mEntriesNext + i, 128); + mEntriesNext[i] = i + 1; + } + mEntriesNext[mEntriesCapacity - 1] = uint32_t(EOL); + mFreeList = 0; + mEntriesCount = 0; + } + + void reserve(uint32_t size) + { + if(size > mHashSize) + reserveInternal(size); + } + + PX_INLINE const Entry* getEntries() const + { + return mEntries; + } + + PX_INLINE Entry* insertUnique(const Key& k) + { + PX_ASSERT(find(k) == NULL); + uint32_t h = hash(k); + + uint32_t entryIndex = freeListGetNext(); + + mEntriesNext[entryIndex] = mHash[h]; + mHash[h] = entryIndex; + + mEntriesCount++; + mTimestamp++; + + return mEntries + entryIndex; + } + + private: + void destroy() + { + for(uint32_t i = 0; i < mHashSize; i++) + { + for(uint32_t j = mHash[i]; j != EOL; j = mEntriesNext[j]) + mEntries[j].~Entry(); + } + } + + template <typename HK, typename GK, class A, bool comp> + PX_NOINLINE void copy(const HashBase<Entry, Key, HK, GK, A, comp>& other); + + // free list management - if we're coalescing, then we use mFreeList to hold + // the top of the free list and it should always be equal to size(). Otherwise, + // we build a free list in the next() pointers. 
+ + PX_INLINE void freeListAdd(uint32_t index) + { + if(compacting) + { + mFreeList--; + PX_ASSERT(mFreeList == mEntriesCount); + } + else + { + mEntriesNext[index] = mFreeList; + mFreeList = index; + } + } + + PX_INLINE void freeListAdd(uint32_t start, uint32_t end) + { + if(!compacting) + { + for(uint32_t i = start; i < end - 1; i++) // add the new entries to the free list + mEntriesNext[i] = i + 1; + + // link in old free list + mEntriesNext[end - 1] = mFreeList; + PX_ASSERT(mFreeList != end - 1); + mFreeList = start; + } + else if(mFreeList == EOL) // don't reset the free ptr for the compacting hash unless it's empty + mFreeList = start; + } + + PX_INLINE uint32_t freeListGetNext() + { + PX_ASSERT(!freeListEmpty()); + if(compacting) + { + PX_ASSERT(mFreeList == mEntriesCount); + return mFreeList++; + } + else + { + uint32_t entryIndex = mFreeList; + mFreeList = mEntriesNext[mFreeList]; + return entryIndex; + } + } + + PX_INLINE bool freeListEmpty() const + { + if(compacting) + return mEntriesCount == mEntriesCapacity; + else + return mFreeList == EOL; + } + + PX_INLINE void replaceWithLast(uint32_t index) + { + PX_PLACEMENT_NEW(mEntries + index, Entry)(mEntries[mEntriesCount]); + mEntries[mEntriesCount].~Entry(); + mEntriesNext[index] = mEntriesNext[mEntriesCount]; + + uint32_t h = hash(GetKey()(mEntries[index])); + uint32_t* ptr; + for(ptr = mHash + h; *ptr != mEntriesCount; ptr = mEntriesNext + *ptr) + PX_ASSERT(*ptr != EOL); + *ptr = index; + } + + PX_INLINE uint32_t hash(const Key& k, uint32_t hashSize) const + { + return HashFn()(k) & (hashSize - 1); + } + + PX_INLINE uint32_t hash(const Key& k) const + { + return hash(k, mHashSize); + } + + PX_INLINE bool eraseInternal(uint32_t* ptr) + { + const uint32_t index = *ptr; + + *ptr = mEntriesNext[index]; + + mEntries[index].~Entry(); + + mEntriesCount--; + mTimestamp++; + + if (compacting && index != mEntriesCount) + replaceWithLast(index); + + freeListAdd(index); + return true; + } + + void 
reserveInternal(uint32_t size) + { + if(!isPowerOfTwo(size)) + size = nextPowerOfTwo(size); + + PX_ASSERT(!(size & (size - 1))); + + // decide whether iteration can be done on the entries directly + bool resizeCompact = compacting || freeListEmpty(); + + // define new table sizes + uint32_t oldEntriesCapacity = mEntriesCapacity; + uint32_t newEntriesCapacity = uint32_t(float(size) * mLoadFactor); + uint32_t newHashSize = size; + + // allocate new common buffer and setup pointers to new tables + uint8_t* newBuffer; + uint32_t* newHash; + uint32_t* newEntriesNext; + Entry* newEntries; + { + uint32_t newHashByteOffset = 0; + uint32_t newEntriesNextBytesOffset = newHashByteOffset + newHashSize * sizeof(uint32_t); + uint32_t newEntriesByteOffset = newEntriesNextBytesOffset + newEntriesCapacity * sizeof(uint32_t); + newEntriesByteOffset += (16 - (newEntriesByteOffset & 15)) & 15; + uint32_t newBufferByteSize = newEntriesByteOffset + newEntriesCapacity * sizeof(Entry); + + newBuffer = reinterpret_cast<uint8_t*>(Allocator::allocate(newBufferByteSize, __FILE__, __LINE__)); + PX_ASSERT(newBuffer); + + newHash = reinterpret_cast<uint32_t*>(newBuffer + newHashByteOffset); + newEntriesNext = reinterpret_cast<uint32_t*>(newBuffer + newEntriesNextBytesOffset); + newEntries = reinterpret_cast<Entry*>(newBuffer + newEntriesByteOffset); + } + + // initialize new hash table + intrinsics::memSet(newHash, uint32_t(EOL), newHashSize * sizeof(uint32_t)); + + // iterate over old entries, re-hash and create new entries + if(resizeCompact) + { + // check that old free list is empty - we don't need to copy the next entries + PX_ASSERT(compacting || mFreeList == EOL); + + for(uint32_t index = 0; index < mEntriesCount; ++index) + { + uint32_t h = hash(GetKey()(mEntries[index]), newHashSize); + newEntriesNext[index] = newHash[h]; + newHash[h] = index; + + PX_PLACEMENT_NEW(newEntries + index, Entry)(mEntries[index]); + mEntries[index].~Entry(); + } + } + else + { + // copy old free list, only 
required for non compact resizing + intrinsics::memCopy(newEntriesNext, mEntriesNext, mEntriesCapacity * sizeof(uint32_t)); + + for(uint32_t bucket = 0; bucket < mHashSize; bucket++) + { + uint32_t index = mHash[bucket]; + while(index != EOL) + { + uint32_t h = hash(GetKey()(mEntries[index]), newHashSize); + newEntriesNext[index] = newHash[h]; + PX_ASSERT(index != newHash[h]); + + newHash[h] = index; + + PX_PLACEMENT_NEW(newEntries + index, Entry)(mEntries[index]); + mEntries[index].~Entry(); + + index = mEntriesNext[index]; + } + } + } + + // swap buffer and pointers + Allocator::deallocate(mBuffer); + mBuffer = newBuffer; + mHash = newHash; + mHashSize = newHashSize; + mEntriesNext = newEntriesNext; + mEntries = newEntries; + mEntriesCapacity = newEntriesCapacity; + + freeListAdd(oldEntriesCapacity, newEntriesCapacity); + } + + void grow() + { + PX_ASSERT((mFreeList == EOL) || (compacting && (mEntriesCount == mEntriesCapacity))); + + uint32_t size = mHashSize == 0 ? 16 : mHashSize * 2; + reserve(size); + } + + uint8_t* mBuffer; + Entry* mEntries; + uint32_t* mEntriesNext; // same size as mEntries + uint32_t* mHash; + uint32_t mEntriesCapacity; + uint32_t mHashSize; + float mLoadFactor; + uint32_t mFreeList; + uint32_t mTimestamp; + uint32_t mEntriesCount; // number of entries + + public: + class Iter + { + public: + PX_INLINE Iter(HashBase& b) : mBucket(0), mEntry(uint32_t(b.EOL)), mTimestamp(b.mTimestamp), mBase(b) + { + if(mBase.mEntriesCapacity > 0) + { + mEntry = mBase.mHash[0]; + skip(); + } + } + + PX_INLINE void check() const + { + PX_ASSERT(mTimestamp == mBase.mTimestamp); + } + PX_INLINE const Entry& operator*() const + { + check(); + return mBase.mEntries[mEntry]; + } + PX_INLINE Entry& operator*() + { + check(); + return mBase.mEntries[mEntry]; + } + PX_INLINE const Entry* operator->() const + { + check(); + return mBase.mEntries + mEntry; + } + PX_INLINE Entry* operator->() + { + check(); + return mBase.mEntries + mEntry; + } + PX_INLINE Iter 
operator++() + { + check(); + advance(); + return *this; + } + PX_INLINE Iter operator++(int) + { + check(); + Iter i = *this; + advance(); + return i; + } + PX_INLINE bool done() const + { + check(); + return mEntry == mBase.EOL; + } + + private: + PX_INLINE void advance() + { + mEntry = mBase.mEntriesNext[mEntry]; + skip(); + } + PX_INLINE void skip() + { + while(mEntry == mBase.EOL) + { + if(++mBucket == mBase.mHashSize) + break; + mEntry = mBase.mHash[mBucket]; + } + } + + Iter& operator=(const Iter&); + + uint32_t mBucket; + uint32_t mEntry; + uint32_t mTimestamp; + HashBase& mBase; + }; + + /*! + Iterate over entries in a hash base and allow entry erase while iterating + */ + class EraseIterator + { + public: + PX_INLINE EraseIterator(HashBase& b): mBase(b) + { + reset(); + } + + PX_INLINE Entry* eraseCurrentGetNext(bool eraseCurrent) + { + if(eraseCurrent && mCurrentEntryIndexPtr) + { + mBase.eraseInternal(mCurrentEntryIndexPtr); + // if next was valid return the same ptr, if next was EOL search new hash entry + if(*mCurrentEntryIndexPtr != mBase.EOL) + return mBase.mEntries + *mCurrentEntryIndexPtr; + else + return traverseHashEntries(); + } + + // traverse mHash to find next entry + if(mCurrentEntryIndexPtr == NULL) + return traverseHashEntries(); + + const uint32_t index = *mCurrentEntryIndexPtr; + if(mBase.mEntriesNext[index] == mBase.EOL) + { + return traverseHashEntries(); + } + else + { + mCurrentEntryIndexPtr = mBase.mEntriesNext + index; + return mBase.mEntries + *mCurrentEntryIndexPtr; + } + } + + PX_INLINE void reset() + { + mCurrentHashIndex = 0; + mCurrentEntryIndexPtr = NULL; + } + + private: + PX_INLINE Entry* traverseHashEntries() + { + mCurrentEntryIndexPtr = NULL; + while (mCurrentEntryIndexPtr == NULL && mCurrentHashIndex < mBase.mHashSize) + { + if (mBase.mHash[mCurrentHashIndex] != mBase.EOL) + { + mCurrentEntryIndexPtr = mBase.mHash + mCurrentHashIndex; + mCurrentHashIndex++; + return mBase.mEntries + *mCurrentEntryIndexPtr; + } + else 
+ { + mCurrentHashIndex++; + } + } + return NULL; + } + + EraseIterator& operator=(const EraseIterator&); + private: + uint32_t* mCurrentEntryIndexPtr; + uint32_t mCurrentHashIndex; + HashBase& mBase; + }; +}; + +// Copies every entry of 'other' into this table. +// Space for other.mEntriesCount entries is reserved up front, then each bucket chain of 'other' +// is walked and its entries are re-inserted one by one through create(). +// The assert documents the expectation that no key of 'other' is already present in this table. +template <class Entry, class Key, class HashFn, class GetKey, class Allocator, bool compacting> +template <typename HK, typename GK, class A, bool comp> +PX_NOINLINE void +HashBase<Entry, Key, HashFn, GetKey, Allocator, compacting>::copy(const HashBase<Entry, Key, HK, GK, A, comp>& other) +{ + reserve(other.mEntriesCount); + + // i is a bucket index: other.mHash holds other.mHashSize chain heads, so the loop must be bounded + // by the hash size. Bounding it by the entry count skips every entry stored in a bucket whose + // index is >= mEntriesCount. + for(uint32_t i = 0; i < other.mHashSize; i++) + { + for(uint32_t j = other.mHash[i]; j != EOL; j = other.mEntriesNext[j]) + { + const Entry& otherEntry = other.mEntries[j]; + + bool exists; + Entry* newEntry = create(GK()(otherEntry), exists); + PX_ASSERT(!exists); + + PX_PLACEMENT_NEW(newEntry, Entry)(otherEntry); + } + } +} + +template <class Key, class HashFn, class Allocator = typename AllocatorTraits<Key>::Type, bool Coalesced = false> +class HashSetBase +{ + PX_NOCOPY(HashSetBase) + public: + struct GetKey + { + PX_INLINE const Key& operator()(const Key& e) + { + return e; + } + }; + + typedef HashBase<Key, Key, HashFn, GetKey, Allocator, Coalesced> BaseMap; + typedef typename BaseMap::Iter Iterator; + + HashSetBase(uint32_t initialTableSize, float loadFactor, const Allocator& alloc) + : mBase(initialTableSize, loadFactor, alloc) + { + } + + HashSetBase(const Allocator& alloc) : mBase(64, 0.75f, alloc) + { + } + + HashSetBase(uint32_t initialTableSize = 64, float loadFactor = 0.75f) : mBase(initialTableSize, loadFactor) + { + } + + bool insert(const Key& k) + { + bool exists; + Key* e = mBase.create(k, exists); + if(!exists) + PX_PLACEMENT_NEW(e, Key)(k); + return !exists; + } + + PX_INLINE bool contains(const Key& k) const + { + return mBase.find(k) != 0; + } + PX_INLINE bool erase(const Key& k) + { + return mBase.erase(k); + } + PX_INLINE uint32_t size() const + { + return mBase.size(); + } + PX_INLINE uint32_t capacity() 
const + { + return mBase.capacity(); + } + PX_INLINE void reserve(uint32_t size) + { + mBase.reserve(size); + } + PX_INLINE void clear() + { + mBase.clear(); + } + + protected: + BaseMap mBase; +}; + +template <class Key, class Value, class HashFn, class Allocator = typename AllocatorTraits<Pair<const Key, Value> >::Type> +class HashMapBase +{ + PX_NOCOPY(HashMapBase) + public: + typedef Pair<const Key, Value> Entry; + + struct GetKey + { + PX_INLINE const Key& operator()(const Entry& e) + { + return e.first; + } + }; + + typedef HashBase<Entry, Key, HashFn, GetKey, Allocator, true> BaseMap; + typedef typename BaseMap::Iter Iterator; + typedef typename BaseMap::EraseIterator EraseIterator; + + HashMapBase(uint32_t initialTableSize, float loadFactor, const Allocator& alloc) + : mBase(initialTableSize, loadFactor, alloc) + { + } + + HashMapBase(const Allocator& alloc) : mBase(64, 0.75f, alloc) + { + } + + HashMapBase(uint32_t initialTableSize = 64, float loadFactor = 0.75f) : mBase(initialTableSize, loadFactor) + { + } + + bool insert(const Key /*&*/ k, const Value /*&*/ v) + { + bool exists; + Entry* e = mBase.create(k, exists); + if(!exists) + PX_PLACEMENT_NEW(e, Entry)(k, v); + return !exists; + } + + Value& operator[](const Key& k) + { + bool exists; + Entry* e = mBase.create(k, exists); + if(!exists) + PX_PLACEMENT_NEW(e, Entry)(k, Value()); + + return e->second; + } + + PX_INLINE const Entry* find(const Key& k) const + { + return mBase.find(k); + } + PX_INLINE bool erase(const Key& k) + { + return mBase.erase(k); + } + PX_INLINE bool erase(const Key& k, Entry& e) + { + return mBase.erase(k, e); + } + PX_INLINE uint32_t size() const + { + return mBase.size(); + } + PX_INLINE uint32_t capacity() const + { + return mBase.capacity(); + } + PX_INLINE Iterator getIterator() + { + return Iterator(mBase); + } + PX_INLINE EraseIterator getEraseIterator() + { + return EraseIterator(mBase); + } + PX_INLINE void reserve(uint32_t size) + { + mBase.reserve(size); + } + 
PX_INLINE void clear() + { + mBase.clear(); + } + + protected: + BaseMap mBase; +}; +} + +} // namespace shdfnd +} // namespace physx + +#if PX_VC +#pragma warning(pop) +#endif +#endif // #ifndef PSFOUNDATION_PSHASHINTERNALS_H diff --git a/PxShared/src/foundation/include/PsHashMap.h b/PxShared/src/foundation/include/PsHashMap.h new file mode 100644 index 0000000..5091dee --- /dev/null +++ b/PxShared/src/foundation/include/PsHashMap.h @@ -0,0 +1,118 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSHASHMAP_H +#define PSFOUNDATION_PSHASHMAP_H + +#include "PsHashInternals.h" + +// TODO: make this doxy-format +// +// This header defines two hash maps. Hash maps +// * support custom initial table sizes (rounded up internally to power-of-2) +// * support custom static allocator objects +// * auto-resize, based on a load factor (i.e. a 64-entry .75 load factor hash will resize +// when the 49th element is inserted) +// * are based on open hashing +// * have O(1) contains, erase +// +// Maps have STL-like copying semantics, and properly initialize and destruct copies of objects +// +// There are two forms of map: coalesced and uncoalesced. Coalesced maps keep the entries in the +// initial segment of an array, so are fast to iterate over; however deletion is approximately +// twice as expensive. +// +// HashMap<T>: +// bool insert(const Key& k, const Value& v) O(1) amortized (exponential resize policy) +// Value & operator[](const Key& k) O(1) for existing objects, else O(1) amortized +// const Entry * find(const Key& k); O(1) +// bool erase(const T& k); O(1) +// uint32_t size(); constant +// void reserve(uint32_t size); O(MAX(currentOccupancy,size)) +// void clear(); O(currentOccupancy) (with zero constant for objects +// without +// destructors) +// Iterator getIterator(); +// +// operator[] creates an entry if one does not exist, initializing with the default constructor. 
+// CoalescedHashMap<Key, Value> does not declare its own getIterator, but instead supports +// const Pair<const Key, Value>* getEntries() const; +// +// Use of iterators: +// +// for(HashMap::Iterator iter = test.getIterator(); !iter.done(); ++iter) +// myFunction(iter->first, iter->second); + +namespace physx +{ +namespace shdfnd +{ +template <class Key, class Value, class HashFn = Hash<Key>, class Allocator = NonTrackingAllocator> +class HashMap : public internal::HashMapBase<Key, Value, HashFn, Allocator> +{ + public: + typedef internal::HashMapBase<Key, Value, HashFn, Allocator> HashMapBase; + typedef typename HashMapBase::Iterator Iterator; + + HashMap(uint32_t initialTableSize = 64, float loadFactor = 0.75f) : HashMapBase(initialTableSize, loadFactor) + { + } + HashMap(uint32_t initialTableSize, float loadFactor, const Allocator& alloc) + : HashMapBase(initialTableSize, loadFactor, alloc) + { + } + HashMap(const Allocator& alloc) : HashMapBase(64, 0.75f, alloc) + { + } + Iterator getIterator() + { + return Iterator(HashMapBase::mBase); + } +}; + +template <class Key, class Value, class HashFn = Hash<Key>, class Allocator = NonTrackingAllocator> +class CoalescedHashMap : public internal::HashMapBase<Key, Value, HashFn, Allocator> +{ + public: + typedef internal::HashMapBase<Key, Value, HashFn, Allocator> HashMapBase; + + CoalescedHashMap(uint32_t initialTableSize = 64, float loadFactor = 0.75f) + : HashMapBase(initialTableSize, loadFactor) + { + } + const Pair<const Key, Value>* getEntries() const + { + return HashMapBase::mBase.getEntries(); + } +}; + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSHASHMAP_H diff --git a/PxShared/src/foundation/include/PsHashSet.h b/PxShared/src/foundation/include/PsHashSet.h new file mode 100644 index 0000000..195f01d --- /dev/null +++ b/PxShared/src/foundation/include/PsHashSet.h @@ -0,0 +1,127 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement 
provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSHASHSET_H +#define PSFOUNDATION_PSHASHSET_H + +#include "PsHashInternals.h" + +// TODO: make this doxy-format + +// This header defines two hash sets. Hash sets +// * support custom initial table sizes (rounded up internally to power-of-2) +// * support custom static allocator objects +// * auto-resize, based on a load factor (i.e. 
a 64-entry .75 load factor hash will resize +// when the 49th element is inserted) +// * are based on open hashing +// +// Sets have STL-like copying semantics, and properly initialize and destruct copies of objects +// +// There are two forms of set: coalesced and uncoalesced. Coalesced sets keep the entries in the +// initial segment of an array, so are fast to iterate over; however deletion is approximately +// twice as expensive. +// +// HashSet<T>: +// bool insert(const T& k) amortized O(1) (exponential resize policy) +// bool contains(const T& k) const; O(1) +// bool erase(const T& k); O(1) +// uint32_t size() const; constant +// void reserve(uint32_t size); O(MAX(size, currentOccupancy)) +// void clear(); O(currentOccupancy) (with zero constant for objects without +// destructors) +// Iterator getIterator(); +// +// Use of iterators: +// +// for(HashSet::Iterator iter = test.getIterator(); !iter.done(); ++iter) +// myFunction(*iter); +// +// CoalescedHashSet<T> does not support getIterator, but instead supports +// const Key *getEntries(); +// +// insertion into a set already containing the element fails returning false, as does +// erasure of an element not in the set +// + +namespace physx +{ +namespace shdfnd +{ +template <class Key, class HashFn = Hash<Key>, class Allocator = NonTrackingAllocator> +class HashSet : public internal::HashSetBase<Key, HashFn, Allocator, false> +{ + public: + typedef internal::HashSetBase<Key, HashFn, Allocator, false> HashSetBase; + typedef typename HashSetBase::Iterator Iterator; + + HashSet(uint32_t initialTableSize = 64, float loadFactor = 0.75f) : HashSetBase(initialTableSize, loadFactor) + { + } + HashSet(uint32_t initialTableSize, float loadFactor, const Allocator& alloc) + : HashSetBase(initialTableSize, loadFactor, alloc) + { + } + HashSet(const Allocator& alloc) : HashSetBase(64, 0.75f, alloc) + { + } + Iterator getIterator() + { + return Iterator(HashSetBase::mBase); + } +}; + +template <class Key, class HashFn = 
Hash<Key>, class Allocator = NonTrackingAllocator> +class CoalescedHashSet : public internal::HashSetBase<Key, HashFn, Allocator, true> +{ + public: + typedef typename internal::HashSetBase<Key, HashFn, Allocator, true> HashSetBase; + + CoalescedHashSet(uint32_t initialTableSize = 64, float loadFactor = 0.75f) + : HashSetBase(initialTableSize, loadFactor) + { + } + + CoalescedHashSet(uint32_t initialTableSize, float loadFactor, const Allocator& alloc) + : HashSetBase(initialTableSize, loadFactor, alloc) + { + } + CoalescedHashSet(const Allocator& alloc) : HashSetBase(64, 0.75f, alloc) + { + } + + const Key* getEntries() const + { + return HashSetBase::mBase.getEntries(); + } +}; + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSHASHSET_H diff --git a/PxShared/src/foundation/include/PsInlineAllocator.h b/PxShared/src/foundation/include/PsInlineAllocator.h new file mode 100644 index 0000000..f87abd6 --- /dev/null +++ b/PxShared/src/foundation/include/PsInlineAllocator.h @@ -0,0 +1,91 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSINLINEALLOCATOR_H +#define PSFOUNDATION_PSINLINEALLOCATOR_H + +#include "PsUserAllocated.h" + +namespace physx +{ +namespace shdfnd +{ +// this is used by the array class to allocate some space for a small number +// of objects along with the metadata +template <uint32_t N, typename BaseAllocator> +class InlineAllocator : private BaseAllocator +{ + public: + InlineAllocator(const PxEMPTY v) : BaseAllocator(v) + { + } + + InlineAllocator(const BaseAllocator& alloc = BaseAllocator()) : BaseAllocator(alloc), mBufferUsed(false) + { + } + + InlineAllocator(const InlineAllocator& aloc) : BaseAllocator(aloc), mBufferUsed(false) + { + } + + void* allocate(uint32_t size, const char* filename, int line) + { + if(!mBufferUsed && size <= N) + { + mBufferUsed = true; + return mBuffer; + } + return BaseAllocator::allocate(size, filename, line); + } + + void deallocate(void* ptr) + { + if(ptr == mBuffer) + mBufferUsed = false; + else + BaseAllocator::deallocate(ptr); + } + + PX_FORCE_INLINE uint8_t* getInlineBuffer() + { + return mBuffer; + } + PX_FORCE_INLINE bool isBufferUsed() const + { + return mBufferUsed; + } + + protected: + 
uint8_t mBuffer[N]; + bool mBufferUsed; +}; +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSINLINEALLOCATOR_H diff --git a/PxShared/src/foundation/include/PsInlineAoS.h b/PxShared/src/foundation/include/PsInlineAoS.h new file mode 100644 index 0000000..6d43607 --- /dev/null +++ b/PxShared/src/foundation/include/PsInlineAoS.h @@ -0,0 +1,48 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. 
All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSINLINEAOS_H +#define PSFOUNDATION_PSINLINEAOS_H + +#include "foundation/PxPreprocessor.h" + +#if PX_WINDOWS +#include "windows/PsWindowsTrigConstants.h" +#include "windows/PsWindowsInlineAoS.h" +#elif(PX_UNIX_FAMILY || PX_PS4 || PX_NX) +#include "unix/PsUnixTrigConstants.h" +#include "unix/PsUnixInlineAoS.h" +#elif PX_XBOXONE +#include "XboxOne/PsXboxOneTrigConstants.h" +#include "XboxOne/PsXboxOneInlineAoS.h" +#else +#error "Platform not supported!" +#endif + +#endif diff --git a/PxShared/src/foundation/include/PsInlineArray.h b/PxShared/src/foundation/include/PsInlineArray.h new file mode 100644 index 0000000..e9ea939 --- /dev/null +++ b/PxShared/src/foundation/include/PsInlineArray.h @@ -0,0 +1,68 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSINLINEARRAY_H +#define PSFOUNDATION_PSINLINEARRAY_H + +#include "PsArray.h" +#include "PsInlineAllocator.h" + +namespace physx +{ +namespace shdfnd +{ + +// array that pre-allocates for N elements +template <typename T, uint32_t N, typename Alloc = typename AllocatorTraits<T>::Type> +class InlineArray : public Array<T, InlineAllocator<N * sizeof(T), Alloc> > +{ + typedef InlineAllocator<N * sizeof(T), Alloc> Allocator; + + public: + InlineArray(const PxEMPTY v) : Array<T, Allocator>(v) + { + if(isInlined()) + this->mData = reinterpret_cast<T*>(Array<T, Allocator>::getInlineBuffer()); + } + + PX_INLINE bool isInlined() const + { + return Allocator::isBufferUsed(); + } + + PX_INLINE explicit InlineArray(const Alloc& alloc = Alloc()) : Array<T, Allocator>(alloc) + { + this->mData = this->allocate(N); + this->mCapacity = N; + } +}; +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSINLINEARRAY_H diff --git a/PxShared/src/foundation/include/PsIntrinsics.h b/PxShared/src/foundation/include/PsIntrinsics.h new file mode 100644 index 0000000..1e1b9d1 --- /dev/null +++ 
b/PxShared/src/foundation/include/PsIntrinsics.h @@ -0,0 +1,47 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PSFOUNDATION_PSINTRINSICS_H +#define PSFOUNDATION_PSINTRINSICS_H + +#include "foundation/PxPreprocessor.h" + +#if PX_WINDOWS_FAMILY +#include "windows/PsWindowsIntrinsics.h" +#elif(PX_LINUX || PX_ANDROID || PX_APPLE_FAMILY || PX_PS4) +#include "unix/PsUnixIntrinsics.h" +#elif PX_XBOXONE +#include "XboxOne/PsXboxOneIntrinsics.h" +#elif PX_NX +#include "nx/PsNXIntrinsics.h" +#else +#error "Platform not supported!" +#endif + +#endif // #ifndef PSFOUNDATION_PSINTRINSICS_H diff --git a/PxShared/src/foundation/include/PsMathUtils.h b/PxShared/src/foundation/include/PsMathUtils.h new file mode 100644 index 0000000..794419b --- /dev/null +++ b/PxShared/src/foundation/include/PsMathUtils.h @@ -0,0 +1,697 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. 
Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSMATHUTILS_H +#define PSFOUNDATION_PSMATHUTILS_H + +#include "foundation/PxPreprocessor.h" +#include "foundation/PxTransform.h" +#include "foundation/PxMat33.h" +#include "Ps.h" +#include "PsIntrinsics.h" + +// General guideline is: if it's an abstract math function, it belongs here. +// If it's a math function where the inputs have specific semantics (e.g. +// separateSwingTwist) it doesn't. + +namespace physx +{ +namespace shdfnd +{ +/** +\brief sign returns the sign of its argument. The sign of zero is undefined. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 sign(const PxF32 a) +{ + return intrinsics::sign(a); +} + +/** +\brief sign returns the sign of its argument. The sign of zero is undefined. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 sign(const PxF64 a) +{ + return (a >= 0.0) ? 1.0 : -1.0; +} + +/** +\brief sign returns the sign of its argument. The sign of zero is undefined. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxI32 sign(const PxI32 a) +{ + return (a >= 0) ? 1 : -1; +} + +/** +\brief Returns true if the two numbers are within eps of each other. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE bool equals(const PxF32 a, const PxF32 b, const PxF32 eps) +{ + return (PxAbs(a - b) < eps); +} + +/** +\brief Returns true if the two numbers are within eps of each other. 
+*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE bool equals(const PxF64 a, const PxF64 b, const PxF64 eps) +{ + return (PxAbs(a - b) < eps); +} + +/** +\brief The floor function returns a floating-point value representing the largest integer that is less than or equal to +x. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 floor(const PxF32 a) +{ + return floatFloor(a); +} + +/** +\brief The floor function returns a floating-point value representing the largest integer that is less than or equal to +x. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 floor(const PxF64 a) +{ + return ::floor(a); +} + +/** +\brief The ceil function returns a single value representing the smallest integer that is greater than or equal to x. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 ceil(const PxF32 a) +{ + return ::ceilf(a); +} + +/** +\brief The ceil function returns a double value representing the smallest integer that is greater than or equal to x. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 ceil(const PxF64 a) +{ + return ::ceil(a); +} + +/** +\brief mod returns the floating-point remainder of x / y. + +If the value of y is 0.0, mod returns a quiet NaN. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 mod(const PxF32 x, const PxF32 y) +{ + return PxF32(::fmodf(x, y)); +} + +/** +\brief mod returns the floating-point remainder of x / y. + +If the value of y is 0.0, mod returns a quiet NaN. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 mod(const PxF64 x, const PxF64 y) +{ + return ::fmod(x, y); +} + +/** +\brief Square. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 sqr(const PxF32 a) +{ + return a * a; +} + +/** +\brief Square. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 sqr(const PxF64 a) +{ + return a * a; +} + +/** +\brief Calculates x raised to the power of y. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 pow(const PxF32 x, const PxF32 y) +{ + return ::powf(x, y); +} + +/** +\brief Calculates x raised to the power of y. 
+*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 pow(const PxF64 x, const PxF64 y) +{ + return ::pow(x, y); +} + +/** +\brief Calculates e^a +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 exp(const PxF32 a) +{ + return ::expf(a); +} +/** + +\brief Calculates e^a +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 exp(const PxF64 a) +{ + return ::exp(a); +} + +/** +\brief Calculates 2^a (evaluated as e^(a * ln 2)) +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 exp2(const PxF32 a) +{ + return ::expf(a * 0.693147180559945309417f); +} +/** + +\brief Calculates 2^a (evaluated as e^(a * ln 2)) +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 exp2(const PxF64 a) +{ + return ::exp(a * 0.693147180559945309417); +} + +/** +\brief Calculates the natural (base-e) logarithm of a. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 logE(const PxF32 a) +{ + return ::logf(a); +} + +/** +\brief Calculates the natural (base-e) logarithm of a. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 logE(const PxF64 a) +{ + return ::log(a); +} + +/** +\brief Calculates the base-2 logarithm of a (natural logarithm divided by ln 2). +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 log2(const PxF32 a) +{ + return ::logf(a) / 0.693147180559945309417f; +} + +/** +\brief Calculates the base-2 logarithm of a (natural logarithm divided by ln 2). +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 log2(const PxF64 a) +{ + return ::log(a) / 0.693147180559945309417; +} + +/** +\brief Calculates the base-10 logarithm of a. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 log10(const PxF32 a) +{ + return ::log10f(a); +} + +/** +\brief Calculates the base-10 logarithm of a. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 log10(const PxF64 a) +{ + return ::log10(a); +} + +/** +\brief Converts degrees to radians. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 degToRad(const PxF32 a) +{ + return 0.01745329251994329547f * a; +} + +/** +\brief Converts degrees to radians. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 degToRad(const PxF64 a) +{ + return 0.01745329251994329547 * a; +} + +/** +\brief Converts radians to degrees. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 radToDeg(const PxF32 a) +{ + return 57.29577951308232286465f * a; +} + +/** +\brief Converts radians to degrees.
+*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 radToDeg(const PxF64 a) +{ + return 57.29577951308232286465 * a; +} + +//! \brief compute sine and cosine at the same time. There is a 'fsincos' on PC that we probably want to use here +PX_CUDA_CALLABLE PX_FORCE_INLINE void sincos(const PxF32 radians, PxF32& sin, PxF32& cos) +{ + /* something like: + _asm fld Local + _asm fsincos + _asm fstp LocalCos + _asm fstp LocalSin + */ + sin = PxSin(radians); + cos = PxCos(radians); +} + +/** +\brief uniform random number in [a,b] +*/ +PX_FORCE_INLINE PxI32 rand(const PxI32 a, const PxI32 b) +{ + return a + PxI32(::rand() % (b - a + 1)); +} + +/** +\brief uniform random number in [a,b] +*/ +PX_FORCE_INLINE PxF32 rand(const PxF32 a, const PxF32 b) +{ + return a + (b - a) * ::rand() / RAND_MAX; +} + +//! \brief return angle between two vectors in radians +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 angle(const PxVec3& v0, const PxVec3& v1) +{ + const PxF32 cos = v0.dot(v1); // |v0|*|v1|*Cos(Angle) + const PxF32 sin = (v0.cross(v1)).magnitude(); // |v0|*|v1|*Sin(Angle) + return PxAtan2(sin, cos); +} + +//! If possible use instead fsel on the dot product /*fsel(d.dot(p),onething,anotherthing);*/ +//! Compares orientations (more readable, user-friendly function) +PX_CUDA_CALLABLE PX_FORCE_INLINE bool sameDirection(const PxVec3& d, const PxVec3& p) +{ + return d.dot(p) >= 0.0f; +} + +//! Checks 2 values have different signs +PX_CUDA_CALLABLE PX_FORCE_INLINE IntBool differentSign(PxReal f0, PxReal f1) +{ +#if !PX_EMSCRIPTEN + union + { + PxU32 u; + PxReal f; + } u1, u2; + u1.f = f0; + u2.f = f1; + return IntBool((u1.u ^ u2.u) & PX_SIGN_BITMASK); +#else + // javascript floats are 64-bits... 
+ return IntBool( (f0*f1) < 0.0f ); +#endif +} + +PX_CUDA_CALLABLE PX_FORCE_INLINE PxMat33 star(const PxVec3& v) +{ + return PxMat33(PxVec3(0, v.z, -v.y), PxVec3(-v.z, 0, v.x), PxVec3(v.y, -v.x, 0)); +} + +PX_CUDA_CALLABLE PX_INLINE PxVec3 log(const PxQuat& q) +{ + const PxReal s = q.getImaginaryPart().magnitude(); + if(s < 1e-12f) + return PxVec3(0.0f); + // force the half-angle to have magnitude <= pi/2 + PxReal halfAngle = q.w < 0 ? PxAtan2(-s, -q.w) : PxAtan2(s, q.w); + PX_ASSERT(halfAngle >= -PxPi / 2 && halfAngle <= PxPi / 2); + + return q.getImaginaryPart().getNormalized() * 2.f * halfAngle; +} + +PX_CUDA_CALLABLE PX_INLINE PxQuat exp(const PxVec3& v) +{ + const PxReal m = v.magnitudeSquared(); + return m < 1e-24f ? PxQuat(PxIdentity) : PxQuat(PxSqrt(m), v * PxRecipSqrt(m)); +} + +// quat to rotate v0 to v1 +PX_CUDA_CALLABLE PX_INLINE PxQuat rotationArc(const PxVec3& v0, const PxVec3& v1) +{ + const PxVec3 cross = v0.cross(v1); + const PxReal d = v0.dot(v1); + if(d <= -0.99999f) + return (PxAbs(v0.x) < 0.1f ? PxQuat(0.0f, v0.z, -v0.y, 0.0f) : PxQuat(v0.y, -v0.x, 0.0, 0.0)).getNormalized(); + + const PxReal s = PxSqrt((1 + d) * 2), r = 1 / s; + + return PxQuat(cross.x * r, cross.y * r, cross.z * r, s * 0.5f).getNormalized(); +} + +/** +\brief returns largest axis +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 largestAxis(const PxVec3& v) +{ + PxU32 m = PxU32(v.y > v.x ? 1 : 0); + return v.z > v[m] ? 2 : m; +} + +/** +\brief returns indices for the largest axis and 2 other axes +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 largestAxis(const PxVec3& v, PxU32& other1, PxU32& other2) +{ + if(v.x >= PxMax(v.y, v.z)) + { + other1 = 1; + other2 = 2; + return 0; + } + else if(v.y >= v.z) + { + other1 = 0; + other2 = 2; + return 1; + } + else + { + other1 = 0; + other2 = 1; + return 2; + } +} + +/** +\brief returns axis with largest absolute value +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 closestAxis(const PxVec3& v) +{ + PxU32 m = PxU32(PxAbs(v.y) > PxAbs(v.x) ?
1 : 0); + return PxAbs(v.z) > PxAbs(v[m]) ? 2 : m; +} + +PX_CUDA_CALLABLE PX_INLINE PxU32 closestAxis(const PxVec3& v, PxU32& j, PxU32& k) +{ + // find largest 2D plane projection + const PxF32 absPx = PxAbs(v.x); + const PxF32 absNy = PxAbs(v.y); + const PxF32 absNz = PxAbs(v.z); + + PxU32 m = 0; // x biggest axis + j = 1; + k = 2; + if(absNy > absPx && absNy > absNz) + { + // y biggest + j = 2; + k = 0; + m = 1; + } + else if(absNz > absPx) + { + // z biggest + j = 0; + k = 1; + m = 2; + } + return m; +} + +/*! +Extend an edge along its length by a factor +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE void makeFatEdge(PxVec3& p0, PxVec3& p1, PxReal fatCoeff) +{ + PxVec3 delta = p1 - p0; + + const PxReal m = delta.magnitude(); + if(m > 0.0f) + { + delta *= fatCoeff / m; + p0 -= delta; + p1 += delta; + } +} + +//! Compute point as combination of barycentric coordinates +PX_CUDA_CALLABLE PX_FORCE_INLINE PxVec3 +computeBarycentricPoint(const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxReal u, PxReal v) +{ + // This seems to confuse the compiler... + // return (1.0f - u - v)*p0 + u*p1 + v*p2; + const PxF32 w = 1.0f - u - v; + return PxVec3(w * p0.x + u * p1.x + v * p2.x, w * p0.y + u * p1.y + v * p2.y, w * p0.z + u * p1.z + v * p2.z); +} + +// generates a pair of quaternions (swing, twist) such that in = swing * twist, with +// swing.x = 0 +// twist.y = twist.z = 0, and twist is a unit quat +PX_FORCE_INLINE void separateSwingTwist(const PxQuat& q, PxQuat& swing, PxQuat& twist) +{ + twist = q.x != 0.0f ? PxQuat(q.x, 0, 0, q.w).getNormalized() : PxQuat(PxIdentity); + swing = q * twist.getConjugate(); +} + +// generate two tangent vectors to a given normal +PX_FORCE_INLINE void normalToTangents(const PxVec3& normal, PxVec3& tangent0, PxVec3& tangent1) +{ + tangent0 = PxAbs(normal.x) < 0.70710678f ? PxVec3(0, -normal.z, normal.y) : PxVec3(-normal.y, normal.x, 0); + tangent0.normalize(); + tangent1 = normal.cross(tangent0); +} + +// todo: what is this function doing? 
+PX_FOUNDATION_API PxQuat computeQuatFromNormal(const PxVec3& n); + +/** +\brief computes an oriented bounding box around the scaled basis. +\param basis Input = skewed basis, Output = (normalized) orthogonal basis. +\return Bounding box extent. +*/ +PX_FOUNDATION_API PxVec3 optimizeBoundingBox(PxMat33& basis); + +PX_FOUNDATION_API PxQuat slerp(const PxReal t, const PxQuat& left, const PxQuat& right); + +PX_CUDA_CALLABLE PX_INLINE PxVec3 ellipseClamp(const PxVec3& point, const PxVec3& radii) +{ + // This function needs to be implemented in the header file because + // it is included in a spu shader program. + + // finds the closest point on the ellipse to a given point + + // (p.y, p.z) is the input point (the 'point' parameter) + // (e.y, e.z) are the radii of the ellipse (the 'radii' parameter) + + // lagrange multiplier method with Newton/Halley hybrid root-finder. + // see http://www.geometrictools.com/Documentation/DistancePointToEllipse2.pdf + // for proof of Newton step robustness and initial estimate. + // Halley converges much faster but sometimes overshoots - when that happens we take + // a newton step instead + + // converges in 1-2 iterations where D&C works well, and it's good with 4 iterations + // with any ellipse that isn't completely crazy + + const PxU32 MAX_ITERATIONS = 20; + const PxReal convergenceThreshold = 1e-4f; + + // iteration requires first quadrant but we recover generality later + + PxVec3 q(0, PxAbs(point.y), PxAbs(point.z)); + const PxReal tinyEps = 1e-6f; // very close to minor axis is numerically problematic but trivial + if(radii.y >= radii.z) + { + if(q.z < tinyEps) + return PxVec3(0, point.y > 0 ? radii.y : -radii.y, 0); + } + else + { + if(q.y < tinyEps) + return PxVec3(0, 0, point.z > 0 ? radii.z : -radii.z); + } + + PxVec3 denom, e2 = radii.multiply(radii), eq = radii.multiply(q); + + // we can use any initial guess which is > maximum(-e.y^2,-e.z^2) and for which f(t) is > 0. + // this guess works well near the axes, but is weak along the diagonals.
+ + PxReal t = PxMax(eq.y - e2.y, eq.z - e2.z); + + for(PxU32 i = 0; i < MAX_ITERATIONS; i++) + { + denom = PxVec3(0, 1 / (t + e2.y), 1 / (t + e2.z)); + PxVec3 denom2 = eq.multiply(denom); + + PxVec3 fv = denom2.multiply(denom2); + PxReal f = fv.y + fv.z - 1; + + // although in exact arithmetic we are guaranteed f>0, we can get here + // on the first iteration via catastrophic cancellation if the point is + // very close to the origin. In that case we just behave as if f=0 + + if(f < convergenceThreshold) + return e2.multiply(point).multiply(denom); + + PxReal df = fv.dot(denom) * -2.0f; + t = t - f / df; + } + + // we didn't converge, so clamp what we have + PxVec3 r = e2.multiply(point).multiply(denom); + return r * PxRecipSqrt(sqr(r.y / radii.y) + sqr(r.z / radii.z)); +} + +PX_CUDA_CALLABLE PX_INLINE PxReal tanHalf(PxReal sin, PxReal cos) +{ + return sin / (1 + cos); +} + +PX_INLINE PxQuat quatFromTanQVector(const PxVec3& v) +{ + PxReal v2 = v.dot(v); + if(v2 < 1e-12f) + return PxQuat(PxIdentity); + PxReal d = 1 / (1 + v2); + return PxQuat(v.x * 2, v.y * 2, v.z * 2, 1 - v2) * d; +} + +PX_FORCE_INLINE PxVec3 cross100(const PxVec3& b) +{ + return PxVec3(0.0f, -b.z, b.y); +} +PX_FORCE_INLINE PxVec3 cross010(const PxVec3& b) +{ + return PxVec3(b.z, 0.0f, -b.x); +} +PX_FORCE_INLINE PxVec3 cross001(const PxVec3& b) +{ + return PxVec3(-b.y, b.x, 0.0f); +} + +PX_INLINE void decomposeVector(PxVec3& normalCompo, PxVec3& tangentCompo, const PxVec3& outwardDir, + const PxVec3& outwardNormal) +{ + normalCompo = outwardNormal * (outwardDir.dot(outwardNormal)); + tangentCompo = outwardDir - normalCompo; +} + +//! 
\brief Return (i+1)%3 +// Avoid variable shift for XBox: +// PX_INLINE PxU32 Ps::getNextIndex3(PxU32 i) { return (1<<i) & 3; } +PX_INLINE PxU32 getNextIndex3(PxU32 i) +{ + return (i + 1 + (i >> 1)) & 3; +} + +PX_INLINE PxMat33 rotFrom2Vectors(const PxVec3& from, const PxVec3& to) +{ + // See bottom of http://www.euclideanspace.com/maths/algebra/matrix/orthogonal/rotation/index.htm + + // Early exit if to = from + if((from - to).magnitudeSquared() < 1e-4f) + return PxMat33(PxIdentity); + + // Early exit if to = -from + if((from + to).magnitudeSquared() < 1e-4f) + return PxMat33::createDiagonal(PxVec3(1.0f, -1.0f, -1.0f)); + + PxVec3 n = from.cross(to); + + PxReal C = from.dot(to), S = PxSqrt(1 - C * C), CC = 1 - C; + + PxReal xx = n.x * n.x, yy = n.y * n.y, zz = n.z * n.z, xy = n.x * n.y, yz = n.y * n.z, xz = n.x * n.z; + + PxMat33 R; + + R(0, 0) = 1 + CC * (xx - 1); + R(0, 1) = -n.z * S + CC * xy; + R(0, 2) = n.y * S + CC * xz; + + R(1, 0) = n.z * S + CC * xy; + R(1, 1) = 1 + CC * (yy - 1); + R(1, 2) = -n.x * S + CC * yz; + + R(2, 0) = -n.y * S + CC * xz; + R(2, 1) = n.x * S + CC * yz; + R(2, 2) = 1 + CC * (zz - 1); + + return R; +} + +PX_FOUNDATION_API void integrateTransform(const PxTransform& curTrans, const PxVec3& linvel, const PxVec3& angvel, + PxReal timeStep, PxTransform& result); + +PX_INLINE void computeBasis(const PxVec3& dir, PxVec3& right, PxVec3& up) +{ + // Derive two remaining vectors + if(PxAbs(dir.y) <= 0.9999f) + { + right = PxVec3(dir.z, 0.0f, -dir.x); + right.normalize(); + + // PT: normalize not needed for 'up' because dir & right are unit vectors, + // and by construction the angle between them is 90 degrees (i.e. 
sin(angle)=1) + up = PxVec3(dir.y * right.z, dir.z * right.x - dir.x * right.z, -dir.y * right.x); + } + else + { + right = PxVec3(1.0f, 0.0f, 0.0f); + + up = PxVec3(0.0f, dir.z, -dir.y); + up.normalize(); + } +} + +PX_INLINE void computeBasis(const PxVec3& p0, const PxVec3& p1, PxVec3& dir, PxVec3& right, PxVec3& up) +{ + // Compute the new direction vector + dir = p1 - p0; + dir.normalize(); + + // Derive two remaining vectors + computeBasis(dir, right, up); +} + +PX_FORCE_INLINE bool isAlmostZero(const PxVec3& v) +{ + if(PxAbs(v.x) > 1e-6f || PxAbs(v.y) > 1e-6f || PxAbs(v.z) > 1e-6f) + return false; + return true; +} + +} // namespace shdfnd +} // namespace physx + +#endif diff --git a/PxShared/src/foundation/include/PsMutex.h b/PxShared/src/foundation/include/PsMutex.h new file mode 100644 index 0000000..7c93796 --- /dev/null +++ b/PxShared/src/foundation/include/PsMutex.h @@ -0,0 +1,330 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSMUTEX_H +#define PSFOUNDATION_PSMUTEX_H + +#include "PsAllocator.h" + +/* + * This <new> inclusion is a best known fix for gcc 4.4.1 error: + * Creating object file for apex/src/PsAllocator.cpp ... + * In file included from apex/include/PsFoundation.h:30, + * from apex/src/PsAllocator.cpp:26: + * apex/include/PsMutex.h: In constructor 'physx::shdfnd::MutexT<Alloc>::MutexT(const Alloc&)': + * apex/include/PsMutex.h:92: error: no matching function for call to 'operator new(unsigned int, + * physx::shdfnd::MutexImpl*&)' + * <built-in>:0: note: candidates are: void* operator new(unsigned int) + */ +#include <new> + +namespace physx +{ +namespace shdfnd +{ +class PX_FOUNDATION_API MutexImpl +{ + public: + /** + The constructor for Mutex creates a mutex. It is initially unlocked. + */ + MutexImpl(); + + /** + The destructor for Mutex deletes the mutex. + */ + ~MutexImpl(); + + /** + Acquire (lock) the mutex. If the mutex is already locked + by another thread, this method blocks until the mutex is + unlocked. + */ + void lock(); + + /** + Acquire (lock) the mutex. 
If the mutex is already locked + by another thread, this method returns false without blocking. + */ + bool trylock(); + + /** + Release (unlock) the mutex. + */ + void unlock(); + + /** + Size of this class. + */ + static const uint32_t& getSize(); +}; + +template <typename Alloc = ReflectionAllocator<MutexImpl> > +class MutexT : protected Alloc +{ + PX_NOCOPY(MutexT) + public: + class ScopedLock + { + MutexT<Alloc>& mMutex; + PX_NOCOPY(ScopedLock) + public: + PX_INLINE ScopedLock(MutexT<Alloc>& mutex) : mMutex(mutex) + { + mMutex.lock(); + } + PX_INLINE ~ScopedLock() + { + mMutex.unlock(); + } + }; + + /** + The constructor for Mutex creates a mutex. It is initially unlocked. + */ + MutexT(const Alloc& alloc = Alloc()) : Alloc(alloc) + { + mImpl = reinterpret_cast<MutexImpl*>(Alloc::allocate(MutexImpl::getSize(), __FILE__, __LINE__)); + PX_PLACEMENT_NEW(mImpl, MutexImpl)(); + } + + /** + The destructor for Mutex deletes the mutex. + */ + ~MutexT() + { + mImpl->~MutexImpl(); + Alloc::deallocate(mImpl); + } + + /** + Acquire (lock) the mutex. If the mutex is already locked + by another thread, this method blocks until the mutex is + unlocked. + */ + void lock() const + { + mImpl->lock(); + } + + /** + Acquire (lock) the mutex. 
If the mutex is already locked + by another thread, this method returns false without blocking, + returns true if lock is successfully acquired + */ + bool trylock() const + { + return mImpl->trylock(); + } + + /** + Release (unlock) the mutex, the calling thread must have + previously called lock() or method will error + */ + void unlock() const + { + mImpl->unlock(); + } + + private: + MutexImpl* mImpl; +}; + +class PX_FOUNDATION_API ReadWriteLock +{ + PX_NOCOPY(ReadWriteLock) + public: + ReadWriteLock(); + ~ReadWriteLock(); + + void lockReader(); + void lockWriter(); + + void unlockReader(); + void unlockWriter(); + + private: + class ReadWriteLockImpl* mImpl; +}; + +class ScopedReadLock +{ + PX_NOCOPY(ScopedReadLock) + public: + PX_INLINE ScopedReadLock(ReadWriteLock& lock) : mLock(lock) + { + mLock.lockReader(); + } + PX_INLINE ~ScopedReadLock() + { + mLock.unlockReader(); + } + + private: + ReadWriteLock& mLock; +}; + +class ScopedWriteLock +{ + PX_NOCOPY(ScopedWriteLock) + public: + PX_INLINE ScopedWriteLock(ReadWriteLock& lock) : mLock(lock) + { + mLock.lockWriter(); + } + PX_INLINE ~ScopedWriteLock() + { + mLock.unlockWriter(); + } + + private: + ReadWriteLock& mLock; +}; + +typedef MutexT<> Mutex; + +/* + * Use this type of lock for mutex behaviour that must operate on SPU and PPU + * On non-PS3 platforms, it is implemented using Mutex + */ +class AtomicLock +{ + Mutex mMutex; + PX_NOCOPY(AtomicLock) + + public: + AtomicLock() + { + } + + bool lock() + { + mMutex.lock(); + return true; + } + + bool trylock() + { + return mMutex.trylock(); + } + + bool unlock() + { + mMutex.unlock(); + return true; + } +}; + +class AtomicLockCopy +{ + AtomicLock* pLock; + + public: + AtomicLockCopy() : pLock(NULL) + { + } + + AtomicLockCopy& operator=(AtomicLock& lock) + { + pLock = &lock; + return *this; + } + + bool lock() + { + return pLock->lock(); + } + + bool trylock() + { + return pLock->trylock(); + } + + bool unlock() + { + return pLock->unlock(); + } +}; + +class 
AtomicRwLock +{ + ReadWriteLock m_Lock; + PX_NOCOPY(AtomicRwLock) + + public: + AtomicRwLock() + { + } + + void lockReader() + { + m_Lock.lockReader(); + } + void lockWriter() + { + m_Lock.lockWriter(); + } + + bool tryLockReader() + { + // Todo - implement this + m_Lock.lockReader(); + return true; + } + + void unlockReader() + { + m_Lock.unlockReader(); + } + void unlockWriter() + { + m_Lock.unlockWriter(); + } +}; + +class ScopedAtomicLock +{ + PX_INLINE ScopedAtomicLock(AtomicLock& lock) : mLock(lock) + { + mLock.lock(); + } + PX_INLINE ~ScopedAtomicLock() + { + mLock.unlock(); + } + + PX_NOCOPY(ScopedAtomicLock) + private: + AtomicLock& mLock; +}; + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSMUTEX_H diff --git a/PxShared/src/foundation/include/PsPool.h b/PxShared/src/foundation/include/PsPool.h new file mode 100644 index 0000000..796251a --- /dev/null +++ b/PxShared/src/foundation/include/PsPool.h @@ -0,0 +1,298 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSPOOL_H +#define PSFOUNDATION_PSPOOL_H + +#include "PsArray.h" +#include "PsSort.h" +#include "PsBasicTemplates.h" +#include "PsInlineArray.h" + +namespace physx +{ +namespace shdfnd +{ + +/*! +Simple allocation pool +*/ +template <class T, class Alloc = typename AllocatorTraits<T>::Type> +class PoolBase : public UserAllocated, public Alloc +{ + PX_NOCOPY(PoolBase) + protected: + PoolBase(const Alloc& alloc, uint32_t elementsPerSlab, uint32_t slabSize) + : Alloc(alloc), mSlabs(alloc), mElementsPerSlab(elementsPerSlab), mUsed(0), mSlabSize(slabSize), mFreeElement(0) + { + PX_COMPILE_TIME_ASSERT(sizeof(T) >= sizeof(size_t)); + } + + public: + ~PoolBase() + { + if(mUsed) + disposeElements(); + + for(void** slabIt = mSlabs.begin(), *slabEnd = mSlabs.end(); slabIt != slabEnd; ++slabIt) + Alloc::deallocate(*slabIt); + } + + // Allocate space for single object + PX_INLINE T* allocate() + { + if(mFreeElement == 0) + allocateSlab(); + T* p = reinterpret_cast<T*>(mFreeElement); + mFreeElement = mFreeElement->mNext; + mUsed++; +/** +Mark a specified amount of memory with 0xcd pattern. 
This is used to check that the meta data +definition for serialized classes is complete in checked builds. +*/ +#if PX_CHECKED + for(uint32_t i = 0; i < sizeof(T); ++i) + reinterpret_cast<uint8_t*>(p)[i] = 0xcd; +#endif + return p; + } + + // Put space for a single element back in the lists + PX_INLINE void deallocate(T* p) + { + if(p) + { + PX_ASSERT(mUsed); + mUsed--; + push(reinterpret_cast<FreeList*>(p)); + } + } + + PX_INLINE T* construct() + { + T* t = allocate(); + return t ? new (t) T() : 0; + } + + template <class A1> + PX_INLINE T* construct(A1& a) + { + T* t = allocate(); + return t ? new (t) T(a) : 0; + } + + template <class A1, class A2> + PX_INLINE T* construct(A1& a, A2& b) + { + T* t = allocate(); + return t ? new (t) T(a, b) : 0; + } + + template <class A1, class A2, class A3> + PX_INLINE T* construct(A1& a, A2& b, A3& c) + { + T* t = allocate(); + return t ? new (t) T(a, b, c) : 0; + } + + template <class A1, class A2, class A3> + PX_INLINE T* construct(A1* a, A2& b, A3& c) + { + T* t = allocate(); + return t ? new (t) T(a, b, c) : 0; + } + + template <class A1, class A2, class A3, class A4> + PX_INLINE T* construct(A1& a, A2& b, A3& c, A4& d) + { + T* t = allocate(); + return t ? new (t) T(a, b, c, d) : 0; + } + + template <class A1, class A2, class A3, class A4, class A5> + PX_INLINE T* construct(A1& a, A2& b, A3& c, A4& d, A5& e) + { + T* t = allocate(); + return t ? 
new (t) T(a, b, c, d, e) : 0; + } + + PX_INLINE void destroy(T* const p) + { + if(p) + { + p->~T(); + deallocate(p); + } + } + + protected: + struct FreeList + { + FreeList* mNext; + }; + + // All the allocated slabs, sorted by pointer + InlineArray<void*, 64, Alloc> mSlabs; + + uint32_t mElementsPerSlab; + uint32_t mUsed; + uint32_t mSlabSize; + + FreeList* mFreeElement; // Head of free-list + + // Helper function: push an element onto the head of the free-list + + void push(FreeList* p) + { + p->mNext = mFreeElement; + mFreeElement = p; + } + + // Allocate a slab and segregate it into the freelist + void allocateSlab() + { + T* slab = reinterpret_cast<T*>(Alloc::allocate(mSlabSize, __FILE__, __LINE__)); + + mSlabs.pushBack(slab); + + // Build a chain of nodes for the freelist + T* it = slab + mElementsPerSlab; + while(--it >= slab) + push(reinterpret_cast<FreeList*>(it)); + } + + /* + Cleanup method. Go through all active slabs and call the destructor for live objects + (elements that are not on the free-list). The slab memory itself is not freed here. + */ + void disposeElements() + { + Array<void*, Alloc> freeNodes(*this); + while(mFreeElement) + { + freeNodes.pushBack(mFreeElement); + mFreeElement = mFreeElement->mNext; + } + Alloc& alloc(*this); + sort(freeNodes.begin(), freeNodes.size(), Less<void*>(), alloc); + sort(mSlabs.begin(), mSlabs.size(), Less<void*>(), alloc); + + typename Array<void*, Alloc>::Iterator slabIt = mSlabs.begin(), slabEnd = mSlabs.end(); + for(typename Array<void*, Alloc>::Iterator freeIt = freeNodes.begin(); slabIt != slabEnd; ++slabIt) + { + for(T* tIt = reinterpret_cast<T*>(*slabIt), *tEnd = tIt + mElementsPerSlab; tIt != tEnd; ++tIt) + { + if(freeIt != freeNodes.end() && *freeIt == tIt) + ++freeIt; + else + tIt->~T(); + } + } + } + + /* + Go through all slabs and deallocate every slab whose elements are all on the free-list + */ + void releaseEmptySlabs() + { + Array<void*, Alloc> freeNodes(*this); + Array<void*, Alloc> slabNodes(mSlabs, *this); + while(mFreeElement) + { + freeNodes.pushBack(mFreeElement); + mFreeElement = 
mFreeElement->mNext; + } + + typename Array<void*, Alloc>::Iterator freeIt = freeNodes.begin(), freeEnd = freeNodes.end(), + lastCheck = freeNodes.end() - mElementsPerSlab; + + if(freeNodes.size() > mElementsPerSlab) + { + Alloc& alloc(*this); + sort(freeNodes.begin(), freeNodes.size(), Less<void*>(), alloc); + sort(slabNodes.begin(), slabNodes.size(), Less<void*>(), alloc); + + mSlabs.clear(); + for(void** slabIt = slabNodes.begin(), *slabEnd = slabNodes.end(); slabIt != slabEnd; ++slabIt) + { + while((freeIt < lastCheck) && (*slabIt > (*freeIt))) + { + push(reinterpret_cast<FreeList*>(*freeIt)); + freeIt++; + } + + if(*slabIt == (*freeIt)) // the slab's first element in freeList + { + const size_t endSlabAddress = size_t(*slabIt) + mSlabSize; + const size_t endFreeAddress = size_t(*(freeIt + mElementsPerSlab - 1)); + if(endFreeAddress + sizeof(T) == endSlabAddress) + { // all slab's element in freeList + Alloc::deallocate(*slabIt); + freeIt += mElementsPerSlab; + continue; + } + } + + mSlabs.pushBack(*slabIt); + } + } + + while(freeIt != freeEnd) + { + push(reinterpret_cast<FreeList*>(*freeIt)); + ++freeIt; + } + } +}; + +// original pool implementation +template <class T, class Alloc = typename AllocatorTraits<T>::Type> +class Pool : public PoolBase<T, Alloc> +{ + public: + Pool(const Alloc& alloc = Alloc(), uint32_t elementsPerSlab = 32) + : PoolBase<T, Alloc>(alloc, elementsPerSlab, elementsPerSlab * sizeof(T)) + { + } +}; + +// allows specification of the slab size instead of the occupancy +template <class T, uint32_t slabSize, class Alloc = typename AllocatorTraits<T>::Type> +class Pool2 : public PoolBase<T, Alloc> +{ + public: + Pool2(const Alloc& alloc = Alloc()) : PoolBase<T, Alloc>(alloc, slabSize / sizeof(T), slabSize) + { + } +}; + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSPOOL_H diff --git a/PxShared/src/foundation/include/PsSList.h b/PxShared/src/foundation/include/PsSList.h new file mode 100644 index 
0000000..f811c37 --- /dev/null +++ b/PxShared/src/foundation/include/PsSList.h @@ -0,0 +1,140 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef PSFOUNDATION_PSSLIST_H
#define PSFOUNDATION_PSSLIST_H

#include "foundation/Px.h"
#include "foundation/PxAssert.h"
#include "PsAlignedMalloc.h"

// Alignment (in bytes) that SListEntry objects and the SListImpl storage are
// asserted to have: 16 when PX_P64_FAMILY is set, 8 otherwise.
#if PX_P64_FAMILY
#define PX_SLIST_ALIGNMENT 16
#else
#define PX_SLIST_ALIGNMENT 8
#endif

namespace physx
{
namespace shdfnd
{

#if PX_VC
#pragma warning(push)
#pragma warning(disable : 4324) // Padding was added at the end of a structure because of a __declspec(align) value.
#endif

// Link node for SList. The class is declared with PX_SLIST_ALIGNMENT alignment:
// via __declspec(align) in front of the class on non-GCC-family compilers, and
// via __attribute__((aligned)) after the closing brace on GCC-family compilers.
// The split declaration below is order-sensitive; keep both preprocessor
// branches in sync when editing.
#if !PX_GCC_FAMILY
__declspec(align(PX_SLIST_ALIGNMENT))
#endif
class SListEntry
{
    // SListImpl manipulates mNext directly.
    friend struct SListImpl;

  public:
    // Starts unlinked (mNext == NULL) and asserts that the object itself
    // sits on a PX_SLIST_ALIGNMENT boundary.
    SListEntry() : mNext(NULL)
    {
        PX_ASSERT((size_t(this) & (PX_SLIST_ALIGNMENT - 1)) == 0);
    }

    // Only use on elements returned by SList::flush()
    // because the operation is not atomic.
    SListEntry* next()
    {
        return mNext;
    }

  private:
    SListEntry* mNext; // following entry in the chain, NULL if none
}
#if PX_GCC_FAMILY
__attribute__((aligned(PX_SLIST_ALIGNMENT)));
#else
;
#endif

#if PX_VC
#pragma warning(pop)
#endif

// template-less implementation
// Only declarations are visible here; the definitions live outside this header.
// getSize() is what SListT passes to its allocator as the number of bytes to
// reserve for one SListImpl.
struct PX_FOUNDATION_API SListImpl
{
    SListImpl();
    ~SListImpl();
    void push(SListEntry* entry);
    SListEntry* pop();
    SListEntry* flush();
    static const uint32_t& getSize();
};

// Owning wrapper around SListImpl whose storage comes from Alloc.
// NOTE(review): no copy constructor / assignment operator is declared, so an
// implicit copy would share mImpl and both destructors would destroy and
// deallocate the same object - confirm that instances are never copied.
template <typename Alloc = ReflectionAllocator<SListImpl> >
class SListT : protected Alloc
{
  public:
    // Allocates SListImpl::getSize() bytes through Alloc, asserts that the
    // storage is PX_SLIST_ALIGNMENT aligned and constructs the SListImpl in place.
    SListT(const Alloc& alloc = Alloc()) : Alloc(alloc)
    {
        mImpl = reinterpret_cast<SListImpl*>(Alloc::allocate(SListImpl::getSize(), __FILE__, __LINE__));
        PX_ASSERT((size_t(mImpl) & (PX_SLIST_ALIGNMENT - 1)) == 0);
        PX_PLACEMENT_NEW(mImpl, SListImpl)();
    }
    // Runs the SListImpl destructor explicitly, then returns the storage to Alloc.
    ~SListT()
    {
        mImpl->~SListImpl();
        Alloc::deallocate(mImpl);
    }

    // pushes a new element to the list
    void push(SListEntry& entry)
    {
        mImpl->push(&entry);
    }

    // pops an element from the list
    SListEntry* pop()
    {
        return mImpl->pop();
    }

    // removes all items from list, returns pointer to first element
    SListEntry* flush()
    {
        return mImpl->flush();
    }

  private:
    SListImpl* mImpl; // placement-constructed in the constructor, released in the destructor
};

// SList with the default ReflectionAllocator.
typedef SListT<> SList;

} // namespace shdfnd
} // namespace physx

#endif // #ifndef PSFOUNDATION_PSSLIST_H
diff --git a/PxShared/src/foundation/include/PsSocket.h b/PxShared/src/foundation/include/PsSocket.h new file mode 100644 index 0000000..0d8bf55 --- /dev/null +++ b/PxShared/src/foundation/include/PsSocket.h @@ -0,0 +1,186 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PSFOUNDATION_PSSOCKET_H
#define PSFOUNDATION_PSSOCKET_H

#include "PsUserAllocated.h"

namespace physx
{
namespace shdfnd
{
/**
Socket abstraction API

Only the interface is declared in this header; all state lives behind the
private SocketImpl pointer.
*/

class PX_FOUNDATION_API Socket : public UserAllocated
{
  public:
    static const uint32_t DEFAULT_BUFFER_SIZE;

    /*!
    Creates a socket object.

    \param inEnableBuffering
    Buffering switch; defaults to true.

    \param blocking
    Initial blocking mode; defaults to true.
    */
    Socket(bool inEnableBuffering = true, bool blocking = true);

    virtual ~Socket();

    /*!
    Opens a network socket for input and/or output

    \param host
    Name of the host to connect to. This can be an IP, URL, etc

    \param port
    The port to connect to on the remote host

    \param timeout
    Timeout in ms until the connection must be established.

    \return
    True if the connection was successful, false otherwise
    */
    bool connect(const char* host, uint16_t port, uint32_t timeout = 1000);

    /*!
    Opens a network socket for input and/or output as a server. Puts the connection in listening mode

    \param port
    The port on which the socket listens
    */
    bool listen(uint16_t port);

    /*!
    Accept a connection on a socket that is in listening mode

    \note
    This method only supports a single connection client. Additional clients
    that connect to the listening port will overwrite the existing socket handle.

    \param block
    whether or not the call should block

    \return whether a connection was established
    */
    bool accept(bool block);

    /*!
    Disconnects an open socket
    */
    void disconnect();

    /*!
    Returns whether the socket is currently open (connected) or not.

    \return
    True if the socket is connected, false otherwise
    */
    bool isConnected() const;

    /*!
    Returns the name of the connected host. This is the same as the string
    that was supplied to the connect call.

    \return
    The name of the connected host
    */
    const char* getHost() const;

    /*!
    Returns the port of the connected host. This is the same as the port
    that was supplied to the connect call.

    \return
    The port of the connected host
    */
    uint16_t getPort() const;

    /*!
    Flushes the output stream. Until the stream is flushed, there is no
    guarantee that the written data has actually reached the destination
    storage. Flush forces all buffered data to be sent to the output.

    \note flush always blocks. If the socket is in non-blocking mode, this will result
    in the thread spinning.

    \return
    True if the flush was successful, false otherwise
    */
    bool flush();

    /*!
    Writes data to the output stream.

    \param data
    Pointer to a block of data to write to the stream

    \param length
    Amount of data to write, in bytes

    \return
    Number of bytes actually written. This could be lower than length if the socket is non-blocking.
    */

    uint32_t write(const uint8_t* data, uint32_t length);

    /*!
    Reads data from the input stream.

    \param data
    Pointer to a buffer where the read data will be stored.

    \param length
    Amount of data to read, in bytes.

    \return
    Number of bytes actually read. This could be lower than length if the stream end is
    encountered or the socket is non-blocking.
    */
    uint32_t read(uint8_t* data, uint32_t length);

    /*!
    Sets blocking mode of the socket.
    Socket must be connected, otherwise calling this method has no effect.
    */
    void setBlocking(bool blocking);

    /*!
    Returns whether read/write/flush calls to the socket are blocking.

    \return
    True if the socket is blocking.
    */
    bool isBlocking() const;

  private:
    // Opaque implementation object; SocketImpl is only forward-declared here.
    class SocketImpl* mImpl;
};

} // namespace shdfnd
} // namespace physx

#endif // PSFOUNDATION_PSSOCKET_H
diff --git a/PxShared/src/foundation/include/PsSort.h b/PxShared/src/foundation/include/PsSort.h new file mode 100644 index 0000000..30808ae --- /dev/null +++ b/PxShared/src/foundation/include/PsSort.h @@ -0,0 +1,130 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation.
All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSSORT_H +#define PSFOUNDATION_PSSORT_H + +/** \addtogroup foundation +@{ +*/ + +#include "PsSortInternals.h" +#include "PsAlloca.h" + +#define PX_SORT_PARANOIA PX_DEBUG + +/** +\brief Sorts an array of objects in ascending order, assuming +that the predicate implements the < operator: + +\see Less, Greater +*/ + +#if PX_VC +#pragma warning(push) +#pragma warning(disable : 4706) // disable the warning that we did an assignment within a conditional expression, as +// this was intentional. +#endif + +namespace physx +{ +namespace shdfnd +{ +template <class T, class Predicate, class Allocator> +void sort(T* elements, uint32_t count, const Predicate& compare, const Allocator& inAllocator, + const uint32_t initialStackSize = 32) +{ + static const uint32_t SMALL_SORT_CUTOFF = 5; // must be >= 3 since we need 3 for median + + PX_ALLOCA(stackMem, int32_t, initialStackSize); + internal::Stack<Allocator> stack(stackMem, initialStackSize, inAllocator); + + int32_t first = 0, last = int32_t(count - 1); + if(last > first) + { + for(;;) + { + while(last > first) + { + PX_ASSERT(first >= 0 && last < int32_t(count)); + if(uint32_t(last - first) < SMALL_SORT_CUTOFF) + { + internal::smallSort(elements, first, last, compare); + break; + } + else + { + const int32_t partIndex = internal::partition(elements, first, last, compare); + + // push smaller sublist to minimize stack usage + if((partIndex - first) < (last - partIndex)) + { + stack.push(first, partIndex - 1); + first = partIndex + 1; + } + else + { + stack.push(partIndex + 1, last); + last = partIndex - 1; + } + } + } + + if(stack.empty()) + break; + + stack.pop(first, last); + } + } +#if PX_SORT_PARANOIA + for(uint32_t i = 1; i < count; i++) + PX_ASSERT(!compare(elements[i], elements[i - 1])); +#endif +} + +template <class T, class Predicate> +void 
sort(T* elements, uint32_t count, const Predicate& compare) +{ + sort(elements, count, compare, typename shdfnd::AllocatorTraits<T>::Type()); +} + +template <class T> +void sort(T* elements, uint32_t count) +{ + sort(elements, count, shdfnd::Less<T>(), typename shdfnd::AllocatorTraits<T>::Type()); +} + +} // namespace shdfnd +} // namespace physx + +#if PX_VC +#pragma warning(pop) +#endif + +#endif // #ifndef PSFOUNDATION_PSSORT_H diff --git a/PxShared/src/foundation/include/PsSortInternals.h b/PxShared/src/foundation/include/PsSortInternals.h new file mode 100644 index 0000000..3aa0f7f --- /dev/null +++ b/PxShared/src/foundation/include/PsSortInternals.h @@ -0,0 +1,188 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSSORTINTERNALS_H +#define PSFOUNDATION_PSSORTINTERNALS_H + +/** \addtogroup foundation +@{ +*/ + +#include "foundation/PxAssert.h" +#include "foundation/PxIntrinsics.h" +#include "PsBasicTemplates.h" +#include "PsUserAllocated.h" + +namespace physx +{ +namespace shdfnd +{ +namespace internal +{ +template <class T, class Predicate> +PX_INLINE void median3(T* elements, int32_t first, int32_t last, Predicate& compare) +{ + /* + This creates sentinels because we know there is an element at the start minimum(or equal) + than the pivot and an element at the end greater(or equal) than the pivot. Plus the + median of 3 reduces the chance of degenerate behavour. 
+ */ + + int32_t mid = (first + last) / 2; + + if(compare(elements[mid], elements[first])) + swap(elements[first], elements[mid]); + + if(compare(elements[last], elements[first])) + swap(elements[first], elements[last]); + + if(compare(elements[last], elements[mid])) + swap(elements[mid], elements[last]); + + // keep the pivot at last-1 + swap(elements[mid], elements[last - 1]); +} + +template <class T, class Predicate> +PX_INLINE int32_t partition(T* elements, int32_t first, int32_t last, Predicate& compare) +{ + median3(elements, first, last, compare); + + /* + WARNING: using the line: + + T partValue = elements[last-1]; + + and changing the scan loops to: + + while(comparator.greater(partValue, elements[++i])); + while(comparator.greater(elements[--j], partValue); + + triggers a compiler optimizer bug on xenon where it stores a double to the stack for partValue + then loads it as a single...:-( + */ + + int32_t i = first; // we know first is less than pivot(but i gets pre incremented) + int32_t j = last - 1; // pivot is in last-1 (but j gets pre decremented) + + for(;;) + { + while(compare(elements[++i], elements[last - 1])) + ; + while(compare(elements[last - 1], elements[--j])) + ; + + if(i >= j) + break; + + PX_ASSERT(i <= last && j >= first); + swap(elements[i], elements[j]); + } + // put the pivot in place + + PX_ASSERT(i <= last && first <= (last - 1)); + swap(elements[i], elements[last - 1]); + + return i; +} + +template <class T, class Predicate> +PX_INLINE void smallSort(T* elements, int32_t first, int32_t last, Predicate& compare) +{ + // selection sort - could reduce to fsel on 360 with floats. 
+ + for(int32_t i = first; i < last; i++) + { + int32_t m = i; + for(int32_t j = i + 1; j <= last; j++) + if(compare(elements[j], elements[m])) + m = j; + + if(m != i) + swap(elements[m], elements[i]); + } +} + +template <class Allocator> +class Stack +{ + Allocator mAllocator; + uint32_t mSize, mCapacity; + int32_t* mMemory; + bool mRealloc; + + public: + Stack(int32_t* memory, uint32_t capacity, const Allocator& inAllocator) + : mAllocator(inAllocator), mSize(0), mCapacity(capacity), mMemory(memory), mRealloc(false) + { + } + ~Stack() + { + if(mRealloc) + mAllocator.deallocate(mMemory); + } + + void grow() + { + mCapacity *= 2; + int32_t* newMem = + reinterpret_cast<int32_t*>(mAllocator.allocate(sizeof(int32_t) * mCapacity, __FILE__, __LINE__)); + intrinsics::memCopy(newMem, mMemory, mSize * sizeof(int32_t)); + if(mRealloc) + mAllocator.deallocate(mMemory); + mRealloc = true; + mMemory = newMem; + } + + PX_INLINE void push(int32_t start, int32_t end) + { + if(mSize >= mCapacity - 1) + grow(); + mMemory[mSize++] = start; + mMemory[mSize++] = end; + } + + PX_INLINE void pop(int32_t& start, int32_t& end) + { + PX_ASSERT(!empty()); + end = mMemory[--mSize]; + start = mMemory[--mSize]; + } + + PX_INLINE bool empty() + { + return mSize == 0; + } +}; +} // namespace internal + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSSORTINTERNALS_H diff --git a/PxShared/src/foundation/include/PsString.h b/PxShared/src/foundation/include/PsString.h new file mode 100644 index 0000000..17d25c8 --- /dev/null +++ b/PxShared/src/foundation/include/PsString.h @@ -0,0 +1,90 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PSFOUNDATION_PSSTRING_H
#define PSFOUNDATION_PSSTRING_H

#include "foundation/PxPreprocessor.h"
#include "foundation/PxSimpleTypes.h"
#include <stdarg.h>

namespace physx
{
namespace shdfnd
{

// The following functions have C99 semantics. Note that C99 requires for snprintf and vsnprintf:
// * the resulting string is always NULL-terminated regardless of truncation.
// * in the case of truncation the return value is the number of characters that would have been created.

PX_FOUNDATION_API int32_t sscanf(const char* buffer, const char* format, ...);
PX_FOUNDATION_API int32_t strcmp(const char* str1, const char* str2);
PX_FOUNDATION_API int32_t strncmp(const char* str1, const char* str2, size_t count);
PX_FOUNDATION_API int32_t snprintf(char* dst, size_t dstSize, const char* format, ...);
// NOTE(review): `src` sits in the position of the format string of the C
// vsnprintf signature - presumably it is the format; confirm against the implementation.
PX_FOUNDATION_API int32_t vsnprintf(char* dst, size_t dstSize, const char* src, va_list arg);

// strlcat and strlcpy have BSD semantics:
// * dstSize is always the size of the destination buffer
// * the resulting string is always NULL-terminated regardless of truncation
// * in the case of truncation the return value is the length of the string that would have been created
// Note that the parameter order here is (dst, dstSize, src).

PX_FOUNDATION_API size_t strlcat(char* dst, size_t dstSize, const char* src);
PX_FOUNDATION_API size_t strlcpy(char* dst, size_t dstSize, const char* src);

// case-insensitive string comparison
PX_FOUNDATION_API int32_t stricmp(const char* str1, const char* str2);
PX_FOUNDATION_API int32_t strnicmp(const char* str1, const char* str2, size_t count);

// in-place string case conversion
PX_FOUNDATION_API void strlwr(char* str);
PX_FOUNDATION_API void strupr(char* str);

/**
\brief The maximum supported formatted output string length
(number of characters after replacement).

@see printFormatted()
*/
static const size_t MAX_PRINTFORMATTED_LENGTH = 1024;

/**
\brief Prints the formatted data, trying to make sure it's visible to the app programmer

@see MAX_PRINTFORMATTED_LENGTH
*/
PX_FOUNDATION_API void printFormatted(const char*, ...);

/**
\brief Prints the string literally (does not consume % specifier), trying to make sure it's visible to the app
programmer
*/
PX_FOUNDATION_API void printString(const char*);
} // namespace shdfnd
} // namespace physx
#endif // #ifndef PSFOUNDATION_PSSTRING_H
diff --git a/PxShared/src/foundation/include/PsSync.h b/PxShared/src/foundation/include/PsSync.h new file mode 100644 index 0000000..8b99731 --- /dev/null +++ b/PxShared/src/foundation/include/PsSync.h @@ -0,0 +1,138 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation.
Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PSFOUNDATION_PSSYNC_H
#define PSFOUNDATION_PSSYNC_H

#include "PsAllocator.h"

namespace physx
{
namespace shdfnd
{
/*!
Template-less implementation behind SyncT; only declared in this header.

Implementation notes:
* - Calling set() on an already signaled Sync does not change its state.
* - Calling reset() on an already reset Sync does not change its state.
* - Calling set() on a reset Sync wakes all waiting threads (potential for thread contention).
* - Calling wait() on an already signaled Sync will return true immediately.
* - NOTE: be careful when pulsing an event with set() followed by reset(), because a
* thread that is not waiting on the event will miss the signal.
*/
class PX_FOUNDATION_API SyncImpl
{
  public:
    // Timeout value meaning "no timeout" for wait().
    static const uint32_t waitForever = 0xffffffff;

    SyncImpl();

    ~SyncImpl();

    /** Wait on the object for at most the given number of ms. Returns
    * true if the object is signaled. Sync::waitForever will block forever
    * or until the object is signaled.
    */

    bool wait(uint32_t milliseconds = waitForever);

    /** Signal the synchronization object, waking all threads waiting on it */

    void set();

    /** Reset the synchronization object */

    void reset();

    /**
    Size of this class.
    SyncT passes this value to its allocator as the number of bytes to reserve
    for one SyncImpl.
    */
    static const uint32_t& getSize();
};

/*!
Owning wrapper that places a SyncImpl in storage obtained from Alloc and
forwards every call to it.

Implementation notes:
* - Calling set() on an already signaled Sync does not change its state.
* - Calling reset() on an already reset Sync does not change its state.
* - Calling set() on a reset Sync wakes all waiting threads (potential for thread contention).
* - Calling wait() on an already signaled Sync will return true immediately.
* - NOTE: be careful when pulsing an event with set() followed by reset(), because a
* thread that is not waiting on the event will miss the signal.

NOTE(review): no copy constructor / assignment operator is declared, so an
implicit copy would share mImpl and destroy it twice - confirm instances are
never copied.
*/
template <typename Alloc = ReflectionAllocator<SyncImpl> >
class SyncT : protected Alloc
{
  public:
    static const uint32_t waitForever = SyncImpl::waitForever;

    // Allocates SyncImpl::getSize() bytes through Alloc and constructs the
    // SyncImpl in place.
    SyncT(const Alloc& alloc = Alloc()) : Alloc(alloc)
    {
        mImpl = reinterpret_cast<SyncImpl*>(Alloc::allocate(SyncImpl::getSize(), __FILE__, __LINE__));
        PX_PLACEMENT_NEW(mImpl, SyncImpl)();
    }

    // Runs the SyncImpl destructor explicitly, then returns the storage to Alloc.
    ~SyncT()
    {
        mImpl->~SyncImpl();
        Alloc::deallocate(mImpl);
    }

    /** Wait on the object for at most the given number of ms. Returns
    * true if the object is signaled. Sync::waitForever will block forever
    * or until the object is signaled.
    */

    bool wait(uint32_t milliseconds = SyncImpl::waitForever)
    {
        return mImpl->wait(milliseconds);
    }

    /** Signal the synchronization object, waking all threads waiting on it */

    void set()
    {
        mImpl->set();
    }

    /** Reset the synchronization object */

    void reset()
    {
        mImpl->reset();
    }

  private:
    class SyncImpl* mImpl; // placement-constructed in the constructor, released in the destructor
};

// SyncT with the default ReflectionAllocator.
typedef SyncT<> Sync;

} // namespace shdfnd
} // namespace physx

#endif // #ifndef PSFOUNDATION_PSSYNC_H
diff --git a/PxShared/src/foundation/include/PsTempAllocator.h b/PxShared/src/foundation/include/PsTempAllocator.h new file mode 100644 index 0000000..7a063dc --- /dev/null +++ b/PxShared/src/foundation/include/PsTempAllocator.h @@ -0,0 +1,62 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PSFOUNDATION_PSTEMPALLOCATOR_H
#define PSFOUNDATION_PSTEMPALLOCATOR_H

#include "PsAllocator.h"

namespace physx
{
namespace shdfnd
{
// Header record used by the temp allocator. The three members overlay each
// other; which one is meaningful depends on whether the chunk is currently
// free or handed out (see the per-member comments).
union TempAllocatorChunk
{
    TempAllocatorChunk() : mNext(0)
    {
    }
    TempAllocatorChunk* mNext; // while chunk is free
    uint32_t mIndex;           // while chunk is allocated
    uint8_t mPad[16];          // 16 byte aligned allocations (makes the union at least 16 bytes large)
};

// Allocator front end; allocate() and deallocate() are only declared here and
// exported through PX_FOUNDATION_API.
class TempAllocator
{
  public:
    // The string argument is accepted and ignored.
    // NOTE(review): presumably kept for signature compatibility with named
    // allocators - confirm.
    PX_FORCE_INLINE TempAllocator(const char* = 0)
    {
    }
    PX_FOUNDATION_API void* allocate(size_t size, const char* file, int line);
    PX_FOUNDATION_API void deallocate(void* ptr);
};

} // namespace shdfnd
} // namespace physx

#endif // #ifndef PSFOUNDATION_PSTEMPALLOCATOR_H
diff --git a/PxShared/src/foundation/include/PsThread.h b/PxShared/src/foundation/include/PsThread.h new file mode 100644 index 0000000..8ba553a --- /dev/null +++ b/PxShared/src/foundation/include/PsThread.h @@ -0,0 +1,382 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSTHREAD_H +#define PSFOUNDATION_PSTHREAD_H + +#include "PsUserAllocated.h" + +// dsequeira: according to existing comment here (David Black would be my guess) +// "This is useful to reduce bus contention on tight spin locks. And it needs +// to be a macro as the xenon compiler often ignores even __forceinline." What's not +// clear is why a pause function needs inlining...? (TODO: check with XBox team) + +// todo: these need to go somewhere else + +#if PX_WINDOWS_FAMILY || PX_XBOXONE +#define PxSpinLockPause() __asm pause +#elif PX_LINUX || PX_ANDROID || PX_PS4 || PX_APPLE_FAMILY || PX_NX +#define PxSpinLockPause() asm("nop") +#else +#error "Platform not supported!" +#endif + +namespace physx +{ +namespace shdfnd +{ +struct ThreadPriority // todo: put in some other header file +{ + enum Enum + { + /** + \brief High priority + */ + eHIGH = 0, + + /** + \brief Above Normal priority + */ + eABOVE_NORMAL = 1, + + /** + \brief Normal/default priority + */ + eNORMAL = 2, + + /** + \brief Below Normal priority + */ + eBELOW_NORMAL = 3, + + /** + \brief Low priority. 
+ */ + eLOW = 4, + eFORCE_DWORD = 0xffFFffFF + }; +}; + +class Runnable +{ + public: + Runnable() + { + } + virtual ~Runnable() + { + } + virtual void execute(void) + { + } +}; + +class PX_FOUNDATION_API ThreadImpl +{ + public: + typedef size_t Id; // space for a pointer or an integer + typedef void* (*ExecuteFn)(void*); + + static uint32_t getDefaultStackSize(); + static Id getId(); + + /** + Construct (but do not start) the thread object. The OS thread object will not be created + until start() is called. Executes in the context + of the spawning thread. + */ + + ThreadImpl(); + + /** + Construct and start the the thread, passing the given arg to the given fn. (pthread style) + */ + + ThreadImpl(ExecuteFn fn, void* arg); + + /** + Deallocate all resources associated with the thread. Should be called in the + context of the spawning thread. + */ + + ~ThreadImpl(); + + /** + Create the OS thread and start it running. Called in the context of the spawning thread. + If an affinity mask has previously been set then it will be applied after the + thread has been created. + */ + + void start(uint32_t stackSize, Runnable* r); + + /** + Violently kill the current thread. Blunt instrument, not recommended since + it can leave all kinds of things unreleased (stack, memory, mutexes...) Should + be called in the context of the spawning thread. + */ + + void kill(); + + /** + Stop the thread. Signals the spawned thread that it should stop, so the + thread should check regularly + */ + + void signalQuit(); + + /** + Wait for a thread to stop. Should be called in the context of the spawning + thread. Returns false if the thread has not been started. + */ + + bool waitForQuit(); + + /** + check whether the thread is signalled to quit. Called in the context of the + spawned thread. + */ + + bool quitIsSignalled(); + + /** + Cleanly shut down this thread. Called in the context of the spawned thread. + */ + void quit(); + + /** + Change the affinity mask for this thread. 
The mask is a platform + specific value. + + On Windows, Linux, PS4, XboxOne and NX platforms, each set mask bit represents + the index of a logical processor that the OS may schedule thread execution on. + Bits outside the range of valid logical processors may be ignored or cause + the function to return an error. + + On Apple platforms, this function has no effect. + + If the thread has not yet been started then the mask is stored + and applied when the thread is started. + + If the thread has already been started then this method returns the + previous affinity mask on success, otherwise it returns zero. + */ + uint32_t setAffinityMask(uint32_t mask); + + static ThreadPriority::Enum getPriority(Id threadId); + + /** Set thread priority. */ + void setPriority(ThreadPriority::Enum prio); + + /** set the thread's name */ + void setName(const char* name); + + /** Put the current thread to sleep for the given number of milliseconds */ + static void sleep(uint32_t ms); + + /** Yield the current thread's slot on the CPU */ + static void yield(); + + /** Return the number of physical cores (does not include hyper-threaded cores), returns 0 on failure */ + static uint32_t getNbPhysicalCores(); + + /** + Size of this class. + */ + static const uint32_t& getSize(); +}; + +/** +Thread abstraction API +*/ +template <typename Alloc = ReflectionAllocator<ThreadImpl> > +class ThreadT : protected Alloc, public UserAllocated, public Runnable +{ + public: + typedef ThreadImpl::Id Id; // space for a pointer or an integer + + /** + Construct (but do not start) the thread object. Executes in the context + of the spawning thread + */ + ThreadT(const Alloc& alloc = Alloc()) : Alloc(alloc) + { + mImpl = reinterpret_cast<ThreadImpl*>(Alloc::allocate(ThreadImpl::getSize(), __FILE__, __LINE__)); + PX_PLACEMENT_NEW(mImpl, ThreadImpl)(); + } + + /** + Construct and start the the thread, passing the given arg to the given fn. 
(pthread style) + */ + ThreadT(ThreadImpl::ExecuteFn fn, void* arg, const Alloc& alloc = Alloc()) : Alloc(alloc) + { + mImpl = reinterpret_cast<ThreadImpl*>(Alloc::allocate(ThreadImpl::getSize(), __FILE__, __LINE__)); + PX_PLACEMENT_NEW(mImpl, ThreadImpl)(fn, arg); + } + + /** + Deallocate all resources associated with the thread. Should be called in the + context of the spawning thread. + */ + virtual ~ThreadT() + { + mImpl->~ThreadImpl(); + Alloc::deallocate(mImpl); + } + + /** + start the thread running. Called in the context of the spawning thread. + */ + + void start(uint32_t stackSize = ThreadImpl::getDefaultStackSize()) + { + mImpl->start(stackSize, this); + } + + /** + Violently kill the current thread. Blunt instrument, not recommended since + it can leave all kinds of things unreleased (stack, memory, mutexes...) Should + be called in the context of the spawning thread. + */ + + void kill() + { + mImpl->kill(); + } + + /** + The virtual execute() method is the user defined function that will + run in the new thread. Called in the context of the spawned thread. + */ + + virtual void execute(void) + { + } + + /** + stop the thread. Signals the spawned thread that it should stop, so the + thread should check regularly + */ + + void signalQuit() + { + mImpl->signalQuit(); + } + + /** + Wait for a thread to stop. Should be called in the context of the spawning + thread. Returns false if the thread has not been started. + */ + + bool waitForQuit() + { + return mImpl->waitForQuit(); + } + + /** + check whether the thread is signalled to quit. Called in the context of the + spawned thread. + */ + + bool quitIsSignalled() + { + return mImpl->quitIsSignalled(); + } + + /** + Cleanly shut down this thread. Called in the context of the spawned thread. 
+ */ + void quit() + { + mImpl->quit(); + } + + uint32_t setAffinityMask(uint32_t mask) + { + return mImpl->setAffinityMask(mask); + } + + static ThreadPriority::Enum getPriority(ThreadImpl::Id threadId) + { + return ThreadImpl::getPriority(threadId); + } + + /** Set thread priority. */ + void setPriority(ThreadPriority::Enum prio) + { + mImpl->setPriority(prio); + } + + /** set the thread's name */ + void setName(const char* name) + { + mImpl->setName(name); + } + + /** Put the current thread to sleep for the given number of milliseconds */ + static void sleep(uint32_t ms) + { + ThreadImpl::sleep(ms); + } + + /** Yield the current thread's slot on the CPU */ + static void yield() + { + ThreadImpl::yield(); + } + + static uint32_t getDefaultStackSize() + { + return ThreadImpl::getDefaultStackSize(); + } + + static ThreadImpl::Id getId() + { + return ThreadImpl::getId(); + } + + static uint32_t getNbPhysicalCores() + { + return ThreadImpl::getNbPhysicalCores(); + } + + private: + class ThreadImpl* mImpl; +}; + +typedef ThreadT<> Thread; + +PX_FOUNDATION_API uint32_t TlsAlloc(); +PX_FOUNDATION_API void TlsFree(uint32_t index); +PX_FOUNDATION_API void* TlsGet(uint32_t index); +PX_FOUNDATION_API uint32_t TlsSet(uint32_t index, void* value); + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSTHREAD_H diff --git a/PxShared/src/foundation/include/PsTime.h b/PxShared/src/foundation/include/PsTime.h new file mode 100644 index 0000000..b9c7031 --- /dev/null +++ b/PxShared/src/foundation/include/PsTime.h @@ -0,0 +1,95 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSTIME_H +#define PSFOUNDATION_PSTIME_H + +#include "Ps.h" + +#if PX_LINUX || PX_ANDROID +#include <time.h> +#endif + +namespace physx +{ +namespace shdfnd +{ + +struct CounterFrequencyToTensOfNanos +{ + uint64_t mNumerator; + uint64_t mDenominator; + CounterFrequencyToTensOfNanos(uint64_t inNum, uint64_t inDenom) : mNumerator(inNum), mDenominator(inDenom) + { + } + + // quite slow. 
+ uint64_t toTensOfNanos(uint64_t inCounter) const + { + return (inCounter * mNumerator) / mDenominator; + } +}; + +class PX_FOUNDATION_API Time +{ + public: + typedef double Second; + static const uint64_t sNumTensOfNanoSecondsInASecond = 100000000; + // This is supposedly guaranteed to not change after system boot + // regardless of processors, speedstep, etc. + static const CounterFrequencyToTensOfNanos& getBootCounterFrequency(); + + static CounterFrequencyToTensOfNanos getCounterFrequency(); + + static uint64_t getCurrentCounterValue(); + + // SLOW!! + // Thar be a 64 bit divide in thar! + static uint64_t getCurrentTimeInTensOfNanoSeconds() + { + uint64_t ticks = getCurrentCounterValue(); + return getBootCounterFrequency().toTensOfNanos(ticks); + } + + Time(); + Second getElapsedSeconds(); + Second peekElapsedSeconds(); + Second getLastTime() const; + + private: +#if PX_LINUX || PX_ANDROID || PX_APPLE_FAMILY || PX_PS4 + Second mLastTime; +#else + int64_t mTickCount; +#endif +}; +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSTIME_H diff --git a/PxShared/src/foundation/include/PsUserAllocated.h b/PxShared/src/foundation/include/PsUserAllocated.h new file mode 100644 index 0000000..f41d29e --- /dev/null +++ b/PxShared/src/foundation/include/PsUserAllocated.h @@ -0,0 +1,92 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PSFOUNDATION_PSUSERALLOCATED_H
+#define PSFOUNDATION_PSUSERALLOCATED_H
+
+#include "PsAllocator.h"
+
+namespace physx
+{
+namespace shdfnd
+{
+/**
+Provides new and delete using a UserAllocator.
+Guarantees that 'delete x;' uses the UserAllocator too.
+
+Allocation goes through the allocator-taking operator new overloads below; the
+plain operator delete overloads release memory through NonTrackingAllocator.
+*/
+class UserAllocated
+{
+  public:
+    // PX_SERIALIZATION
+    // Placement new: constructs in caller-provided storage, allocates nothing.
+    PX_INLINE void* operator new(size_t, void* address)
+    {
+        return address;
+    }
+    //~PX_SERIALIZATION
+    // Matching operator delete to the above operator new. Don't ask me
+    // how this makes any sense - Nuernberger.
+    PX_INLINE void operator delete(void*, void*)
+    {
+    }
+
+    // Allocating forms: forward size, file name and line to alloc.allocate().
+    template <typename Alloc>
+    PX_INLINE void* operator new(size_t size, Alloc alloc, const char* fileName, int line)
+    {
+        return alloc.allocate(size, fileName, line);
+    }
+    template <typename Alloc>
+    PX_INLINE void* operator new [](size_t size, Alloc alloc, const char* fileName, int line)
+    { return alloc.allocate(size, fileName, line); }
+
+    // placement delete
+    // Counterparts of the allocating forms above; they hand the pointer back to
+    // alloc.deallocate() and ignore the file/line arguments.
+    template <typename Alloc>
+    PX_INLINE void operator delete(void* ptr, Alloc alloc, const char* fileName, int line)
+    {
+        PX_UNUSED(fileName);
+        PX_UNUSED(line);
+        alloc.deallocate(ptr);
+    }
+    template <typename Alloc>
+    PX_INLINE void operator delete [](void* ptr, Alloc alloc, const char* fileName, int line)
+    {
+        PX_UNUSED(fileName);
+        PX_UNUSED(line);
+        alloc.deallocate(ptr);
+    }
+    // Ordinary delete / delete[]: release through a temporary NonTrackingAllocator.
+    PX_INLINE void operator delete(void* ptr)
+    {
+        NonTrackingAllocator().deallocate(ptr);
+    }
+    PX_INLINE void operator delete [](void* ptr)
+    { NonTrackingAllocator().deallocate(ptr); }
+};
+} // namespace shdfnd
+} // namespace physx
+
+#endif // #ifndef PSFOUNDATION_PSUSERALLOCATED_H
diff --git a/PxShared/src/foundation/include/PsUtilities.h b/PxShared/src/foundation/include/PsUtilities.h
new file mode 100644
index 0000000..32fe4ec
--- /dev/null
+++ b/PxShared/src/foundation/include/PsUtilities.h
@@ -0,0 +1,165 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.".
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PSFOUNDATION_PSUTILITIES_H +#define PSFOUNDATION_PSUTILITIES_H + +#include "foundation/PxVec3.h" +#include "foundation/PxAssert.h" +#include "Ps.h" +#include "PsIntrinsics.h" +#include "PsBasicTemplates.h" + +namespace physx +{ +namespace shdfnd +{ +PX_INLINE char littleEndian() +{ + int i = 1; + return *(reinterpret_cast<char*>(&i)); +} + +// PT: checked casts +PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 to32(PxU64 value) +{ + PX_ASSERT(value <= 0xffffffff); + return PxU32(value); +} +PX_CUDA_CALLABLE PX_FORCE_INLINE PxU16 to16(PxU32 value) +{ + PX_ASSERT(value <= 0xffff); + return PxU16(value); +} +PX_CUDA_CALLABLE PX_FORCE_INLINE PxU8 to8(PxU16 value) +{ + PX_ASSERT(value <= 0xff); + return PxU8(value); +} +PX_CUDA_CALLABLE PX_FORCE_INLINE PxU8 to8(PxU32 value) +{ + PX_ASSERT(value <= 0xff); + return PxU8(value); +} +PX_CUDA_CALLABLE PX_FORCE_INLINE PxU8 to8(PxI32 value) +{ + PX_ASSERT(value <= 0xff); + PX_ASSERT(value >= 0); + return PxU8(value); +} +PX_CUDA_CALLABLE PX_FORCE_INLINE PxI8 toI8(PxU32 value) +{ + PX_ASSERT(value <= 0x7f); + return PxI8(value); +} + +/*! +Get number of elements in array +*/ +template <typename T, size_t N> +char (&ArraySizeHelper(T (&array)[N]))[N]; +#define PX_ARRAY_SIZE(_array) (sizeof(physx::shdfnd::ArraySizeHelper(_array))) + +/*! +Sort two elements using operator< + +On return x will be the smaller of the two +*/ +template <class T> +PX_CUDA_CALLABLE PX_FORCE_INLINE void order(T& x, T& y) +{ + if(y < x) + swap(x, y); +} + +// most architectures can do predication on real comparisons, and on VMX, it matters + +PX_CUDA_CALLABLE PX_FORCE_INLINE void order(PxReal& x, PxReal& y) +{ + PxReal newX = PxMin(x, y); + PxReal newY = PxMax(x, y); + x = newX; + y = newY; +} + +/*! 
+Sort two elements using operator< and also keep order +of any extra data +*/ +template <class T, class E1> +PX_CUDA_CALLABLE PX_FORCE_INLINE void order(T& x, T& y, E1& xe1, E1& ye1) +{ + if(y < x) + { + swap(x, y); + swap(xe1, ye1); + } +} + +#if PX_GCC_FAMILY && !PX_EMSCRIPTEN +__attribute__((noreturn)) +#endif + PX_INLINE void debugBreak() +{ +#if PX_WINDOWS || PX_XBOXONE + __debugbreak(); +#elif PX_ANDROID + raise(SIGTRAP); // works better than __builtin_trap. Proper call stack and can be continued. +#elif PX_LINUX + asm("int $3"); +#elif PX_GCC_FAMILY + __builtin_trap(); +#else + PX_ASSERT(false); +#endif +} + +bool checkValid(const float&); +bool checkValid(const PxVec3&); +bool checkValid(const PxQuat&); +bool checkValid(const PxMat33&); +bool checkValid(const PxTransform&); +bool checkValid(const char*); + +// equivalent to std::max_element +template <typename T> +inline const T* maxElement(const T* first, const T* last) +{ + const T* m = first; + for(const T* it = first + 1; it < last; ++it) + if(*m < *it) + m = it; + + return m; +} + +} // namespace shdfnd +} // namespace physx + +#endif diff --git a/PxShared/src/foundation/include/PsVecMath.h b/PxShared/src/foundation/include/PsVecMath.h new file mode 100644 index 0000000..4e891d8 --- /dev/null +++ b/PxShared/src/foundation/include/PsVecMath.h @@ -0,0 +1,1335 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSVECMATH_H +#define PSFOUNDATION_PSVECMATH_H + +#include "Ps.h" +#include "PsIntrinsics.h" +#include "foundation/PxVec3.h" +#include "foundation/PxVec4.h" +#include "foundation/PxMat33.h" +#include "foundation/PxUnionCast.h" + +// We can opt to use the scalar version of vectorised functions. +// This can catch type safety issues and might even work out more optimal on pc. +// It will also be useful for benchmarking and testing. +// NEVER submit with vector intrinsics deactivated without good reason. +// AM: deactivating SIMD for debug win64 just so autobuild will also exercise +// non-SIMD path, until a dedicated non-SIMD platform such as Arm comes online. +// TODO: dima: reference all platforms with SIMD support here, +// all unknown/experimental cases should better default to NO SIMD.
+
+// enable/disable SIMD
+// COMPILE_VECTOR_INTRINSICS is always defined after this block: 1 selects the
+// intrinsic implementation, 0 the scalar fallback. Defining PX_SIMD_DISABLED
+// forces 0 regardless of platform.
+#if !defined(PX_SIMD_DISABLED)
+#if PX_INTEL_FAMILY && (!defined(__EMSCRIPTEN__) || defined(__SSE2__))
+#define COMPILE_VECTOR_INTRINSICS 1
+#elif PX_ANDROID && PX_NEON
+#define COMPILE_VECTOR_INTRINSICS 1
+#elif PX_IOS && PX_NEON
+#define COMPILE_VECTOR_INTRINSICS 1
+#elif PX_NX
+#define COMPILE_VECTOR_INTRINSICS 1
+#else
+#define COMPILE_VECTOR_INTRINSICS 0
+#endif
+#else
+#define COMPILE_VECTOR_INTRINSICS 0
+#endif
+
+#if COMPILE_VECTOR_INTRINSICS && PX_INTEL_FAMILY && (PX_UNIX_FAMILY || PX_PS4)
+// only SSE2 compatible platforms should reach this
+#if PX_EMSCRIPTEN
+#include <emmintrin.h>
+#endif
+#include <xmmintrin.h>
+#endif
+
+namespace physx
+{
+namespace shdfnd
+{
+namespace aos
+{
+
+// Basic AoS types are
+// FloatV - 16-byte aligned representation of float.
+// Vec3V - 16-byte aligned representation of PxVec3 stored as (x y z 0).
+// Vec4V - 16-byte aligned representation of vector of 4 floats stored as (x y z w).
+// BoolV - 16-byte aligned representation of vector of 4 bools stored as (x y z w).
+// VecU32V - 16-byte aligned representation of 4 unsigned ints stored as (x y z w).
+// VecI32V - 16-byte aligned representation of 4 signed ints stored as (x y z w).
+// Mat33V - 16-byte aligned representation of any 3x3 matrix.
+// Mat34V - 16-byte aligned representation of transformation matrix (rotation in col1,col2,col3 and translation in
+// col4).
+// NOTE(review): the column names above count from 1; the helpers later in this header
+// access the members as col0..col3.
+// Mat44V - 16-byte aligned representation of any 4x4 matrix.
+
+// The type definitions are pulled in inside namespace aos: the intrinsic
+// implementation when COMPILE_VECTOR_INTRINSICS is set, the scalar one otherwise.
+#if COMPILE_VECTOR_INTRINSICS
+#include "PsAoS.h"
+#else
+#include "PsVecMathAoSScalar.h"
+#endif
+
+//////////////////////////////////////////
+// Construct a simd type from a scalar type
+//////////////////////////////////////////
+
+// FloatV
+//(f,f,f,f)
+PX_FORCE_INLINE FloatV FLoad(const PxF32 f);
+
+// Vec3V
+//(f,f,f,0)
+PX_FORCE_INLINE Vec3V V3Load(const PxF32 f);
+//(f.x,f.y,f.z,0)
+PX_FORCE_INLINE Vec3V V3LoadU(const PxVec3& f);
+//(f.x,f.y,f.z,0), f must be 16-byte aligned
+PX_FORCE_INLINE Vec3V V3LoadA(const PxVec3& f);
+//(f.x,f.y,f.z,w_undefined), f must be 16-byte aligned
+PX_FORCE_INLINE Vec3V V3LoadUnsafeA(const PxVec3& f);
+//(f[0],f[1],f[2],0)
+PX_FORCE_INLINE Vec3V V3LoadU(const PxF32* f);
+//(f[0],f[1],f[2],0), f must be 16-byte aligned
+PX_FORCE_INLINE Vec3V V3LoadA(const PxF32* f);
+
+// Vec4V
+//(f,f,f,f)
+PX_FORCE_INLINE Vec4V V4Load(const PxF32 f);
+//(f[0],f[1],f[2],f[3])
+PX_FORCE_INLINE Vec4V V4LoadU(const PxF32* const f);
+//(f[0],f[1],f[2],f[3]), f must be 16-byte aligned
+PX_FORCE_INLINE Vec4V V4LoadA(const PxF32* const f);
+//(x,y,z,w)
+PX_FORCE_INLINE Vec4V V4LoadXYZW(const PxF32& x, const PxF32& y, const PxF32& z, const PxF32& w);
+
+// BoolV
+//(f,f,f,f)
+PX_FORCE_INLINE BoolV BLoad(const bool f);
+//(f[0],f[1],f[2],f[3])
+PX_FORCE_INLINE BoolV BLoad(const bool* const f);
+
+// VecU32V
+//(f,f,f,f)
+PX_FORCE_INLINE VecU32V U4Load(const PxU32 f);
+//(f[0],f[1],f[2],f[3])
+PX_FORCE_INLINE VecU32V U4LoadU(const PxU32* f);
+//(f[0],f[1],f[2],f[3]), f must be 16-byte aligned
+PX_FORCE_INLINE VecU32V U4LoadA(const PxU32* f);
+//((U32)x, (U32)y, (U32)z, (U32)w)
+PX_FORCE_INLINE VecU32V U4LoadXYZW(PxU32 x, PxU32 y, PxU32 z, PxU32 w);
+
+// VecI32V
+//(i,i,i,i)
+PX_FORCE_INLINE VecI32V I4Load(const PxI32 i);
+// NOTE(review): the original comment on the two pointer loads below read "(i,i,i,i)";
+// presumably they load (i[0],i[1],i[2],i[3]) like U4LoadU / U4LoadA, with the A
+// variant requiring 16-byte alignment — confirm against the implementation.
+//(i[0],i[1],i[2],i[3])
+PX_FORCE_INLINE VecI32V I4LoadU(const PxI32* i);
+//(i[0],i[1],i[2],i[3])
+PX_FORCE_INLINE VecI32V I4LoadA(const PxI32* i);
+
+// QuatV
+//(x = v[0], y = v[1], z = v[2], w = v[3]); the array does not need to be aligned
+PX_FORCE_INLINE QuatV QuatVLoadU(const PxF32* v);
+//(x = v[0], y = v[1], z = v[2], w = v[3]); the array needs to be aligned, fast load
+PX_FORCE_INLINE QuatV QuatVLoadA(const PxF32* v);
+//(x, y, z, w)
+PX_FORCE_INLINE QuatV QuatVLoadXYZW(const PxF32 x, const PxF32 y, const PxF32 z, const PxF32 w);
+
+// not added to public api
+Vec4V Vec4V_From_PxVec3_WUndefined(const PxVec3& v);
+
+///////////////////////////////////////////////////
+// Construct a simd type from a different simd type
+///////////////////////////////////////////////////
+
+// Vec3V
+//(v.x,v.y,v.z,0)
+PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V(Vec4V v);
+//(v.x,v.y,v.z,undefined) - be very careful with w!=0 because many functions require w==0 for correct operation eg V3Dot, V3Length, V3Cross etc etc.
+PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V_WUndefined(const Vec4V v);
+
+// Vec4V
+//(f.x,f.y,f.z,f.w)
+PX_FORCE_INLINE Vec4V Vec4V_From_Vec3V(Vec3V f);
+//((PxF32)a.x, (PxF32)a.y, (PxF32)a.z, (PxF32)a.w)
+PX_FORCE_INLINE Vec4V Vec4V_From_VecU32V(VecU32V a);
+//((PxF32)a.x, (PxF32)a.y, (PxF32)a.z, (PxF32)a.w)
+PX_FORCE_INLINE Vec4V Vec4V_From_VecI32V(VecI32V a);
+//(*reinterpret_cast<PxF32*>(&a.x), *reinterpret_cast<PxF32*>(&a.y), *reinterpret_cast<PxF32*>(&a.z),
+// *reinterpret_cast<PxF32*>(&a.w))
+PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecU32V(VecU32V a);
+//(*reinterpret_cast<PxF32*>(&a.x), *reinterpret_cast<PxF32*>(&a.y), *reinterpret_cast<PxF32*>(&a.z),
+// *reinterpret_cast<PxF32*>(&a.w))
+PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecI32V(VecI32V a);
+
+// VecU32V
+//(*reinterpret_cast<PxU32*>(&a.x), *reinterpret_cast<PxU32*>(&a.y), *reinterpret_cast<PxU32*>(&a.z),
+// *reinterpret_cast<PxU32*>(&a.w))
+PX_FORCE_INLINE VecU32V VecU32V_ReinterpretFrom_Vec4V(Vec4V a);
+//(b[0], b[1], b[2], b[3])
+PX_FORCE_INLINE VecU32V VecU32V_From_BoolV(const BoolVArg b);
+
+// VecI32V
+//(*reinterpret_cast<PxI32*>(&a.x), *reinterpret_cast<PxI32*>(&a.y), *reinterpret_cast<PxI32*>(&a.z),
+// *reinterpret_cast<PxI32*>(&a.w))
+PX_FORCE_INLINE VecI32V VecI32V_ReinterpretFrom_Vec4V(Vec4V a);
+//((I32)a.x, (I32)a.y, (I32)a.z, (I32)a.w)
+PX_FORCE_INLINE VecI32V VecI32V_From_Vec4V(Vec4V a);
+//((I32)b.x, (I32)b.y, (I32)b.z, (I32)b.w)
+PX_FORCE_INLINE VecI32V VecI32V_From_BoolV(const BoolVArg b);
+
+///////////////////////////////////////////////////
+// Convert from a simd type back to a scalar type
+///////////////////////////////////////////////////
+
+// FloatV
+// a.x
+PX_FORCE_INLINE void FStore(const FloatV a, PxF32* PX_RESTRICT f);
+
+// Vec3V
+//(a.x,a.y,a.z)
+PX_FORCE_INLINE void V3StoreA(const Vec3V a, PxVec3& f);
+//(a.x,a.y,a.z)
+PX_FORCE_INLINE void V3StoreU(const Vec3V a, PxVec3& f);
+
+// Vec4V
+PX_FORCE_INLINE void V4StoreA(const Vec4V a, PxF32* f);
+PX_FORCE_INLINE void V4StoreU(const Vec4V a, PxF32* f);
+
+// BoolV
+PX_FORCE_INLINE void BStoreA(const BoolV b, PxU32* f);
+
+// VecU32V
+PX_FORCE_INLINE void U4StoreA(const VecU32V uv, PxU32* u);
+
+// VecI32V
+PX_FORCE_INLINE void I4StoreA(const VecI32V iv, PxI32* i);
+
+//////////////////////////////////////////////////////////////////
+// Test that simd types have elements in the floating point range
+//////////////////////////////////////////////////////////////////
+
+// check that each component is valid, i.e. in floating point range
+PX_FORCE_INLINE bool isFiniteFloatV(const FloatV a);
+// check that each component is valid, i.e. in floating point range
+PX_FORCE_INLINE bool isFiniteVec3V(const Vec3V a);
+// check that each component is valid, i.e. in floating point range
+PX_FORCE_INLINE bool isFiniteVec4V(const Vec4V a);
+
+// Check that w-component is zero.
+PX_FORCE_INLINE bool isValidVec3V(const Vec3V a);
+
+//////////////////////////////////////////////////////////////////
+// Tests that all elements of two 16-byte types are completely equivalent.
+// Use these tests for unit testing and asserts only.
+//////////////////////////////////////////////////////////////////
+
+namespace _VecMathTests
+{
+PX_FORCE_INLINE Vec3V getInvalidVec3V();
+PX_FORCE_INLINE bool allElementsEqualFloatV(const FloatV a, const FloatV b);
+PX_FORCE_INLINE bool allElementsEqualVec3V(const Vec3V a, const Vec3V b);
+PX_FORCE_INLINE bool allElementsEqualVec4V(const Vec4V a, const Vec4V b);
+PX_FORCE_INLINE bool allElementsEqualBoolV(const BoolV a, const BoolV b);
+PX_FORCE_INLINE bool allElementsEqualVecU32V(const VecU32V a, const VecU32V b);
+PX_FORCE_INLINE bool allElementsEqualVecI32V(const VecI32V a, const VecI32V b);
+
+// Matrix comparisons: true only when every column pair compares equal.
+// Columns are checked in order and the first mismatch ends the comparison.
+PX_FORCE_INLINE bool allElementsEqualMat33V(const Mat33V& a, const Mat33V& b)
+{
+    if(!allElementsEqualVec3V(a.col0, b.col0))
+        return false;
+    if(!allElementsEqualVec3V(a.col1, b.col1))
+        return false;
+    return allElementsEqualVec3V(a.col2, b.col2);
+}
+PX_FORCE_INLINE bool allElementsEqualMat34V(const Mat34V& a, const Mat34V& b)
+{
+    if(!allElementsEqualVec3V(a.col0, b.col0))
+        return false;
+    if(!allElementsEqualVec3V(a.col1, b.col1))
+        return false;
+    if(!allElementsEqualVec3V(a.col2, b.col2))
+        return false;
+    return allElementsEqualVec3V(a.col3, b.col3);
+}
+PX_FORCE_INLINE bool allElementsEqualMat44V(const Mat44V& a, const Mat44V& b)
+{
+    if(!allElementsEqualVec4V(a.col0, b.col0))
+        return false;
+    if(!allElementsEqualVec4V(a.col1, b.col1))
+        return false;
+    if(!allElementsEqualVec4V(a.col2, b.col2))
+        return false;
+    return allElementsEqualVec4V(a.col3, b.col3);
+}
+
+PX_FORCE_INLINE bool allElementsNearEqualFloatV(const FloatV a, const FloatV b);
+PX_FORCE_INLINE bool allElementsNearEqualVec3V(const Vec3V a, const Vec3V b);
+PX_FORCE_INLINE bool allElementsNearEqualVec4V(const Vec4V a, const Vec4V b);
+// Same column-by-column scheme, built on the "near equal" vector tests.
+PX_FORCE_INLINE bool allElementsNearEqualMat33V(const Mat33V& a, const Mat33V& b)
+{
+    if(!allElementsNearEqualVec3V(a.col0, b.col0))
+        return false;
+    if(!allElementsNearEqualVec3V(a.col1, b.col1))
+        return false;
+    return allElementsNearEqualVec3V(a.col2, b.col2);
+}
+PX_FORCE_INLINE bool allElementsNearEqualMat34V(const Mat34V& a, const Mat34V& b)
+{
+    if(!allElementsNearEqualVec3V(a.col0, b.col0))
+        return false;
+    if(!allElementsNearEqualVec3V(a.col1, b.col1))
+        return false;
+    if(!allElementsNearEqualVec3V(a.col2, b.col2))
+        return false;
+    return allElementsNearEqualVec3V(a.col3, b.col3);
+}
+PX_FORCE_INLINE bool allElementsNearEqualMat44V(const Mat44V& a, const Mat44V& b)
+{
+    if(!allElementsNearEqualVec4V(a.col0, b.col0))
+        return false;
+    if(!allElementsNearEqualVec4V(a.col1, b.col1))
+        return false;
+    if(!allElementsNearEqualVec4V(a.col2, b.col2))
+        return false;
+    return allElementsNearEqualVec4V(a.col3, b.col3);
+}
+} // namespace _VecMathTests
+
+//////////////////////////////////////////////////////////////////
+// Math operations on FloatV
+//////////////////////////////////////////////////////////////////
+
+//(0,0,0,0)
+PX_FORCE_INLINE FloatV FZero();
+//(1,1,1,1)
+PX_FORCE_INLINE FloatV FOne();
+//(0.5,0.5,0.5,0.5)
+PX_FORCE_INLINE FloatV FHalf();
+//(PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL)
+PX_FORCE_INLINE FloatV FEps();
+//(PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL, PX_MAX_REAL)
+PX_FORCE_INLINE FloatV FMax();
+//(-PX_MAX_REAL, -PX_MAX_REAL, -PX_MAX_REAL, -PX_MAX_REAL)
+PX_FORCE_INLINE FloatV FNegMax();
+//(1e-6f, 1e-6f, 1e-6f, 1e-6f)
+PX_FORCE_INLINE FloatV FEps6();
+//((PxF32*)&1, (PxF32*)&1, (PxF32*)&1, (PxF32*)&1)
+
+//-f (per component)
+PX_FORCE_INLINE FloatV FNeg(const FloatV f);
+// a+b (per component)
+PX_FORCE_INLINE FloatV FAdd(const FloatV a, const FloatV b);
+// a-b (per component)
+PX_FORCE_INLINE FloatV FSub(const FloatV a, const FloatV b);
+// a*b (per component)
+PX_FORCE_INLINE FloatV FMul(const FloatV a, const FloatV b);
+// a/b (per component)
+PX_FORCE_INLINE FloatV FDiv(const FloatV a, const FloatV b);
+// a/b (per component)
+PX_FORCE_INLINE FloatV FDivFast(const FloatV a, const FloatV b);
+// 1.0f/a
+PX_FORCE_INLINE FloatV FRecip(const FloatV a);
+// 1.0f/a
+PX_FORCE_INLINE FloatV FRecipFast(const FloatV a);
+// 1.0f/sqrt(a)
+PX_FORCE_INLINE FloatV FRsqrt(const FloatV a);
+// 1.0f/sqrt(a)
+PX_FORCE_INLINE FloatV FRsqrtFast(const FloatV a);
+// sqrt(a)
+PX_FORCE_INLINE FloatV FSqrt(const FloatV a);
+// a*b+c
+PX_FORCE_INLINE FloatV FScaleAdd(const FloatV a, const FloatV b, const FloatV c);
+// c-a*b
+PX_FORCE_INLINE FloatV FNegScaleSub(const FloatV a, const FloatV b, const FloatV c);
+// fabs(a)
+PX_FORCE_INLINE FloatV FAbs(const FloatV a);
+// c ? a : b (per component)
+PX_FORCE_INLINE FloatV FSel(const BoolV c, const FloatV a, const FloatV b);
+// a>b (per component)
+PX_FORCE_INLINE BoolV FIsGrtr(const FloatV a, const FloatV b);
+// a>=b (per component)
+PX_FORCE_INLINE BoolV FIsGrtrOrEq(const FloatV a, const FloatV b);
+// a==b (per component)
+PX_FORCE_INLINE BoolV FIsEq(const FloatV a, const FloatV b);
+// Max(a,b) (per component)
+PX_FORCE_INLINE FloatV FMax(const FloatV a, const FloatV b);
+// Min(a,b) (per component)
+PX_FORCE_INLINE FloatV FMin(const FloatV a, const FloatV b);
+// Clamp(a,minV,maxV) (per component)
+PX_FORCE_INLINE FloatV FClamp(const FloatV a, const FloatV minV, const FloatV maxV);
+
+// a.x>b.x
+PX_FORCE_INLINE PxU32 FAllGrtr(const FloatV a, const FloatV b);
+// a.x>=b.x
+PX_FORCE_INLINE PxU32 FAllGrtrOrEq(const FloatV a, const FloatV b);
+// a.x==b.x
+PX_FORCE_INLINE PxU32 FAllEq(const FloatV a, const FloatV b);
+// a<min || a>max
+PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV min, const FloatV max);
+// a>=min && a<=max
+PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV min, const FloatV max);
+// a<-bounds || a>bounds
+PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV bounds);
+// a>=-bounds && a<=bounds
+PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV bounds);
+
+// round float a to the nearest int
+PX_FORCE_INLINE FloatV FRound(const FloatV a);
+// calculate the sin of float a
+PX_FORCE_INLINE FloatV FSin(const FloatV a);
+// calculate the cos of float a
+PX_FORCE_INLINE FloatV FCos(const FloatV a);
+
+//////////////////////////////////////////////////////////////////
+// Math operations on Vec3V
+//////////////////////////////////////////////////////////////////
+
+//(f,f,f,f)
+PX_FORCE_INLINE Vec3V V3Splat(const FloatV f);
+
+//(x,y,z)
+PX_FORCE_INLINE Vec3V V3Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z);
+
+//(1,0,0,0)
+PX_FORCE_INLINE Vec3V V3UnitX();
+//(0,1,0,0)
+PX_FORCE_INLINE Vec3V V3UnitY();
+//(0,0,1,0)
+PX_FORCE_INLINE Vec3V V3UnitZ();
+
+//(f.x,f.x,f.x,f.x)
+PX_FORCE_INLINE FloatV V3GetX(const Vec3V f);
+//(f.y,f.y,f.y,f.y)
+PX_FORCE_INLINE FloatV V3GetY(const Vec3V f);
+//(f.z,f.z,f.z,f.z)
+PX_FORCE_INLINE FloatV V3GetZ(const Vec3V f);
+
+//(f,v.y,v.z,v.w)
+PX_FORCE_INLINE Vec3V V3SetX(const Vec3V v, const FloatV f);
+//(v.x,f,v.z,v.w)
+PX_FORCE_INLINE Vec3V V3SetY(const Vec3V v, const FloatV f);
+//(v.x,v.y,f,v.w)
+PX_FORCE_INLINE Vec3V V3SetZ(const Vec3V v, const FloatV f);
+
+// v.x=f
+PX_FORCE_INLINE void V3WriteX(Vec3V& v, const PxF32 f);
+// v.y=f
+PX_FORCE_INLINE void V3WriteY(Vec3V& v, const PxF32 f);
+// v.z=f
+PX_FORCE_INLINE void V3WriteZ(Vec3V& v, const PxF32 f);
+// v.x=f.x, v.y=f.y, v.z=f.z
+PX_FORCE_INLINE void V3WriteXYZ(Vec3V& v, const PxVec3& f);
+// return v.x
+PX_FORCE_INLINE PxF32 V3ReadX(const Vec3V& v);
+// return v.y
+PX_FORCE_INLINE PxF32 V3ReadY(const Vec3V& v);
+// return v.z
+PX_FORCE_INLINE PxF32 V3ReadZ(const Vec3V& v);
+// return (v.x,v.y,v.z)
+PX_FORCE_INLINE const PxVec3& V3ReadXYZ(const Vec3V& v);
+
+//(a.x, b.x, c.x)
+PX_FORCE_INLINE Vec3V V3ColX(const Vec3V a, const Vec3V b, const Vec3V c);
+//(a.y, b.y, c.y)
+PX_FORCE_INLINE Vec3V V3ColY(const Vec3V a, const Vec3V b, const Vec3V c);
+//(a.z, b.z, c.z)
+PX_FORCE_INLINE Vec3V V3ColZ(const Vec3V a, const Vec3V b, const Vec3V c);
+
+//(0,0,0,0)
+PX_FORCE_INLINE Vec3V V3Zero();
+//(1,1,1,1)
+PX_FORCE_INLINE Vec3V V3One();
+//(PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL)
+PX_FORCE_INLINE Vec3V V3Eps();
+//-c (per component)
+PX_FORCE_INLINE Vec3V V3Neg(const Vec3V c);
+// a+b (per component)
+PX_FORCE_INLINE Vec3V V3Add(const Vec3V a, const Vec3V b);
+// a-b (per component)
+PX_FORCE_INLINE Vec3V V3Sub(const Vec3V a, const Vec3V b);
+// a*b (per component)
+PX_FORCE_INLINE Vec3V V3Scale(const Vec3V a, const FloatV b);
+// a*b (per component)
+PX_FORCE_INLINE Vec3V V3Mul(const Vec3V a, const Vec3V b);
+// a/b (per component)
+PX_FORCE_INLINE Vec3V V3ScaleInv(const Vec3V a, const FloatV b);
+// a/b (per component)
+PX_FORCE_INLINE Vec3V V3Div(const Vec3V a, const Vec3V b);
+// a/b (per component)
+PX_FORCE_INLINE Vec3V V3ScaleInvFast(const Vec3V a, const FloatV b);
+// a/b (per component)
+PX_FORCE_INLINE Vec3V V3DivFast(const Vec3V a, const Vec3V b);
+// 1.0f/a
+PX_FORCE_INLINE Vec3V V3Recip(const Vec3V a);
+// 1.0f/a
+PX_FORCE_INLINE Vec3V V3RecipFast(const Vec3V a);
+// 1.0f/sqrt(a)
+PX_FORCE_INLINE Vec3V V3Rsqrt(const Vec3V a);
+// 1.0f/sqrt(a)
+PX_FORCE_INLINE Vec3V V3RsqrtFast(const Vec3V a);
+// a*b+c
+PX_FORCE_INLINE Vec3V V3ScaleAdd(const Vec3V a, const FloatV b, const Vec3V c);
+// c-a*b
+PX_FORCE_INLINE Vec3V V3NegScaleSub(const Vec3V a, const FloatV b, const Vec3V c);
+// a*b+c
+PX_FORCE_INLINE Vec3V V3MulAdd(const Vec3V a, const Vec3V b, const Vec3V c);
+// c-a*b
+PX_FORCE_INLINE Vec3V V3NegMulSub(const Vec3V a, const Vec3V b, const Vec3V c);
+// fabs(a)
+PX_FORCE_INLINE Vec3V V3Abs(const Vec3V a);
+
+// a.b
+// Note: a.w and b.w must have value zero
+PX_FORCE_INLINE FloatV V3Dot(const Vec3V a, const Vec3V b);
+// aXb
+// Note: a.w and b.w must have value zero
+PX_FORCE_INLINE Vec3V V3Cross(const Vec3V a, const Vec3V b);
+// |a.a|^1/2
+// Note: a.w must have value zero
+PX_FORCE_INLINE FloatV V3Length(const Vec3V a);
+// a.a
+// Note: a.w must have value zero
+PX_FORCE_INLINE FloatV V3LengthSq(const Vec3V a);
+// a*|a.a|^-1/2
+// Note: a.w must have value zero
+PX_FORCE_INLINE Vec3V V3Normalize(const Vec3V a);
+// NOTE(review): the declaration below repeats V3Length, which is already declared above.
+// The formula in its comment describes a normalize with a zero fallback, which does not
+// match this signature — confirm what was intended here.
+// a.a>0 ? a*|a.a|^-1/2 : (0,0,0,0)
+// Note: a.w must have value zero
+PX_FORCE_INLINE FloatV V3Length(const Vec3V a);
+// a.a>0 ? a*|a.a|^-1/2 : unsafeReturnValue
+// Note: a.w must have value zero
+PX_FORCE_INLINE Vec3V V3NormalizeSafe(const Vec3V a, const Vec3V unsafeReturnValue);
+// a.x + a.y + a.z
+// Note: a.w must have value zero
+PX_FORCE_INLINE FloatV V3SumElems(const Vec3V a);
+
+// c ? a : b (per component)
+PX_FORCE_INLINE Vec3V V3Sel(const BoolV c, const Vec3V a, const Vec3V b);
+// a>b (per component)
+PX_FORCE_INLINE BoolV V3IsGrtr(const Vec3V a, const Vec3V b);
+// a>=b (per component)
+PX_FORCE_INLINE BoolV V3IsGrtrOrEq(const Vec3V a, const Vec3V b);
+// a==b (per component)
+PX_FORCE_INLINE BoolV V3IsEq(const Vec3V a, const Vec3V b);
+// Max(a,b) (per component)
+PX_FORCE_INLINE Vec3V V3Max(const Vec3V a, const Vec3V b);
+// Min(a,b) (per component)
+PX_FORCE_INLINE Vec3V V3Min(const Vec3V a, const Vec3V b);
+
+// Extract the maximum value from a
+// Note: a.w must have value zero
+PX_FORCE_INLINE FloatV V3ExtractMax(const Vec3V a);
+
+// Extract the minimum value from a
+// Note: a.w must have value zero
+PX_FORCE_INLINE FloatV V3ExtractMin(const Vec3V a);
+
+// Clamp(a,minV,maxV) (per component)
+PX_FORCE_INLINE Vec3V V3Clamp(const Vec3V a, const Vec3V minV, const Vec3V maxV);
+
+// Extract the sign for each component
+PX_FORCE_INLINE Vec3V V3Sign(const Vec3V a);
+
+// Test all components.
+// (a.x>b.x && a.y>b.y && a.z>b.z) +// Note: a.w and b.w must have value zero +PX_FORCE_INLINE PxU32 V3AllGrtr(const Vec3V a, const Vec3V b); +// (a.x>=b.x && a.y>=b.y && a.z>=b.z) +// Note: a.w and b.w must have value zero +PX_FORCE_INLINE PxU32 V3AllGrtrOrEq(const Vec3V a, const Vec3V b); +// (a.x==b.x && a.y==b.y && a.z==b.z) +// Note: a.w and b.w must have value zero +PX_FORCE_INLINE PxU32 V3AllEq(const Vec3V a, const Vec3V b); +// a.x<min.x || a.y<min.y || a.z<min.z || a.x>max.x || a.y>max.y || a.z>max.z +// Note: a.w and min.w and max.w must have value zero +PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V min, const Vec3V max); +// a.x>=min.x && a.y>=min.y && a.z>=min.z && a.x<=max.x && a.y<=max.y && a.z<=max.z +// Note: a.w and min.w and max.w must have value zero +PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V min, const Vec3V max); +// a.x<-bounds.x || a.y<-bounds.y || a.z<-bounds.z || a.x>bounds.x || a.y>bounds.y || a.z>bounds.z +// Note: a.w and bounds.w must have value zero +PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V bounds); +// a.x>=-bounds.x && a.y>=-bounds.y && a.z>=-bounds.z && a.x<=bounds.x && a.y<=bounds.y && a.z<=bounds.z +// Note: a.w and bounds.w must have value zero +PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V bounds); + +//(floor(a.x + 0.5f), floor(a.y + 0.5f), floor(a.z + 0.5f)) +PX_FORCE_INLINE Vec3V V3Round(const Vec3V a); + +//(sinf(a.x), sinf(a.y), sinf(a.z)) +PX_FORCE_INLINE Vec3V V3Sin(const Vec3V a); +//(cosf(a.x), cosf(a.y), cosf(a.z)) +PX_FORCE_INLINE Vec3V V3Cos(const Vec3V a); + +//(a.y,a.z,a.z) +PX_FORCE_INLINE Vec3V V3PermYZZ(const Vec3V a); +//(a.x,a.y,a.x) +PX_FORCE_INLINE Vec3V V3PermXYX(const Vec3V a); +//(a.y,a.z,a.x) +PX_FORCE_INLINE Vec3V V3PermYZX(const Vec3V a); +//(a.z, a.x, a.y) +PX_FORCE_INLINE Vec3V V3PermZXY(const Vec3V a); +//(a.z,a.z,a.y) +PX_FORCE_INLINE Vec3V V3PermZZY(const Vec3V a); +//(a.y,a.x,a.x) +PX_FORCE_INLINE Vec3V V3PermYXX(const Vec3V 
a); +//(0, v1.z, v0.y) +PX_FORCE_INLINE Vec3V V3Perm_Zero_1Z_0Y(const Vec3V v0, const Vec3V v1); +//(v0.z, 0, v1.x) +PX_FORCE_INLINE Vec3V V3Perm_0Z_Zero_1X(const Vec3V v0, const Vec3V v1); +//(v1.y, v0.x, 0) +PX_FORCE_INLINE Vec3V V3Perm_1Y_0X_Zero(const Vec3V v0, const Vec3V v1); + +// Transpose 3 Vec3Vs inplace. Sets the w component to zero +// [ x0, y0, z0, w0] [ x1, y1, z1, w1] [ x2, y2, z2, w2] -> [x0 x1 x2 0] [y0 y1 y2 0] [z0 z1 z2 0] +PX_FORCE_INLINE void V3Transpose(Vec3V& col0, Vec3V& col1, Vec3V& col2); + +////////////////////////////////////////////////////////////////// +// Math operations on Vec4V +////////////////////////////////////////////////////////////////// + +//(f,f,f,f) +PX_FORCE_INLINE Vec4V V4Splat(const FloatV f); + +//(f[0],f[1],f[2],f[3]) +PX_FORCE_INLINE Vec4V V4Merge(const FloatV* const f); +//(x,y,z,w) +PX_FORCE_INLINE Vec4V V4Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z, const FloatVArg w); +//(x.w, y.w, z.w, w.w) +PX_FORCE_INLINE Vec4V V4MergeW(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w); +//(x.z, y.z, z.z, w.z) +PX_FORCE_INLINE Vec4V V4MergeZ(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w); +//(x.y, y.y, z.y, w.y) +PX_FORCE_INLINE Vec4V V4MergeY(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w); +//(x.x, y.x, z.x, w.x) +PX_FORCE_INLINE Vec4V V4MergeX(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w); + +//(a.x, b.x, a.y, b.y) +PX_FORCE_INLINE Vec4V V4UnpackXY(const Vec4VArg a, const Vec4VArg b); +//(a.z, b.z, a.w, b.w) +PX_FORCE_INLINE Vec4V V4UnpackZW(const Vec4VArg a, const Vec4VArg b); + +//(1,0,0,0) +PX_FORCE_INLINE Vec4V V4UnitX(); +//(0,1,0,0) +PX_FORCE_INLINE Vec4V V4UnitY(); +//(0,0,1,0) +PX_FORCE_INLINE Vec4V V4UnitZ(); +//(0,0,0,1) +PX_FORCE_INLINE Vec4V V4UnitW(); + +//(f.x,f.x,f.x,f.x) +PX_FORCE_INLINE FloatV V4GetX(const Vec4V f); +//(f.y,f.y,f.y,f.y) +PX_FORCE_INLINE FloatV V4GetY(const Vec4V f); 
+//(f.z,f.z,f.z,f.z) +PX_FORCE_INLINE FloatV V4GetZ(const Vec4V f); +//(f.w,f.w,f.w,f.w) +PX_FORCE_INLINE FloatV V4GetW(const Vec4V f); + +//(f,v.y,v.z,v.w) +PX_FORCE_INLINE Vec4V V4SetX(const Vec4V v, const FloatV f); +//(v.x,f,v.z,v.w) +PX_FORCE_INLINE Vec4V V4SetY(const Vec4V v, const FloatV f); +//(v.x,v.y,f,v.w) +PX_FORCE_INLINE Vec4V V4SetZ(const Vec4V v, const FloatV f); +//(v.x,v.y,v.z,f) +PX_FORCE_INLINE Vec4V V4SetW(const Vec4V v, const FloatV f); + +//(v.x,v.y,v.z,0) +PX_FORCE_INLINE Vec4V V4ClearW(const Vec4V v); + +//(a[elementIndex], a[elementIndex], a[elementIndex], a[elementIndex]) +template <int elementIndex> +PX_FORCE_INLINE Vec4V V4SplatElement(Vec4V a); + +// v.x=f +PX_FORCE_INLINE void V4WriteX(Vec4V& v, const PxF32 f); +// v.y=f +PX_FORCE_INLINE void V4WriteY(Vec4V& v, const PxF32 f); +// v.z=f +PX_FORCE_INLINE void V4WriteZ(Vec4V& v, const PxF32 f); +// v.w=f +PX_FORCE_INLINE void V4WriteW(Vec4V& v, const PxF32 f); +// v.x=f.x, v.y=f.y, v.z=f.z +PX_FORCE_INLINE void V4WriteXYZ(Vec4V& v, const PxVec3& f); +// return v.x +PX_FORCE_INLINE PxF32 V4ReadX(const Vec4V& v); +// return v.y +PX_FORCE_INLINE PxF32 V4ReadY(const Vec4V& v); +// return v.z +PX_FORCE_INLINE PxF32 V4ReadZ(const Vec4V& v); +// return v.w +PX_FORCE_INLINE PxF32 V4ReadW(const Vec4V& v); +// return (v.x,v.y,v.z) +PX_FORCE_INLINE const PxVec3& V4ReadXYZ(const Vec4V& v); + +//(0,0,0,0) +PX_FORCE_INLINE Vec4V V4Zero(); +//(1,1,1,1) +PX_FORCE_INLINE Vec4V V4One(); +//(PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL,PX_EPS_REAL) +PX_FORCE_INLINE Vec4V V4Eps(); + +//-c (per component) +PX_FORCE_INLINE Vec4V V4Neg(const Vec4V c); +// a+b (per component) +PX_FORCE_INLINE Vec4V V4Add(const Vec4V a, const Vec4V b); +// a-b (per component) +PX_FORCE_INLINE Vec4V V4Sub(const Vec4V a, const Vec4V b); +// a*b (per component) +PX_FORCE_INLINE Vec4V V4Scale(const Vec4V a, const FloatV b); +// a*b (per component) +PX_FORCE_INLINE Vec4V V4Mul(const Vec4V a, const Vec4V b); +// a/b (per component) 
+PX_FORCE_INLINE Vec4V V4ScaleInv(const Vec4V a, const FloatV b); +// a/b (per component) +PX_FORCE_INLINE Vec4V V4Div(const Vec4V a, const Vec4V b); +// a/b (per component) +PX_FORCE_INLINE Vec4V V4ScaleInvFast(const Vec4V a, const FloatV b); +// a/b (per component) +PX_FORCE_INLINE Vec4V V4DivFast(const Vec4V a, const Vec4V b); +// 1.0f/a +PX_FORCE_INLINE Vec4V V4Recip(const Vec4V a); +// 1.0f/a +PX_FORCE_INLINE Vec4V V4RecipFast(const Vec4V a); +// 1.0f/sqrt(a) +PX_FORCE_INLINE Vec4V V4Rsqrt(const Vec4V a); +// 1.0f/sqrt(a) +PX_FORCE_INLINE Vec4V V4RsqrtFast(const Vec4V a); +// a*b+c +PX_FORCE_INLINE Vec4V V4ScaleAdd(const Vec4V a, const FloatV b, const Vec4V c); +// c-a*b +PX_FORCE_INLINE Vec4V V4NegScaleSub(const Vec4V a, const FloatV b, const Vec4V c); +// a*b+c +PX_FORCE_INLINE Vec4V V4MulAdd(const Vec4V a, const Vec4V b, const Vec4V c); +// c-a*b +PX_FORCE_INLINE Vec4V V4NegMulSub(const Vec4V a, const Vec4V b, const Vec4V c); + +// fabs(a) +PX_FORCE_INLINE Vec4V V4Abs(const Vec4V a); +// bitwise a & ~b +PX_FORCE_INLINE Vec4V V4Andc(const Vec4V a, const VecU32V b); + +// a.b (W is taken into account) +PX_FORCE_INLINE FloatV V4Dot(const Vec4V a, const Vec4V b); +// a.b (same computation as V3Dot. W is ignored in input) +PX_FORCE_INLINE FloatV V4Dot3(const Vec4V a, const Vec4V b); +// aXb (same computation as V3Cross. W is ignored in input and undefined in output) +PX_FORCE_INLINE Vec4V V4Cross(const Vec4V a, const Vec4V b); + +//|a.a|^1/2 +PX_FORCE_INLINE FloatV V4Length(const Vec4V a); +// a.a +PX_FORCE_INLINE FloatV V4LengthSq(const Vec4V a); + +// a*|a.a|^-1/2 +PX_FORCE_INLINE Vec4V V4Normalize(const Vec4V a); +// a.a>0 ? a*|a.a|^-1/2 : unsafeReturnValue +PX_FORCE_INLINE Vec4V V4NormalizeSafe(const Vec4V a, const Vec4V unsafeReturnValue); +// a*|a.a|^-1/2 +PX_FORCE_INLINE Vec4V V4NormalizeFast(const Vec4V a); + +// c ? 
a : b (per component) +PX_FORCE_INLINE Vec4V V4Sel(const BoolV c, const Vec4V a, const Vec4V b); +// a>b (per component) +PX_FORCE_INLINE BoolV V4IsGrtr(const Vec4V a, const Vec4V b); +// a>=b (per component) +PX_FORCE_INLINE BoolV V4IsGrtrOrEq(const Vec4V a, const Vec4V b); +// a==b (per component) +PX_FORCE_INLINE BoolV V4IsEq(const Vec4V a, const Vec4V b); +// Max(a,b) (per component) +PX_FORCE_INLINE Vec4V V4Max(const Vec4V a, const Vec4V b); +// Min(a,b) (per component) +PX_FORCE_INLINE Vec4V V4Min(const Vec4V a, const Vec4V b); +// Get the maximum component from a +PX_FORCE_INLINE FloatV V4ExtractMax(const Vec4V a); +// Get the minimum component from a +PX_FORCE_INLINE FloatV V4ExtractMin(const Vec4V a); + +// Clamp(a,b) (per component) +PX_FORCE_INLINE Vec4V V4Clamp(const Vec4V a, const Vec4V minV, const Vec4V maxV); + +// return 1 if all components of a are greater than all components of b. +PX_FORCE_INLINE PxU32 V4AllGrtr(const Vec4V a, const Vec4V b); +// return 1 if all components of a are greater than or equal to all components of b +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq(const Vec4V a, const Vec4V b); +// return 1 if XYZ components of a are greater than or equal to XYZ components of b. W is ignored. +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq3(const Vec4V a, const Vec4V b); +// return 1 if all components of a are equal to all components of b +PX_FORCE_INLINE PxU32 V4AllEq(const Vec4V a, const Vec4V b); +// return 1 if any XYZ component of a is greater than the corresponding component of b. W is ignored. 
+PX_FORCE_INLINE PxU32 V4AnyGrtr3(const Vec4V a, const Vec4V b); + +// round(a)(per component) +PX_FORCE_INLINE Vec4V V4Round(const Vec4V a); +// sin(a) (per component) +PX_FORCE_INLINE Vec4V V4Sin(const Vec4V a); +// cos(a) (per component) +PX_FORCE_INLINE Vec4V V4Cos(const Vec4V a); + +// Permute v into a new vec4v with YXWZ format +PX_FORCE_INLINE Vec4V V4PermYXWZ(const Vec4V v); +// Permute v into a new vec4v with XZXZ format +PX_FORCE_INLINE Vec4V V4PermXZXZ(const Vec4V v); +// Permute v into a new vec4v with YWYW format +PX_FORCE_INLINE Vec4V V4PermYWYW(const Vec4V v); +// Permute v into a new vec4v with YZXW format +PX_FORCE_INLINE Vec4V V4PermYZXW(const Vec4V v); + +// Permute a into a new vec4v with format {a[x], a[y], a[z], a[w]} +// V4Perm<1,3,1,3> is equal to V4PermYWYW +// V4Perm<0,2,0,2> is equal to V4PermXZXZ +// V4Perm<1,0,3,2> is equal to V4PermYXWZ +template <PxU8 x, PxU8 y, PxU8 z, PxU8 w> +PX_FORCE_INLINE Vec4V V4Perm(const Vec4V a); + +// Transpose 4 Vec4Vs inplace. 
+// [ x0, y0, z0, w0] [ x1, y1, z1, w1] [ x2, y2, z2, w2] [ x3, y3, z3, w3] -> +// [ x0, x1, x2, x3] [ y0, y1, y2, y3] [ z0, z1, z2, z3] [ w0, w1, w2, w3] +PX_FORCE_INLINE void V4Transpose(Vec4V& col0, Vec4V& col1, Vec4V& col2, Vec4V& col3); + +// q = cos(a/2) + u*sin(a/2) +PX_FORCE_INLINE QuatV QuatV_From_RotationAxisAngle(const Vec3V u, const FloatV a); +// convert q to a unit quaternion +PX_FORCE_INLINE QuatV QuatNormalize(const QuatV q); +//|q.q|^1/2 +PX_FORCE_INLINE FloatV QuatLength(const QuatV q); +// q.q +PX_FORCE_INLINE FloatV QuatLengthSq(const QuatV q); +// a.b +PX_FORCE_INLINE FloatV QuatDot(const QuatV a, const QuatV b); +//(-q.x, -q.y, -q.z, q.w) +PX_FORCE_INLINE QuatV QuatConjugate(const QuatV q); +//(q.x, q.y, q.z) +PX_FORCE_INLINE Vec3V QuatGetImaginaryPart(const QuatV q); +// convert quaternion to matrix 33 +PX_FORCE_INLINE Mat33V QuatGetMat33V(const QuatVArg q); +// convert quaternion to matrix 33 +PX_FORCE_INLINE void QuatGetMat33V(const QuatVArg q, Vec3V& column0, Vec3V& column1, Vec3V& column2); +// convert matrix 33 to quaternion +PX_FORCE_INLINE QuatV Mat33GetQuatV(const Mat33V& a); +// brief computes rotation of x-axis +PX_FORCE_INLINE Vec3V QuatGetBasisVector0(const QuatV q); +// brief computes rotation of y-axis +PX_FORCE_INLINE Vec3V QuatGetBasisVector1(const QuatV q); +// brief computes rotation of z-axis +PX_FORCE_INLINE Vec3V QuatGetBasisVector2(const QuatV q); +// calculate the rotation vector from q and v +PX_FORCE_INLINE Vec3V QuatRotate(const QuatV q, const Vec3V v); +// calculate the rotation vector from the conjugate quaternion and v +PX_FORCE_INLINE Vec3V QuatRotateInv(const QuatV q, const Vec3V v); +// quaternion multiplication +PX_FORCE_INLINE QuatV QuatMul(const QuatV a, const QuatV b); +// quaternion add +PX_FORCE_INLINE QuatV QuatAdd(const QuatV a, const QuatV b); +// (-q.x, -q.y, -q.z, -q.w) +PX_FORCE_INLINE QuatV QuatNeg(const QuatV q); +// (a.x - b.x, a.y-b.y, a.z-b.z, a.w-b.w ) +PX_FORCE_INLINE QuatV QuatSub(const QuatV a, const 
QuatV b); +// (a.x*b, a.y*b, a.z*b, a.w*b) +PX_FORCE_INLINE QuatV QuatScale(const QuatV a, const FloatV b); +// (x = v[0], y = v[1], z = v[2], w =v[3]) +PX_FORCE_INLINE QuatV QuatMerge(const FloatV* const v); +// (x = v[0], y = v[1], z = v[2], w =v[3]) +PX_FORCE_INLINE QuatV QuatMerge(const FloatVArg x, const FloatVArg y, const FloatVArg z, const FloatVArg w); +// (x = 0.f, y = 0.f, z = 0.f, w = 1.f) +PX_FORCE_INLINE QuatV QuatIdentity(); +// check for each component is valid +PX_FORCE_INLINE bool isFiniteQuatV(const QuatV q); +// check for each component is valid +PX_FORCE_INLINE bool isValidQuatV(const QuatV q); +// check for each component is valid +PX_FORCE_INLINE bool isSaneQuatV(const QuatV q); + +// Math operations on 16-byte aligned booleans. +// x=false y=false z=false w=false +PX_FORCE_INLINE BoolV BFFFF(); +// x=false y=false z=false w=true +PX_FORCE_INLINE BoolV BFFFT(); +// x=false y=false z=true w=false +PX_FORCE_INLINE BoolV BFFTF(); +// x=false y=false z=true w=true +PX_FORCE_INLINE BoolV BFFTT(); +// x=false y=true z=false w=false +PX_FORCE_INLINE BoolV BFTFF(); +// x=false y=true z=false w=true +PX_FORCE_INLINE BoolV BFTFT(); +// x=false y=true z=true w=false +PX_FORCE_INLINE BoolV BFTTF(); +// x=false y=true z=true w=true +PX_FORCE_INLINE BoolV BFTTT(); +// x=true y=false z=false w=false +PX_FORCE_INLINE BoolV BTFFF(); +// x=true y=false z=false w=true +PX_FORCE_INLINE BoolV BTFFT(); +// x=true y=false z=true w=false +PX_FORCE_INLINE BoolV BTFTF(); +// x=true y=false z=true w=true +PX_FORCE_INLINE BoolV BTFTT(); +// x=true y=true z=false w=false +PX_FORCE_INLINE BoolV BTTFF(); +// x=true y=true z=false w=true +PX_FORCE_INLINE BoolV BTTFT(); +// x=true y=true z=true w=false +PX_FORCE_INLINE BoolV BTTTF(); +// x=true y=true z=true w=true +PX_FORCE_INLINE BoolV BTTTT(); + +// x=false y=false z=false w=true +PX_FORCE_INLINE BoolV BWMask(); +// x=true y=false z=false w=false +PX_FORCE_INLINE BoolV BXMask(); +// x=false y=true z=false w=false 
+PX_FORCE_INLINE BoolV BYMask(); +// x=false y=false z=true w=false +PX_FORCE_INLINE BoolV BZMask(); + +// get x component +PX_FORCE_INLINE BoolV BGetX(const BoolV f); +// get y component +PX_FORCE_INLINE BoolV BGetY(const BoolV f); +// get z component +PX_FORCE_INLINE BoolV BGetZ(const BoolV f); +// get w component +PX_FORCE_INLINE BoolV BGetW(const BoolV f); + +// Use elementIndex to splat xxxx or yyyy or zzzz or wwww +template <int elementIndex> +PX_FORCE_INLINE BoolV BSplatElement(Vec4V a); + +// component-wise && (AND) +PX_FORCE_INLINE BoolV BAnd(const BoolV a, const BoolV b); +// component-wise || (OR) +PX_FORCE_INLINE BoolV BOr(const BoolV a, const BoolV b); +// component-wise not +PX_FORCE_INLINE BoolV BNot(const BoolV a); + +// if all four components are true, return true, otherwise return false +PX_FORCE_INLINE BoolV BAllTrue4(const BoolV a); + +// if any of the four components is true, return true, otherwise return false +PX_FORCE_INLINE BoolV BAnyTrue4(const BoolV a); + +// if all three (0, 1, 2) components are true, return true, otherwise return false +PX_FORCE_INLINE BoolV BAllTrue3(const BoolV a); + +// if any of the three (0, 1, 2) components is true, return true, otherwise return false +PX_FORCE_INLINE BoolV BAnyTrue3(const BoolV a); + +// Return 1 if all components equal, zero otherwise. +PX_FORCE_INLINE PxU32 BAllEq(const BoolV a, const BoolV b); + +// Specialized/faster BAllEq function for b==TTTT +PX_FORCE_INLINE PxU32 BAllEqTTTT(const BoolV a); +// Specialized/faster BAllEq function for b==FFFF +PX_FORCE_INLINE PxU32 BAllEqFFFF(const BoolV a); + +/// Get BoolV as bits set in a PxU32. A bit in the output is set if the element is 'true' in the input. +/// There is a bit for each element in a, with element 0's value held in bit 0, element 1's in bit 1 and so forth. +/// If nothing is true in the input it will return 0, and if all are true it will return 0xf. +/// NOTE! 
That performance of the function varies considerably by platform, thus it is recommended to use +/// where your algorithm really needs a BoolV in an integer variable. +PX_FORCE_INLINE PxU32 BGetBitMask(const BoolV a); + +// VecI32V stuff + +PX_FORCE_INLINE VecI32V VecI32V_Zero(); + +PX_FORCE_INLINE VecI32V VecI32V_One(); + +PX_FORCE_INLINE VecI32V VecI32V_Two(); + +PX_FORCE_INLINE VecI32V VecI32V_MinusOne(); + +// Compute a shift parameter for VecI32V_LeftShift and VecI32V_RightShift +// Each element of shift must be identical ie the vector must have form {count, count, count, count} with count>=0 +PX_FORCE_INLINE VecShiftV VecI32V_PrepareShift(const VecI32VArg shift); + +// Shift each element of a leftwards by the same amount +// Compute shift with VecI32V_PrepareShift +//{a.x<<shift[0], a.y<<shift[0], a.z<<shift[0], a.w<<shift[0]} +PX_FORCE_INLINE VecI32V VecI32V_LeftShift(const VecI32VArg a, const VecShiftVArg shift); + +// Shift each element of a rightwards by the same amount +// Compute shift with VecI32V_PrepareShift +//{a.x>>shift[0], a.y>>shift[0], a.z>>shift[0], a.w>>shift[0]} +PX_FORCE_INLINE VecI32V VecI32V_RightShift(const VecI32VArg a, const VecShiftVArg shift); + +PX_FORCE_INLINE VecI32V VecI32V_Add(const VecI32VArg a, const VecI32VArg b); + +PX_FORCE_INLINE VecI32V VecI32V_Or(const VecI32VArg a, const VecI32VArg b); + +PX_FORCE_INLINE VecI32V VecI32V_GetX(const VecI32VArg a); + +PX_FORCE_INLINE VecI32V VecI32V_GetY(const VecI32VArg a); + +PX_FORCE_INLINE VecI32V VecI32V_GetZ(const VecI32VArg a); + +PX_FORCE_INLINE VecI32V VecI32V_GetW(const VecI32VArg a); + +PX_FORCE_INLINE VecI32V VecI32V_Sub(const VecI32VArg a, const VecI32VArg b); + +PX_FORCE_INLINE BoolV VecI32V_IsGrtr(const VecI32VArg a, const VecI32VArg b); + +PX_FORCE_INLINE BoolV VecI32V_IsEq(const VecI32VArg a, const VecI32VArg b); + +PX_FORCE_INLINE VecI32V V4I32Sel(const BoolV c, const VecI32V a, const VecI32V b); + +// VecU32V stuff + +PX_FORCE_INLINE VecU32V U4Zero(); + +PX_FORCE_INLINE 
VecU32V U4One(); + +PX_FORCE_INLINE VecU32V U4Two(); + +PX_FORCE_INLINE BoolV V4IsEqU32(const VecU32V a, const VecU32V b); + +PX_FORCE_INLINE VecU32V V4U32Sel(const BoolV c, const VecU32V a, const VecU32V b); + +PX_FORCE_INLINE VecU32V V4U32or(VecU32V a, VecU32V b); + +PX_FORCE_INLINE VecU32V V4U32xor(VecU32V a, VecU32V b); + +PX_FORCE_INLINE VecU32V V4U32and(VecU32V a, VecU32V b); + +PX_FORCE_INLINE VecU32V V4U32Andc(VecU32V a, VecU32V b); + +// VecU32 - why does this not return a bool? +PX_FORCE_INLINE VecU32V V4IsGrtrV32u(const Vec4V a, const Vec4V b); + +// Math operations on 16-byte aligned Mat33s (represents any 3x3 matrix) +// a*b +PX_FORCE_INLINE Vec3V M33MulV3(const Mat33V& a, const Vec3V b); +// A*b + c +PX_FORCE_INLINE Vec3V M33MulV3AddV3(const Mat33V& A, const Vec3V b, const Vec3V c); +// transpose(a) * b +PX_FORCE_INLINE Vec3V M33TrnspsMulV3(const Mat33V& a, const Vec3V b); +// a*b +PX_FORCE_INLINE Mat33V M33MulM33(const Mat33V& a, const Mat33V& b); +// a+b +PX_FORCE_INLINE Mat33V M33Add(const Mat33V& a, const Mat33V& b); +// a-b +PX_FORCE_INLINE Mat33V M33Sub(const Mat33V& a, const Mat33V& b); +//-a +PX_FORCE_INLINE Mat33V M33Neg(const Mat33V& a); +// absolute value of the matrix +PX_FORCE_INLINE Mat33V M33Abs(const Mat33V& a); +// inverse mat +PX_FORCE_INLINE Mat33V M33Inverse(const Mat33V& a); +// transpose(a) +PX_FORCE_INLINE Mat33V M33Trnsps(const Mat33V& a); +// create an identity matrix +PX_FORCE_INLINE Mat33V M33Identity(); + +// create a Mat33V from a vec3 holding its diagonal elements (presumably zero elsewhere - TODO confirm) +PX_FORCE_INLINE Mat33V M33Diagonal(const Vec3VArg); + +// Not implemented +// return 1 if all components of a are equal to all components of b +// PX_FORCE_INLINE PxU32 V4U32AllEq(const VecU32V a, const VecU32V b); +// v.w=f +// PX_FORCE_INLINE void V3WriteW(Vec3V& v, const PxF32 f); +// PX_FORCE_INLINE PxF32 V3ReadW(const Vec3V& v); + +// Not used +// PX_FORCE_INLINE Vec4V V4LoadAligned(Vec4V* addr); +// PX_FORCE_INLINE Vec4V V4LoadUnaligned(Vec4V* addr); +// 
floor(a)(per component) +// PX_FORCE_INLINE Vec4V V4Floor(Vec4V a); +// ceil(a) (per component) +// PX_FORCE_INLINE Vec4V V4Ceil(Vec4V a); +// PX_FORCE_INLINE VecU32V V4ConvertToU32VSaturate(const Vec4V a, PxU32 power); + +// Math operations on 16-byte aligned Mat34s (represents transformation matrix - rotation and translation). +// namespace _Mat34V +//{ +// //a*b +// PX_FORCE_INLINE Vec3V multiplyV(const Mat34V& a, const Vec3V b); +// //a_rotation * b +// PX_FORCE_INLINE Vec3V multiply3X3V(const Mat34V& a, const Vec3V b); +// //transpose(a_rotation)*b +// PX_FORCE_INLINE Vec3V multiplyTranspose3X3V(const Mat34V& a, const Vec3V b); +// //a*b +// PX_FORCE_INLINE Mat34V multiplyV(const Mat34V& a, const Mat34V& b); +// //a_rotation*b +// PX_FORCE_INLINE Mat33V multiply3X3V(const Mat34V& a, const Mat33V& b); +// //a_rotation*b_rotation +// PX_FORCE_INLINE Mat33V multiply3X3V(const Mat34V& a, const Mat34V& b); +// //a+b +// PX_FORCE_INLINE Mat34V addV(const Mat34V& a, const Mat34V& b); +// //a^-1 +// PX_FORCE_INLINE Mat34V getInverseV(const Mat34V& a); +// //transpose(a_rotation) +// PX_FORCE_INLINE Mat33V getTranspose3X3(const Mat34V& a); +//}; //namespace _Mat34V + +// a*b +//#define M34MulV3(a,b) (M34MulV3(a,b)) +////a_rotation * b +//#define M34Mul33V3(a,b) (M34Mul33V3(a,b)) +////transpose(a_rotation)*b +//#define M34TrnspsMul33V3(a,b) (M34TrnspsMul33V3(a,b)) +////a*b +//#define M34MulM34(a,b) (_Mat34V::multiplyV(a,b)) +// a_rotation*b +//#define M34MulM33(a,b) (M34MulM33(a,b)) +// a_rotation*b_rotation +//#define M34Mul33MM34(a,b) (M34MulM33(a,b)) +// a+b +//#define M34Add(a,b) (M34Add(a,b)) +////a^-1 +//#define M34Inverse(a,b) (M34Inverse(a)) +// transpose(a_rotation) +//#define M34Trnsps33(a) (M33Trnsps3X3(a)) + +// Math operations on 16-byte aligned Mat44s (represents any 4x4 matrix) +// namespace _Mat44V +//{ +// //a*b +// PX_FORCE_INLINE Vec4V multiplyV(const Mat44V& a, const Vec4V b); +// //transpose(a)*b +// PX_FORCE_INLINE Vec4V multiplyTransposeV(const 
Mat44V& a, const Vec4V b); +// //a*b +// PX_FORCE_INLINE Mat44V multiplyV(const Mat44V& a, const Mat44V& b); +// //a+b +// PX_FORCE_INLINE Mat44V addV(const Mat44V& a, const Mat44V& b); +// //a&-1 +// PX_FORCE_INLINE Mat44V getInverseV(const Mat44V& a); +// //transpose(a) +// PX_FORCE_INLINE Mat44V getTransposeV(const Mat44V& a); +//}; //namespace _Mat44V + +// namespace _VecU32V +//{ +// // pack 8 U32s to 8 U16s with saturation +// PX_FORCE_INLINE VecU16V pack2U32VToU16VSaturate(VecU32V a, VecU32V b); +// PX_FORCE_INLINE VecU32V orV(VecU32V a, VecU32V b); +// PX_FORCE_INLINE VecU32V andV(VecU32V a, VecU32V b); +// PX_FORCE_INLINE VecU32V andcV(VecU32V a, VecU32V b); +// // conversion from integer to float +// PX_FORCE_INLINE Vec4V convertToVec4V(VecU32V a); +// // splat a[elementIndex] into all fields of a +// template<int elementIndex> +// PX_FORCE_INLINE VecU32V splatElement(VecU32V a); +// PX_FORCE_INLINE void storeAligned(VecU32V a, VecU32V* address); +//}; + +// namespace _VecI32V +//{ +// template<int a> PX_FORCE_INLINE VecI32V splatI32(); +//}; +// +// namespace _VecU16V +//{ +// PX_FORCE_INLINE VecU16V orV(VecU16V a, VecU16V b); +// PX_FORCE_INLINE VecU16V andV(VecU16V a, VecU16V b); +// PX_FORCE_INLINE VecU16V andcV(VecU16V a, VecU16V b); +// PX_FORCE_INLINE void storeAligned(VecU16V val, VecU16V *address); +// PX_FORCE_INLINE VecU16V loadAligned(VecU16V* addr); +// PX_FORCE_INLINE VecU16V loadUnaligned(VecU16V* addr); +// PX_FORCE_INLINE VecU16V compareGt(VecU16V a, VecU16V b); +// template<int elementIndex> +// PX_FORCE_INLINE VecU16V splatElement(VecU16V a); +// PX_FORCE_INLINE VecU16V subtractModulo(VecU16V a, VecU16V b); +// PX_FORCE_INLINE VecU16V addModulo(VecU16V a, VecU16V b); +// PX_FORCE_INLINE VecU32V getLo16(VecU16V a); // [0,2,4,6] 16-bit values to [0,1,2,3] 32-bit vector +// PX_FORCE_INLINE VecU32V getHi16(VecU16V a); // [1,3,5,7] 16-bit values to [0,1,2,3] 32-bit vector +//}; +// +// namespace _VecI16V +//{ +// template <int val> 
PX_FORCE_INLINE VecI16V splatImmediate(); +//}; +// +// namespace _VecU8V +//{ +//}; + +// a*b +//#define M44MulV4(a,b) (M44MulV4(a,b)) +////transpose(a)*b +//#define M44TrnspsMulV4(a,b) (M44TrnspsMulV4(a,b)) +////a*b +//#define M44MulM44(a,b) (M44MulM44(a,b)) +////a+b +//#define M44Add(a,b) (M44Add(a,b)) +////a&-1 +//#define M44Inverse(a) (M44Inverse(a)) +////transpose(a) +//#define M44Trnsps(a) (M44Trnsps(a)) + +// dsequeira: these used to be assert'd out in SIMD builds, but they're necessary if +// we want to be able to write some scalar functions which run using SIMD data structures + +PX_FORCE_INLINE void V3WriteX(Vec3V& v, const PxF32 f) +{ + reinterpret_cast<PxVec3&>(v).x = f; +} + +PX_FORCE_INLINE void V3WriteY(Vec3V& v, const PxF32 f) +{ + reinterpret_cast<PxVec3&>(v).y = f; +} + +PX_FORCE_INLINE void V3WriteZ(Vec3V& v, const PxF32 f) +{ + reinterpret_cast<PxVec3&>(v).z = f; +} + +PX_FORCE_INLINE void V3WriteXYZ(Vec3V& v, const PxVec3& f) +{ + reinterpret_cast<PxVec3&>(v) = f; +} + +PX_FORCE_INLINE PxF32 V3ReadX(const Vec3V& v) +{ + return reinterpret_cast<const PxVec3&>(v).x; +} + +PX_FORCE_INLINE PxF32 V3ReadY(const Vec3V& v) +{ + return reinterpret_cast<const PxVec3&>(v).y; +} + +PX_FORCE_INLINE PxF32 V3ReadZ(const Vec3V& v) +{ + return reinterpret_cast<const PxVec3&>(v).z; +} + +PX_FORCE_INLINE const PxVec3& V3ReadXYZ(const Vec3V& v) +{ + return reinterpret_cast<const PxVec3&>(v); +} + +PX_FORCE_INLINE void V4WriteX(Vec4V& v, const PxF32 f) +{ + reinterpret_cast<PxVec4&>(v).x = f; +} + +PX_FORCE_INLINE void V4WriteY(Vec4V& v, const PxF32 f) +{ + reinterpret_cast<PxVec4&>(v).y = f; +} + +PX_FORCE_INLINE void V4WriteZ(Vec4V& v, const PxF32 f) +{ + reinterpret_cast<PxVec4&>(v).z = f; +} + +PX_FORCE_INLINE void V4WriteW(Vec4V& v, const PxF32 f) +{ + reinterpret_cast<PxVec4&>(v).w = f; +} + +PX_FORCE_INLINE void V4WriteXYZ(Vec4V& v, const PxVec3& f) +{ + reinterpret_cast<PxVec3&>(v) = f; +} + +PX_FORCE_INLINE PxF32 V4ReadX(const Vec4V& v) +{ + return 
reinterpret_cast<const PxVec4&>(v).x; +} + +PX_FORCE_INLINE PxF32 V4ReadY(const Vec4V& v) +{ + return reinterpret_cast<const PxVec4&>(v).y; +} + +PX_FORCE_INLINE PxF32 V4ReadZ(const Vec4V& v) +{ + return reinterpret_cast<const PxVec4&>(v).z; +} + +PX_FORCE_INLINE PxF32 V4ReadW(const Vec4V& v) +{ + return reinterpret_cast<const PxVec4&>(v).w; +} + +PX_FORCE_INLINE const PxVec3& V4ReadXYZ(const Vec4V& v) +{ + return reinterpret_cast<const PxVec3&>(v); +} + +// this macro transposes 4 Vec4V into 3 Vec4V (assuming that the W component can be ignored +#define PX_TRANSPOSE_44_34(inA, inB, inC, inD, outA, outB, outC) \ + \ +outA = V4UnpackXY(inA, inC); \ + \ +inA = V4UnpackZW(inA, inC); \ + \ +inC = V4UnpackXY(inB, inD); \ + \ +inB = V4UnpackZW(inB, inD); \ + \ +outB = V4UnpackZW(outA, inC); \ + \ +outA = V4UnpackXY(outA, inC); \ + \ +outC = V4UnpackXY(inA, inB); + +// this macro transposes 3 Vec4V into 4 Vec4V (with W components as garbage!) +#define PX_TRANSPOSE_34_44(inA, inB, inC, outA, outB, outC, outD) \ + outA = V4UnpackXY(inA, inC); \ + inA = V4UnpackZW(inA, inC); \ + outC = V4UnpackXY(inB, inB); \ + inC = V4UnpackZW(inB, inB); \ + outB = V4UnpackZW(outA, outC); \ + outA = V4UnpackXY(outA, outC); \ + outC = V4UnpackXY(inA, inC); \ + outD = V4UnpackZW(inA, inC); + +#define PX_TRANSPOSE_44(inA, inB, inC, inD, outA, outB, outC, outD) \ + outA = V4UnpackXY(inA, inC); \ + inA = V4UnpackZW(inA, inC); \ + inC = V4UnpackXY(inB, inD); \ + inB = V4UnpackZW(inB, inD); \ + outB = V4UnpackZW(outA, inC); \ + outA = V4UnpackXY(outA, inC); \ + outC = V4UnpackXY(inA, inB); \ + outD = V4UnpackZW(inA, inB); + +// This function returns a Vec4V, where each element is the dot product of one pair of Vec3Vs. On PC, each element in +// the result should be identical to the results if V3Dot was performed +// for each pair of Vec3V. +// However, on other platforms, the result might diverge by some small margin due to differences in FP rounding, e.g. 
if +// _mm_dp_ps was used or some other approximate dot product or fused madd operations +// were used. +// Where there does not exist a hw-accelerated dot-product operation, this approach should be the fastest way to compute +// the dot product of 4 vectors. +PX_FORCE_INLINE Vec4V V3Dot4(const Vec3VArg a0, const Vec3VArg b0, const Vec3VArg a1, const Vec3VArg b1, + const Vec3VArg a2, const Vec3VArg b2, const Vec3VArg a3, const Vec3VArg b3) +{ + Vec4V a0b0 = Vec4V_From_Vec3V(V3Mul(a0, b0)); + Vec4V a1b1 = Vec4V_From_Vec3V(V3Mul(a1, b1)); + Vec4V a2b2 = Vec4V_From_Vec3V(V3Mul(a2, b2)); + Vec4V a3b3 = Vec4V_From_Vec3V(V3Mul(a3, b3)); + + Vec4V aTrnsps, bTrnsps, cTrnsps; + + PX_TRANSPOSE_44_34(a0b0, a1b1, a2b2, a3b3, aTrnsps, bTrnsps, cTrnsps); + + return V4Add(V4Add(aTrnsps, bTrnsps), cTrnsps); +} + +//(f.x,f.y,f.z,0) - Alternative/faster V3LoadU implementation when it is safe to read "W", i.e. the 32bits after the PxVec3. +PX_FORCE_INLINE Vec3V V3LoadU_SafeReadW(const PxVec3& f) +{ + return Vec3V_From_Vec4V(V4LoadU(&f.x)); +} + +// Now for the cross-platform implementations of the 16-byte aligned maths functions (win32/360/ppu/spu etc). +#if COMPILE_VECTOR_INTRINSICS +#include "PsInlineAoS.h" +#else // #if COMPILE_VECTOR_INTRINSICS +#include "PsVecMathAoSScalarInline.h" +#endif // #if !COMPILE_VECTOR_INTRINSICS +#include "PsVecQuat.h" + +} // namespace aos +} // namespace shdfnd +} // namespace physx + +#endif // PSFOUNDATION_PSVECMATH_H diff --git a/PxShared/src/foundation/include/PsVecMathAoSScalar.h b/PxShared/src/foundation/include/PsVecMathAoSScalar.h new file mode 100644 index 0000000..beb6cdc --- /dev/null +++ b/PxShared/src/foundation/include/PsVecMathAoSScalar.h @@ -0,0 +1,242 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSVECMATHAOSSCALAR_H +#define PSFOUNDATION_PSVECMATHAOSSCALAR_H + +#if COMPILE_VECTOR_INTRINSICS +#error Scalar version should not be included when using vector intrinsics. +#endif + +// Remove this define when all platforms use simd solver. 
+#define PX_SUPPORT_SIMD + +struct VecI16V; +struct VecU16V; +struct VecI32V; +struct VecU32V; +struct Vec4V; +typedef Vec4V QuatV; + +PX_ALIGN_PREFIX(16) +struct FloatV +{ + PxF32 x; + PxF32 pad[3]; + FloatV() + { + } + FloatV(const PxF32 _x) : x(_x) + { + } +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct Vec4V +{ + PxF32 x, y, z, w; + Vec4V() + { + } + Vec4V(const PxF32 _x, const PxF32 _y, const PxF32 _z, const PxF32 _w) : x(_x), y(_y), z(_z), w(_w) + { + } +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct Vec3V +{ + PxF32 x, y, z; + PxF32 pad; + Vec3V() + { + } + Vec3V(const PxF32 _x, const PxF32 _y, const PxF32 _z) : x(_x), y(_y), z(_z), pad(0.0f) + { + } +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct BoolV +{ + PxU32 ux, uy, uz, uw; + BoolV() + { + } + BoolV(const PxU32 _x, const PxU32 _y, const PxU32 _z, const PxU32 _w) : ux(_x), uy(_y), uz(_z), uw(_w) + { + } +} PX_ALIGN_SUFFIX(16); + +struct Mat33V +{ + Mat33V() + { + } + Mat33V(const Vec3V& c0, const Vec3V& c1, const Vec3V& c2) : col0(c0), col1(c1), col2(c2) + { + } + Vec3V col0; + Vec3V col1; + Vec3V col2; +}; + +struct Mat34V +{ + Mat34V() + { + } + Mat34V(const Vec3V& c0, const Vec3V& c1, const Vec3V& c2, const Vec3V& c3) : col0(c0), col1(c1), col2(c2), col3(c3) + { + } + Vec3V col0; + Vec3V col1; + Vec3V col2; + Vec3V col3; +}; + +struct Mat43V +{ + Mat43V() + { + } + Mat43V(const Vec4V& c0, const Vec4V& c1, const Vec4V& c2) : col0(c0), col1(c1), col2(c2) + { + } + Vec4V col0; + Vec4V col1; + Vec4V col2; +}; + +struct Mat44V +{ + Mat44V() + { + } + Mat44V(const Vec4V& c0, const Vec4V& c1, const Vec4V& c2, const Vec4V& c3) : col0(c0), col1(c1), col2(c2), col3(c3) + { + } + Vec4V col0; + Vec4V col1; + Vec4V col2; + Vec4V col3; +}; + +PX_ALIGN_PREFIX(16) +struct VecU32V +{ + PxU32 u32[4]; + PX_FORCE_INLINE VecU32V() + { + } + PX_FORCE_INLINE VecU32V(PxU32 a, PxU32 b, PxU32 c, PxU32 d) + { + u32[0] = a; + u32[1] = b; + u32[2] = c; + u32[3] = d; + } +} PX_ALIGN_SUFFIX(16); + 
+PX_ALIGN_PREFIX(16) +struct VecI32V +{ + PxI32 i32[4]; + PX_FORCE_INLINE VecI32V() + { + } + PX_FORCE_INLINE VecI32V(PxI32 a, PxI32 b, PxI32 c, PxI32 d) + { + i32[0] = a; + i32[1] = b; + i32[2] = c; + i32[3] = d; + } +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct VecI16V +{ + PxI16 i16[8]; + PX_FORCE_INLINE VecI16V() + { + } + PX_FORCE_INLINE VecI16V(PxI16 a, PxI16 b, PxI16 c, PxI16 d, PxI16 e, PxI16 f, PxI16 g, PxI16 h) + { + i16[0] = a; + i16[1] = b; + i16[2] = c; + i16[3] = d; + i16[4] = e; + i16[5] = f; + i16[6] = g; + i16[7] = h; + } +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct VecU16V +{ + union + { + PxU16 u16[8]; + PxI16 i16[8]; + }; + PX_FORCE_INLINE VecU16V() + { + } + PX_FORCE_INLINE VecU16V(PxU16 a, PxU16 b, PxU16 c, PxU16 d, PxU16 e, PxU16 f, PxU16 g, PxU16 h) + { + u16[0] = a; + u16[1] = b; + u16[2] = c; + u16[3] = d; + u16[4] = e; + u16[5] = f; + u16[6] = g; + u16[7] = h; + } +} PX_ALIGN_SUFFIX(16); + +#define FloatVArg FloatV & +#define Vec3VArg Vec3V & +#define Vec4VArg Vec4V & +#define BoolVArg BoolV & +#define VecU32VArg VecU32V & +#define VecI32VArg VecI32V & +#define VecU16VArg VecU16V & +#define VecI16VArg VecI16V & +#define QuatVArg QuatV & + +#define VecCrossV Vec3V + +typedef VecI32V VecShiftV; +#define VecShiftVArg VecShiftV & + +#endif // PX_PHYSICS_COMMON_VECMATH_INLINE_SCALAR diff --git a/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h b/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h new file mode 100644 index 0000000..9bef465 --- /dev/null +++ b/PxShared/src/foundation/include/PsVecMathAoSScalarInline.h @@ -0,0 +1,2254 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSVECMATHAOSSCALARINLINE_H +#define PSFOUNDATION_PSVECMATHAOSSCALARINLINE_H + +#if COMPILE_VECTOR_INTRINSICS +#error Scalar version should not be included when using vector intrinsics. 
+#endif + +#define BOOL_TO_U32(b) (PxU32)(- PxI32(b)) +#define TRUE_TO_U32 (PxU32)(-1) +#define FALSE_TO_U32 (PxU32)(0) + +#define BOOL_TO_U16(b) (PxU16)(- PxI32(b)) + + +#define VECMATHAOS_ASSERT(x) { PX_ASSERT(x); } + +///////////////////////////////////////////////////////////////////// +////INTERNAL USE ONLY AND TESTS +///////////////////////////////////////////////////////////////////// + +namespace internalScalarSimd +{ +PX_FORCE_INLINE PxF32 FStore(const FloatV a) +{ + return a.x; +} + +PX_FORCE_INLINE bool hasZeroElementInFloatV(const FloatV a) +{ + return (0 == a.x); +} + +PX_FORCE_INLINE bool hasZeroElementInVec3V(const Vec3V a) +{ + return (0 == a.x || 0 == a.y || 0 == a.z); +} + +PX_FORCE_INLINE bool hasZeroElementInVec4V(const Vec4V a) +{ + return (0 == a.x || 0 == a.y || 0 == a.z || 0 == a.w); +} +} + +namespace _VecMathTests +{ +// PT: this function returns an invalid Vec3V (W!=0.0f) just for unit-testing 'isValidVec3V' +PX_FORCE_INLINE Vec3V getInvalidVec3V() +{ + Vec3V tmp; + tmp.x = tmp.y = tmp.z = 0.0f; + tmp.pad = 1.0f; + return tmp; +} + +PX_FORCE_INLINE bool allElementsEqualFloatV(const FloatV a, const FloatV b) +{ + return (a.x == b.x); +} + +PX_FORCE_INLINE bool allElementsEqualVec3V(const Vec3V a, const Vec3V b) +{ + return (a.x == b.x && a.y == b.y && a.z == b.z); +} + +PX_FORCE_INLINE bool allElementsEqualVec4V(const Vec4V a, const Vec4V b) +{ + return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); +} + +PX_FORCE_INLINE bool allElementsEqualBoolV(const BoolV a, const BoolV b) +{ + return (a.ux == b.ux && a.uy == b.uy && a.uz == b.uz && a.uw == b.uw); +} + +PX_FORCE_INLINE bool allElementsEqualVecU32V(const VecU32V a, const VecU32V b) +{ + return (a.u32[0] == b.u32[0] && a.u32[1] == b.u32[1] && a.u32[2] == b.u32[2] && a.u32[3] == b.u32[3]); +} + +PX_FORCE_INLINE bool allElementsEqualVecI32V(const VecI32V a, const VecI32V b) +{ + return (a.i32[0] == b.i32[0] && a.i32[1] == b.i32[1] && a.i32[2] == b.i32[2] && a.i32[3] == b.i32[3]); 
+} + +#define VECMATH_AOS_EPSILON (1e-3f) + +PX_FORCE_INLINE bool allElementsNearEqualFloatV(const FloatV a, const FloatV b) +{ + const PxF32 cx = a.x - b.x; + return (cx > -VECMATH_AOS_EPSILON && cx < VECMATH_AOS_EPSILON); +} + +PX_FORCE_INLINE bool allElementsNearEqualVec3V(const Vec3V a, const Vec3V b) +{ + const PxF32 cx = a.x - b.x; + const PxF32 cy = a.y - b.y; + const PxF32 cz = a.z - b.z; + return (cx > -VECMATH_AOS_EPSILON && cx < VECMATH_AOS_EPSILON && cy > -VECMATH_AOS_EPSILON && + cy < VECMATH_AOS_EPSILON && cz > -VECMATH_AOS_EPSILON && cz < VECMATH_AOS_EPSILON); +} + +PX_FORCE_INLINE bool allElementsNearEqualVec4V(const Vec4V a, const Vec4V b) +{ + const PxF32 cx = a.x - b.x; + const PxF32 cy = a.y - b.y; + const PxF32 cz = a.z - b.z; + const PxF32 cw = a.w - b.w; + return (cx > -VECMATH_AOS_EPSILON && cx < VECMATH_AOS_EPSILON && cy > -VECMATH_AOS_EPSILON && + cy < VECMATH_AOS_EPSILON && cz > -VECMATH_AOS_EPSILON && cz < VECMATH_AOS_EPSILON && + cw > -VECMATH_AOS_EPSILON && cw < VECMATH_AOS_EPSILON); +} +} + +/////////////////////////////////////////////////////// + +PX_FORCE_INLINE bool isValidVec3V(const Vec3V a) +{ + return a.pad == 0.f; +} + +PX_FORCE_INLINE bool isFiniteFloatV(const FloatV a) +{ + return PxIsFinite(a.x); +} + +PX_FORCE_INLINE bool isFiniteVec3V(const Vec3V a) +{ + return PxIsFinite(a.x) && PxIsFinite(a.y) && PxIsFinite(a.z); +} + +PX_FORCE_INLINE bool isFiniteVec4V(const Vec4V a) +{ + return PxIsFinite(a.x) && PxIsFinite(a.y) && PxIsFinite(a.z) && PxIsFinite(a.w); +} + +///////////////////////////////////////////////////////////////////// +////VECTORISED FUNCTION IMPLEMENTATIONS +///////////////////////////////////////////////////////////////////// + +PX_FORCE_INLINE FloatV FLoad(const PxF32 f) +{ + return FloatV(f); +} + +PX_FORCE_INLINE Vec3V V3Load(const PxF32 f) +{ + return Vec3V(f, f, f); +} + +PX_FORCE_INLINE Vec4V V4Load(const PxF32 f) +{ + return Vec4V(f, f, f, f); +} + +PX_FORCE_INLINE BoolV BLoad(const bool f) +{ +#if 
PX_ARM + // SD: Android ARM builds fail if this is done with a cast. + // Might also fail because of something else but the select + // operator here seems to fix everything that failed in release builds. + return f ? BTTTT() : BFFFF(); +#else + return BoolV(BOOL_TO_U32(f), BOOL_TO_U32(f), BOOL_TO_U32(f), BOOL_TO_U32(f)); +#endif +} + +PX_FORCE_INLINE Vec3V V3LoadA(const PxVec3& f) +{ + return Vec3V(f.x, f.y, f.z); +} + +PX_FORCE_INLINE Vec3V V3LoadU(const PxVec3& f) +{ + return Vec3V(f.x, f.y, f.z); +} + +PX_FORCE_INLINE Vec3V V3LoadUnsafeA(const PxVec3& f) +{ + return Vec3V(f.x, f.y, f.z); +} + +PX_FORCE_INLINE Vec3V V3LoadA(const PxF32* const f) +{ + return Vec3V(f[0], f[1], f[2]); +} + +PX_FORCE_INLINE Vec3V V3LoadU(const PxF32* const f) +{ + return Vec3V(f[0], f[1], f[2]); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V(Vec4V f) +{ + return Vec3V(f.x, f.y, f.z); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V_WUndefined(const Vec4V v) +{ + return Vec3V(v.x, v.y, v.z); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_Vec3V(Vec3V f) +{ + return Vec4V(f.x, f.y, f.z, 0.0f); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_FloatV(FloatV f) +{ + return Vec4V(f.x, f.x, f.x, f.x); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_FloatV(FloatV f) +{ + return Vec3V(f.x, f.x, f.x); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_FloatV_WUndefined(FloatV f) +{ + return Vec3V(f.x, f.x, f.x); +} + +PX_FORCE_INLINE Vec4V V4LoadA(const PxF32* const f) +{ + return Vec4V(f[0], f[1], f[2], f[3]); +} + +PX_FORCE_INLINE void V4StoreA(const Vec4V a, PxF32* f) +{ + *reinterpret_cast<Vec4V*>(f) = a; +} + +PX_FORCE_INLINE void V4StoreU(const Vec4V a, PxF32* f) +{ + *reinterpret_cast<PxVec4*>(f) = *reinterpret_cast<const PxVec4*>(&a.x); +} + +PX_FORCE_INLINE void BStoreA(const BoolV a, PxU32* f) +{ + *reinterpret_cast<BoolV*>(f) = a; +} + +PX_FORCE_INLINE void U4StoreA(const VecU32V uv, PxU32* u) +{ + *reinterpret_cast<VecU32V*>(u) = uv; +} + +PX_FORCE_INLINE void I4StoreA(const VecI32V iv, PxI32* i) +{ + 
*reinterpret_cast<VecI32V*>(i) = iv; +} + +PX_FORCE_INLINE Vec4V V4LoadU(const PxF32* const f) +{ + return Vec4V(f[0], f[1], f[2], f[3]); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_PxVec3_WUndefined(const PxVec3& f) +{ + return Vec4V(f[0], f[1], f[2], 0.0f); +} + +PX_FORCE_INLINE BoolV BLoad(const bool* const f) +{ + return BoolV(BOOL_TO_U32(f[0]), BOOL_TO_U32(f[1]), BOOL_TO_U32(f[2]), BOOL_TO_U32(f[3])); +} + +PX_FORCE_INLINE void FStore(const FloatV a, PxF32* PX_RESTRICT f) +{ + *f = a.x; +} + +PX_FORCE_INLINE void V3StoreA(const Vec3V a, PxVec3& f) +{ + f = PxVec3(a.x, a.y, a.z); +} + +PX_FORCE_INLINE void V3StoreU(const Vec3V a, PxVec3& f) +{ + f = PxVec3(a.x, a.y, a.z); +} + +PX_FORCE_INLINE void Store_From_BoolV(const BoolV b, PxU32* b2) +{ + *b2 = b.ux; +} + +////////////////////////// +// FLOATV +////////////////////////// + +PX_FORCE_INLINE FloatV FZero() +{ + return FLoad(0.0f); +} + +PX_FORCE_INLINE FloatV FOne() +{ + return FLoad(1.0f); +} + +PX_FORCE_INLINE FloatV FHalf() +{ + return FLoad(0.5f); +} + +PX_FORCE_INLINE FloatV FEps() +{ + return FLoad(PX_EPS_REAL); +} + +PX_FORCE_INLINE FloatV FEps6() +{ + return FLoad(1e-6f); +} + +PX_FORCE_INLINE FloatV FMax() +{ + return FLoad(PX_MAX_REAL); +} + +PX_FORCE_INLINE FloatV FNegMax() +{ + return FLoad(-PX_MAX_REAL); +} + +PX_FORCE_INLINE FloatV FNeg(const FloatV f) +{ + return FloatV(-f.x); +} + +PX_FORCE_INLINE FloatV FAdd(const FloatV a, const FloatV b) +{ + return FloatV(a.x + b.x); +} + +PX_FORCE_INLINE FloatV FSub(const FloatV a, const FloatV b) +{ + return FloatV(a.x - b.x); +} + +PX_FORCE_INLINE FloatV FMul(const FloatV a, const FloatV b) +{ + return FloatV(a.x * b.x); +} + +PX_FORCE_INLINE FloatV FDiv(const FloatV a, const FloatV b) +{ + VECMATHAOS_ASSERT(b.x != 0.0f); + return FloatV(a.x / b.x); +} + +PX_FORCE_INLINE FloatV FDivFast(const FloatV a, const FloatV b) +{ + VECMATHAOS_ASSERT(b.x != 0.0f); + return FloatV(a.x / b.x); +} + +PX_FORCE_INLINE FloatV FRecip(const FloatV a) +{ + 
VECMATHAOS_ASSERT(a.x != 0.0f); + return 1.0f / a.x; +} + +PX_FORCE_INLINE FloatV FRecipFast(const FloatV a) +{ + VECMATHAOS_ASSERT(a.x != 0.0f); + return 1.0f / a.x; +} + +PX_FORCE_INLINE FloatV FRsqrt(const FloatV a) +{ + VECMATHAOS_ASSERT(a.x != 0.0f); + return PxRecipSqrt(a.x); +} + +PX_FORCE_INLINE FloatV FSqrt(const FloatV a) +{ + return PxSqrt(a.x); +} + +PX_FORCE_INLINE FloatV FRsqrtFast(const FloatV a) +{ + VECMATHAOS_ASSERT(a.x != 0.0f); + return PxRecipSqrt(a.x); +} + +PX_FORCE_INLINE FloatV FScaleAdd(const FloatV a, const FloatV b, const FloatV c) +{ + return FAdd(FMul(a, b), c); +} + +PX_FORCE_INLINE FloatV FNegScaleSub(const FloatV a, const FloatV b, const FloatV c) +{ + return FSub(c, FMul(a, b)); +} + +PX_FORCE_INLINE FloatV FAbs(const FloatV a) +{ + return FloatV(PxAbs(a.x)); +} + +PX_FORCE_INLINE FloatV FSel(const BoolV c, const FloatV a, const FloatV b) +{ + return FloatV(c.ux ? a.x : b.x); +} + +PX_FORCE_INLINE BoolV FIsGrtr(const FloatV a, const FloatV b) +{ + return BLoad(a.x > b.x); +} + +PX_FORCE_INLINE BoolV FIsGrtrOrEq(const FloatV a, const FloatV b) +{ + return BLoad(a.x >= b.x); +} + +PX_FORCE_INLINE BoolV FIsEq(const FloatV a, const FloatV b) +{ + return BLoad(a.x == b.x); +} + +PX_FORCE_INLINE FloatV FMax(const FloatV a, const FloatV b) +{ + return (a.x > b.x ? FloatV(a.x) : FloatV(b.x)); +} + +PX_FORCE_INLINE FloatV FMin(const FloatV a, const FloatV b) +{ + return (a.x > b.x ? 
FloatV(b.x) : FloatV(a.x)); +} + +PX_FORCE_INLINE FloatV FClamp(const FloatV a, const FloatV minV, const FloatV maxV) +{ + return FMax(FMin(a, maxV), minV); +} + +PX_FORCE_INLINE PxU32 FAllGrtr(const FloatV a, const FloatV b) +{ + return BOOL_TO_U32(a.x > b.x); +} + +PX_FORCE_INLINE PxU32 FAllGrtrOrEq(const FloatV a, const FloatV b) +{ + return BOOL_TO_U32(a.x >= b.x); +} +PX_FORCE_INLINE PxU32 FAllEq(const FloatV a, const FloatV b) +{ + return BOOL_TO_U32(a.x == b.x); +} + +PX_FORCE_INLINE FloatV FRound(const FloatV a) +{ + return floorf(a.x + 0.5f); +} + +PX_FORCE_INLINE FloatV FSin(const FloatV a) +{ + return sinf(a.x); +} + +PX_FORCE_INLINE FloatV FCos(const FloatV a) +{ + return cosf(a.x); +} + +PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV min, const FloatV max) +{ + return BOOL_TO_U32(a.x > max.x || a.x < min.x); +} + +PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV min, const FloatV max) +{ + return BOOL_TO_U32(a.x >= min.x && a.x <= max.x); +} + +PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV bounds) +{ + return FOutOfBounds(a, FNeg(bounds), bounds); +} + +PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV bounds) +{ + return FInBounds(a, FNeg(bounds), bounds); +} + +///////////////////// +// VEC3V +///////////////////// + +PX_FORCE_INLINE Vec3V V3Splat(const FloatV f) +{ + return Vec3V(f.x, f.x, f.x); +} + +PX_FORCE_INLINE Vec3V V3Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z) +{ + return Vec3V(x.x, y.x, z.x); +} + +PX_FORCE_INLINE Vec3V V3UnitX() +{ + return Vec3V(1.0f, 0.0f, 0.0f); +} + +PX_FORCE_INLINE Vec3V V3UnitY() +{ + return Vec3V(0.0f, 1.0f, 0.0f); +} + +PX_FORCE_INLINE Vec3V V3UnitZ() +{ + return Vec3V(0.0f, 0.0f, 1.0f); +} + +PX_FORCE_INLINE FloatV V3GetX(const Vec3V f) +{ + return FloatV(f.x); +} + +PX_FORCE_INLINE FloatV V3GetY(const Vec3V f) +{ + return FloatV(f.y); +} + +PX_FORCE_INLINE FloatV V3GetZ(const Vec3V f) +{ + return FloatV(f.z); +} + +PX_FORCE_INLINE Vec3V 
V3SetX(const Vec3V v, const FloatV f) +{ + return Vec3V(f.x, v.y, v.z); +} + +PX_FORCE_INLINE Vec3V V3SetY(const Vec3V v, const FloatV f) +{ + return Vec3V(v.x, f.x, v.z); +} + +PX_FORCE_INLINE Vec3V V3SetZ(const Vec3V v, const FloatV f) +{ + return Vec3V(v.x, v.y, f.x); +} + +PX_FORCE_INLINE Vec3V V3ColX(const Vec3V a, const Vec3V b, const Vec3V c) +{ + return Vec3V(a.x, b.x, c.x); +} + +PX_FORCE_INLINE Vec3V V3ColY(const Vec3V a, const Vec3V b, const Vec3V c) +{ + return Vec3V(a.y, b.y, c.y); +} + +PX_FORCE_INLINE Vec3V V3ColZ(const Vec3V a, const Vec3V b, const Vec3V c) +{ + return Vec3V(a.z, b.z, c.z); +} + +PX_FORCE_INLINE Vec3V V3Zero() +{ + return V3Load(0.0f); +} + +PX_FORCE_INLINE Vec3V V3One() +{ + return V3Load(1.0f); +} + +PX_FORCE_INLINE Vec3V V3Eps() +{ + return V3Load(PX_EPS_REAL); +} + +PX_FORCE_INLINE Vec3V V3Neg(const Vec3V c) +{ + return Vec3V(-c.x, -c.y, -c.z); +} + +PX_FORCE_INLINE Vec3V V3Add(const Vec3V a, const Vec3V b) +{ + return Vec3V(a.x + b.x, a.y + b.y, a.z + b.z); +} + +PX_FORCE_INLINE Vec3V V3Sub(const Vec3V a, const Vec3V b) +{ + return Vec3V(a.x - b.x, a.y - b.y, a.z - b.z); +} + +PX_FORCE_INLINE Vec3V V3Scale(const Vec3V a, const FloatV b) +{ + return Vec3V(a.x * b.x, a.y * b.x, a.z * b.x); +} + +PX_FORCE_INLINE Vec3V V3Mul(const Vec3V a, const Vec3V b) +{ + return Vec3V(a.x * b.x, a.y * b.y, a.z * b.z); +} + +PX_FORCE_INLINE Vec3V V3ScaleInv(const Vec3V a, const FloatV b) +{ + const PxF32 bInv = 1.0f / b.x; + return Vec3V(a.x * bInv, a.y * bInv, a.z * bInv); +} + +PX_FORCE_INLINE Vec3V V3Div(const Vec3V a, const Vec3V b) +{ + return Vec3V(a.x / b.x, a.y / b.y, a.z / b.z); +} + +PX_FORCE_INLINE Vec3V V3ScaleInvFast(const Vec3V a, const FloatV b) +{ + const PxF32 bInv = 1.0f / b.x; + return Vec3V(a.x * bInv, a.y * bInv, a.z * bInv); +} + +PX_FORCE_INLINE Vec3V V3DivFast(const Vec3V a, const Vec3V b) +{ + return Vec3V(a.x / b.x, a.y / b.y, a.z / b.z); +} + +PX_FORCE_INLINE Vec3V V3Recip(const Vec3V a) +{ + return Vec3V(1.0f / a.x, 
1.0f / a.y, 1.0f / a.z); +} + +PX_FORCE_INLINE Vec3V V3RecipFast(const Vec3V a) +{ + return Vec3V(1.0f / a.x, 1.0f / a.y, 1.0f / a.z); +} + +PX_FORCE_INLINE Vec3V V3Rsqrt(const Vec3V a) +{ + return Vec3V(PxRecipSqrt(a.x), PxRecipSqrt(a.y), PxRecipSqrt(a.z)); +} + +PX_FORCE_INLINE Vec3V V3RsqrtFast(const Vec3V a) +{ + return Vec3V(PxRecipSqrt(a.x), PxRecipSqrt(a.y), PxRecipSqrt(a.z)); +} + +PX_FORCE_INLINE Vec3V V3ScaleAdd(const Vec3V a, const FloatV b, const Vec3V c) +{ + return V3Add(V3Scale(a, b), c); +} + +PX_FORCE_INLINE Vec3V V3NegScaleSub(const Vec3V a, const FloatV b, const Vec3V c) +{ + return V3Sub(c, V3Scale(a, b)); +} + +PX_FORCE_INLINE Vec3V V3MulAdd(const Vec3V a, const Vec3V b, const Vec3V c) +{ + return V3Add(V3Mul(a, b), c); +} + +PX_FORCE_INLINE Vec3V V3NegMulSub(const Vec3V a, const Vec3V b, const Vec3V c) +{ + return V3Sub(c, V3Mul(a, b)); +} + +PX_FORCE_INLINE FloatV V3Dot(const Vec3V a, const Vec3V b) +{ + return FloatV(a.x * b.x + a.y * b.y + a.z * b.z); +} + +PX_FORCE_INLINE VecCrossV V3PrepareCross(const Vec3VArg normal) +{ + return normal; +} + +PX_FORCE_INLINE Vec3V V3Cross(const Vec3V a, const Vec3V b) +{ + return Vec3V(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); +} + +PX_FORCE_INLINE FloatV V3Length(const Vec3V a) +{ + return FloatV(PxSqrt(a.x * a.x + a.y * a.y + a.z * a.z)); +} + +PX_FORCE_INLINE FloatV V3LengthSq(const Vec3V a) +{ + return FloatV(a.x * a.x + a.y * a.y + a.z * a.z); +} + +PX_FORCE_INLINE Vec3V V3Normalize(const Vec3V a) +{ + VECMATHAOS_ASSERT(a.x != 0 || a.y != 0 || a.z != 0); + const PxF32 lengthInv = 1.0f / PxSqrt(a.x * a.x + a.y * a.y + a.z * a.z); + return Vec3V(a.x * lengthInv, a.y * lengthInv, a.z * lengthInv); +} + +PX_FORCE_INLINE Vec3V V3NormalizeSafe(const Vec3V a, const Vec3V unsafeReturnValue) +{ + const PxF32 length = PxSqrt(a.x * a.x + a.y * a.y + a.z * a.z); + if(PX_EPS_REAL >= length) + { + return unsafeReturnValue; + } + else + { + const PxF32 lengthInv = 1.0f / length; + 
return Vec3V(a.x * lengthInv, a.y * lengthInv, a.z * lengthInv); + } +} + +PX_FORCE_INLINE Vec3V V3NormalizeFast(const Vec3V a) +{ + VECMATHAOS_ASSERT(a.x != 0 || a.y != 0 || a.z != 0); + const PxF32 lengthInv = 1.0f / PxSqrt(a.x * a.x + a.y * a.y + a.z * a.z); + return Vec3V(a.x * lengthInv, a.y * lengthInv, a.z * lengthInv); +} + +PX_FORCE_INLINE Vec3V V3Sel(const BoolV c, const Vec3V a, const Vec3V b) +{ + return Vec3V(c.ux ? a.x : b.x, c.uy ? a.y : b.y, c.uz ? a.z : b.z); +} + +PX_FORCE_INLINE BoolV V3IsGrtr(const Vec3V a, const Vec3V b) +{ + return BoolV(BOOL_TO_U32(a.x > b.x), BOOL_TO_U32(a.y > b.y), BOOL_TO_U32(a.z > b.z), FALSE_TO_U32); +} + +PX_FORCE_INLINE BoolV V3IsGrtrOrEq(const Vec3V a, const Vec3V b) +{ + return BoolV(BOOL_TO_U32(a.x >= b.x), BOOL_TO_U32(a.y >= b.y), BOOL_TO_U32(a.z >= b.z), TRUE_TO_U32); +} + +PX_FORCE_INLINE BoolV V3IsEq(const Vec3V a, const Vec3V b) +{ + return BoolV(BOOL_TO_U32(a.x == b.x), BOOL_TO_U32(a.y == b.y), BOOL_TO_U32(a.z == b.z), TRUE_TO_U32); +} + +PX_FORCE_INLINE Vec3V V3Max(const Vec3V a, const Vec3V b) +{ + return Vec3V(a.x > b.x ? a.x : b.x, a.y > b.y ? a.y : b.y, a.z > b.z ? a.z : b.z); +} + +PX_FORCE_INLINE Vec3V V3Min(const Vec3V a, const Vec3V b) +{ + return Vec3V(a.x < b.x ? a.x : b.x, a.y < b.y ? a.y : b.y, a.z < b.z ? a.z : b.z); +} + +PX_FORCE_INLINE FloatV V3ExtractMax(const Vec3V a) +{ + const PxF32 t0 = (a.x >= a.y) ? a.x : a.y; + return t0 >= a.z ? t0 : a.z; +} + +PX_FORCE_INLINE FloatV V3ExtractMin(const Vec3V a) +{ + const PxF32 t0 = (a.x <= a.y) ? a.x : a.y; + return t0 <= a.z ? t0 : a.z; +} + +// return (a >= 0.0f) ? 1.0f : -1.0f; +PX_FORCE_INLINE Vec3V V3Sign(const Vec3V a) +{ + return Vec3V((a.x >= 0.f ? 1.f : -1.f), (a.y >= 0.f ? 1.f : -1.f), (a.z >= 0.f ? 
1.f : -1.f)); +} + +PX_FORCE_INLINE Vec3V V3Clamp(const Vec3V a, const Vec3V minV, const Vec3V maxV) +{ + return V3Max(V3Min(a, maxV), minV); +} + +PX_FORCE_INLINE Vec3V V3Abs(const Vec3V a) +{ + return V3Max(a, V3Neg(a)); +} + +PX_FORCE_INLINE PxU32 V3AllGrtr(const Vec3V a, const Vec3V b) +{ + return BOOL_TO_U32((a.x > b.x) & (a.y > b.y) & (a.z > b.z)); +} + +PX_FORCE_INLINE PxU32 V3AllGrtrOrEq(const Vec3V a, const Vec3V b) +{ + return BOOL_TO_U32((a.x >= b.x) & (a.y >= b.y) & (a.z >= b.z)); +} + +PX_FORCE_INLINE PxU32 V3AllEq(const Vec3V a, const Vec3V b) +{ + return BOOL_TO_U32((a.x == b.x) & (a.y == b.y) & (a.z == b.z)); +} + +PX_FORCE_INLINE Vec3V V3Round(const Vec3V a) +{ + return Vec3V(floorf(a.x + 0.5f), floorf(a.y + 0.5f), floorf(a.z + 0.5f)); +} + +PX_FORCE_INLINE Vec3V V3Sin(const Vec3V a) +{ + return Vec3V(sinf(a.x), sinf(a.y), sinf(a.z)); +} + +PX_FORCE_INLINE Vec3V V3Cos(const Vec3V a) +{ + return Vec3V(cosf(a.x), cosf(a.y), cosf(a.z)); +} + +PX_FORCE_INLINE Vec3V V3PermYZZ(const Vec3V a) +{ + return Vec3V(a.y, a.z, a.z); +} + +PX_FORCE_INLINE Vec3V V3PermXYX(const Vec3V a) +{ + return Vec3V(a.x, a.y, a.x); +} + +PX_FORCE_INLINE Vec3V V3PermYZX(const Vec3V a) +{ + return Vec3V(a.y, a.z, a.x); +} + +PX_FORCE_INLINE Vec3V V3PermZXY(const Vec3V a) +{ + return Vec3V(a.z, a.x, a.y); +} + +PX_FORCE_INLINE Vec3V V3PermZZY(const Vec3V a) +{ + return Vec3V(a.z, a.z, a.y); +} + +PX_FORCE_INLINE Vec3V V3PermYXX(const Vec3V a) +{ + return Vec3V(a.y, a.x, a.x); +} + +PX_FORCE_INLINE Vec3V V3Perm_Zero_1Z_0Y(const Vec3V v0, const Vec3V v1) +{ + return Vec3V(0.0f, v1.z, v0.y); +} + +PX_FORCE_INLINE Vec3V V3Perm_0Z_Zero_1X(const Vec3V v0, const Vec3V v1) +{ + return Vec3V(v0.z, 0.0f, v1.x); +} + +PX_FORCE_INLINE Vec3V V3Perm_1Y_0X_Zero(const Vec3V v0, const Vec3V v1) +{ + return Vec3V(v1.y, v0.x, 0.0f); +} + +PX_FORCE_INLINE FloatV V3SumElems(const Vec3V a) +{ + return FloatV(a.x + a.y + a.z); +} + +PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V min, 
const Vec3V max) +{ + return BOOL_TO_U32(a.x > max.x || a.y > max.y || a.z > max.z || a.x < min.x || a.y < min.y || a.z < min.z); +} + +PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V min, const Vec3V max) +{ + return BOOL_TO_U32(a.x <= max.x && a.y <= max.y && a.z <= max.z && a.x >= min.x && a.y >= min.y && a.z >= min.z); +} + +PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V bounds) +{ + return V3OutOfBounds(a, V3Neg(bounds), bounds); +} + +PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V bounds) +{ + return V3InBounds(a, V3Neg(bounds), bounds); +} + +PX_FORCE_INLINE void V3Transpose(Vec3V& col0, Vec3V& col1, Vec3V& col2) +{ + const PxF32 t01 = col0.y, t02 = col0.z, t12 = col1.z; + col0.y = col1.x; + col0.z = col2.x; + col1.z = col2.y; + col1.x = t01; + col2.x = t02; + col2.y = t12; +} + +///////////////////////// +// VEC4V +///////////////////////// + +PX_FORCE_INLINE Vec4V V4Splat(const FloatV f) +{ + return Vec4V(f.x, f.x, f.x, f.x); +} + +PX_FORCE_INLINE Vec4V V4Merge(const FloatV* const floatVArray) +{ + return Vec4V(floatVArray[0].x, floatVArray[1].x, floatVArray[2].x, floatVArray[3].x); +} + +PX_FORCE_INLINE Vec4V V4Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z, const FloatVArg w) +{ + return Vec4V(x.x, y.x, z.x, w.x); +} + +PX_FORCE_INLINE Vec4V V4MergeW(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + return Vec4V(x.w, y.w, z.w, w.w); +} + +PX_FORCE_INLINE Vec4V V4MergeZ(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + return Vec4V(x.z, y.z, z.z, w.z); +} + +PX_FORCE_INLINE Vec4V V4MergeY(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + return Vec4V(x.y, y.y, z.y, w.y); +} + +PX_FORCE_INLINE Vec4V V4MergeX(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + return Vec4V(x.x, y.x, z.x, w.x); +} + +PX_FORCE_INLINE Vec4V V4UnpackXY(const Vec4VArg a, const Vec4VArg b) +{ + return Vec4V(a.x, b.x, 
a.y, b.y); +} + +PX_FORCE_INLINE Vec4V V4UnpackZW(const Vec4VArg a, const Vec4VArg b) +{ + return Vec4V(a.z, b.z, a.w, b.w); +} + +PX_FORCE_INLINE Vec4V V4UnitX() +{ + return Vec4V(1.0f, 0.0f, 0.0f, 0.0f); +} + +PX_FORCE_INLINE Vec4V V4UnitY() +{ + return Vec4V(0.0f, 1.0f, 0.0f, 0.0f); +} + +PX_FORCE_INLINE Vec4V V4UnitZ() +{ + return Vec4V(0.0f, 0.0f, 1.0f, 0.0f); +} + +PX_FORCE_INLINE Vec4V V4UnitW() +{ + return Vec4V(0.0f, 0.0f, 0.0f, 1.0f); +} + +PX_FORCE_INLINE FloatV V4GetX(const Vec4V f) +{ + return FloatV(f.x); +} + +PX_FORCE_INLINE FloatV V4GetY(const Vec4V f) +{ + return FloatV(f.y); +} + +PX_FORCE_INLINE FloatV V4GetZ(const Vec4V f) +{ + return FloatV(f.z); +} + +PX_FORCE_INLINE FloatV V4GetW(const Vec4V f) +{ + return FloatV(f.w); +} + +PX_FORCE_INLINE Vec4V V4SetX(const Vec4V v, const FloatV f) +{ + return Vec4V(f.x, v.y, v.z, v.w); +} + +PX_FORCE_INLINE Vec4V V4SetY(const Vec4V v, const FloatV f) +{ + return Vec4V(v.x, f.x, v.z, v.w); +} + +PX_FORCE_INLINE Vec4V V4SetZ(const Vec4V v, const FloatV f) +{ + return Vec4V(v.x, v.y, f.x, v.w); +} + +PX_FORCE_INLINE Vec4V V4SetW(const Vec4V v, const FloatV f) +{ + return Vec4V(v.x, v.y, v.z, f.x); +} + +PX_FORCE_INLINE Vec4V V4SetW(const Vec3V v, const FloatV f) +{ + return Vec4V(v.x, v.y, v.z, f.x); +} + +PX_FORCE_INLINE Vec4V V4ClearW(const Vec4V v) +{ + return Vec4V(v.x, v.y, v.z, 0.0f); +} + +PX_FORCE_INLINE Vec4V V4PermYXWZ(const Vec4V v) +{ + return Vec4V(v.y, v.x, v.w, v.z); +} + +PX_FORCE_INLINE Vec4V V4PermXZXZ(const Vec4V v) +{ + return Vec4V(v.x, v.z, v.x, v.z); +} + +PX_FORCE_INLINE Vec4V V4PermYWYW(const Vec4V v) +{ + return Vec4V(v.y, v.w, v.y, v.w); +} + +PX_FORCE_INLINE Vec4V V4PermYZXW(const Vec4V v) +{ + return Vec4V(v.y, v.z, v.x, v.w); +} + +template <PxU8 _x, PxU8 _y, PxU8 _z, PxU8 _w> +PX_FORCE_INLINE Vec4V V4Perm(const Vec4V v) +{ + const PxF32 f[4] = { v.x, v.y, v.z, v.w }; + return Vec4V(f[_x], f[_y], f[_z], f[_w]); +} + +PX_FORCE_INLINE Vec4V V4Zero() +{ + return V4Load(0.0f); +} + 
+PX_FORCE_INLINE Vec4V V4One() +{ + return V4Load(1.0f); +} + +PX_FORCE_INLINE Vec4V V4Eps() +{ + return V4Load(PX_EPS_REAL); +} + +PX_FORCE_INLINE Vec4V V4Neg(const Vec4V c) +{ + return Vec4V(-c.x, -c.y, -c.z, -c.w); +} + +PX_FORCE_INLINE Vec4V V4Add(const Vec4V a, const Vec4V b) +{ + return Vec4V(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +} + +PX_FORCE_INLINE Vec4V V4Sub(const Vec4V a, const Vec4V b) +{ + return Vec4V(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); +} + +PX_FORCE_INLINE Vec4V V4Scale(const Vec4V a, const FloatV b) +{ + return Vec4V(a.x * b.x, a.y * b.x, a.z * b.x, a.w * b.x); +} + +PX_FORCE_INLINE Vec4V V4Mul(const Vec4V a, const Vec4V b) +{ + return Vec4V(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); +} + +PX_FORCE_INLINE Vec4V V4ScaleInv(const Vec4V a, const FloatV b) +{ + const PxF32 bInv = 1.0f / b.x; + return Vec4V(a.x * bInv, a.y * bInv, a.z * bInv, a.w * bInv); +} + +PX_FORCE_INLINE Vec4V V4Div(const Vec4V a, const Vec4V b) +{ + VECMATHAOS_ASSERT(b.x != 0 && b.y != 0 && b.z != 0 && b.w != 0); + return Vec4V(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); +} + +PX_FORCE_INLINE Vec4V V4ScaleInvFast(const Vec4V a, const FloatV b) +{ + const PxF32 bInv = 1.0f / b.x; + return Vec4V(a.x * bInv, a.y * bInv, a.z * bInv, a.w * bInv); +} + +PX_FORCE_INLINE Vec4V V4DivFast(const Vec4V a, const Vec4V b) +{ + return Vec4V(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); +} + +PX_FORCE_INLINE Vec4V V4Recip(const Vec4V a) +{ + return Vec4V(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w); +} + +PX_FORCE_INLINE Vec4V V4RecipFast(const Vec4V a) +{ + return Vec4V(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w); +} + +PX_FORCE_INLINE Vec4V V4Rsqrt(const Vec4V a) +{ + return Vec4V(PxRecipSqrt(a.x), PxRecipSqrt(a.y), PxRecipSqrt(a.z), PxRecipSqrt(a.w)); +} + +PX_FORCE_INLINE Vec4V V4RsqrtFast(const Vec4V a) +{ + return Vec4V(PxRecipSqrt(a.x), PxRecipSqrt(a.y), PxRecipSqrt(a.z), PxRecipSqrt(a.w)); +} + +PX_FORCE_INLINE Vec4V V4Sqrt(const Vec4V a) +{ + return 
Vec4V(PxSqrt(a.x), PxSqrt(a.y), PxSqrt(a.z), PxSqrt(a.w)); +} + +PX_FORCE_INLINE Vec4V V4ScaleAdd(const Vec4V a, const FloatV b, const Vec4V c) +{ + return V4Add(V4Scale(a, b), c); +} + +PX_FORCE_INLINE Vec4V V4NegScaleSub(const Vec4V a, const FloatV b, const Vec4V c) +{ + return V4Sub(c, V4Scale(a, b)); +} + +PX_FORCE_INLINE Vec4V V4MulAdd(const Vec4V a, const Vec4V b, const Vec4V c) +{ + return V4Add(V4Mul(a, b), c); +} + +PX_FORCE_INLINE Vec4V V4NegMulSub(const Vec4V a, const Vec4V b, const Vec4V c) +{ + return V4Sub(c, V4Mul(a, b)); +} + +PX_FORCE_INLINE FloatV V4SumElements(const Vec4V a) +{ + return FloatV(a.x + a.y + a.z + a.w); +} + +PX_FORCE_INLINE FloatV V4Dot(const Vec4V a, const Vec4V b) +{ + return FloatV(a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w); +} + +PX_FORCE_INLINE FloatV V4Dot3(const Vec4V a, const Vec4V b) +{ + return FloatV(a.x * b.x + a.y * b.y + a.z * b.z); +} + +PX_FORCE_INLINE Vec4V V4Cross(const Vec4V a, const Vec4V b) +{ + return Vec4V(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0.0f); +} + +PX_FORCE_INLINE FloatV V4Length(const Vec4V a) +{ + return FloatV(PxSqrt(a.x * a.x + a.y * a.y + a.z * a.z + a.w * a.w)); +} + +PX_FORCE_INLINE FloatV V4LengthSq(const Vec4V a) +{ + return V4Dot(a, a); +} + +PX_FORCE_INLINE Vec4V V4Normalize(const Vec4V a) +{ + VECMATHAOS_ASSERT(0 != a.x || 0 != a.y || 0 != a.z || 0 != a.w); + const FloatV length = FloatV(V4Length(a)); + return V4ScaleInv(a, length); +} + +PX_FORCE_INLINE Vec4V V4NormalizeSafe(const Vec4V a, const Vec4V unsafeReturnValue) +{ + const FloatV length = FloatV(V4Length(a)); + if(PX_EPS_REAL >= length.x) + { + return unsafeReturnValue; + } + else + { + return V4ScaleInv(a, length); + } +} +PX_FORCE_INLINE Vec4V V4NormalizeFast(const Vec4V a) +{ + VECMATHAOS_ASSERT(0 != a.x || 0 != a.y || 0 != a.z || 0 != a.w); + const FloatV length = FloatV(V4Length(a)); + return V4ScaleInv(a, length); +} + +PX_FORCE_INLINE Vec4V V4Sel(const BoolV c, const Vec4V a, const Vec4V 
b) +{ + return Vec4V(c.ux ? a.x : b.x, c.uy ? a.y : b.y, c.uz ? a.z : b.z, c.uw ? a.w : b.w); +} + +PX_FORCE_INLINE BoolV V4IsGrtr(const Vec4V a, const Vec4V b) +{ + return BoolV(BOOL_TO_U32(a.x > b.x), BOOL_TO_U32(a.y > b.y), BOOL_TO_U32(a.z > b.z), BOOL_TO_U32(a.w > b.w)); +} + +PX_FORCE_INLINE BoolV V4IsGrtrOrEq(const Vec4V a, const Vec4V b) +{ + return BoolV(BOOL_TO_U32(a.x >= b.x), BOOL_TO_U32(a.y >= b.y), BOOL_TO_U32(a.z >= b.z), BOOL_TO_U32(a.w >= b.w)); +} + +PX_FORCE_INLINE BoolV V4IsEq(const Vec4V a, const Vec4V b) +{ + return BoolV(BOOL_TO_U32(a.x == b.x), BOOL_TO_U32(a.y == b.y), BOOL_TO_U32(a.z == b.z), BOOL_TO_U32(a.w == b.w)); +} + +PX_FORCE_INLINE Vec4V V4Max(const Vec4V a, const Vec4V b) +{ + return Vec4V(a.x > b.x ? a.x : b.x, a.y > b.y ? a.y : b.y, a.z > b.z ? a.z : b.z, a.w > b.w ? a.w : b.w); +} + +PX_FORCE_INLINE Vec4V V4Min(const Vec4V a, const Vec4V b) +{ + return Vec4V(a.x < b.x ? a.x : b.x, a.y < b.y ? a.y : b.y, a.z < b.z ? a.z : b.z, a.w < b.w ? a.w : b.w); +} + +// Largest of the four components of a: max(max(x, y), max(z, w)). +PX_FORCE_INLINE FloatV V4ExtractMax(const Vec4V a) +{ + const PxF32 t0 = (a.x >= a.y) ? a.x : a.y; + // The z/w pair must pick between a.z and a.w, mirroring the x/y pair above. + const PxF32 t1 = (a.z >= a.w) ? a.z : a.w; + return t0 >= t1 ? t0 : t1; +} + +// Smallest of the four components of a: min(min(x, y), min(z, w)). +PX_FORCE_INLINE FloatV V4ExtractMin(const Vec4V a) +{ + const PxF32 t0 = (a.x <= a.y) ? a.x : a.y; + // The z/w pair must pick between a.z and a.w, mirroring the x/y pair above. + const PxF32 t1 = (a.z <= a.w) ? a.z : a.w; + return t0 <= t1 ? 
t0 : t1; +} + +PX_FORCE_INLINE Vec4V V4Clamp(const Vec4V a, const Vec4V minV, const Vec4V maxV) +{ + return V4Max(V4Min(a, maxV), minV); +} + +PX_FORCE_INLINE Vec4V V4Round(const Vec4V a) +{ + return Vec4V(floorf(a.x + 0.5f), floorf(a.y + 0.5f), floorf(a.z + 0.5f), floorf(a.w + 0.5f)); +} + +PX_FORCE_INLINE Vec4V V4Sin(const Vec4V a) +{ + return Vec4V(sinf(a.x), sinf(a.y), sinf(a.z), sinf(a.w)); +} + +PX_FORCE_INLINE Vec4V V4Cos(const Vec4V a) +{ + return Vec4V(cosf(a.x), cosf(a.y), cosf(a.z), cosf(a.w)); +} + +PX_FORCE_INLINE PxU32 V4AllGrtr(const Vec4V a, const Vec4V b) +{ + return BOOL_TO_U32((a.x > b.x) & (a.y > b.y) & (a.z > b.z) & (a.w > b.w)); +} + +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq(const Vec4V a, const Vec4V b) +{ + return BOOL_TO_U32((a.x >= b.x) & (a.y >= b.y) & (a.z >= b.z) & (a.w >= b.w)); +} + +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq3(const Vec4V a, const Vec4V b) +{ + return BOOL_TO_U32((a.x >= b.x) & (a.y >= b.y) & (a.z >= b.z)); +} + +PX_FORCE_INLINE PxU32 V4AllEq(const Vec4V a, const Vec4V b) +{ + return BOOL_TO_U32((a.x == b.x) & (a.y == b.y) & (a.z == b.z) & (a.w == b.w)); +} + +PX_FORCE_INLINE PxU32 V4AnyGrtr3(const Vec4V a, const Vec4V b) +{ + return BOOL_TO_U32((a.x > b.x) | (a.y > b.y) | (a.z > b.z)); +} + +PX_FORCE_INLINE void V4Transpose(Vec4V& col0, Vec4V& col1, Vec4V& col2, Vec4V& col3) +{ + const PxF32 t01 = col0.y, t02 = col0.z, t03 = col0.w; + const PxF32 t12 = col1.z, t13 = col1.w; + const PxF32 t23 = col2.w; + col0.y = col1.x; + col0.z = col2.x; + col0.w = col3.x; + col1.z = col2.y; + col1.w = col3.y; + col2.w = col3.z; + col1.x = t01; + col2.x = t02; + col3.x = t03; + col2.y = t12; + col3.y = t13; + col3.z = t23; +} + +PX_FORCE_INLINE BoolV BFFFF() +{ + return BoolV(FALSE_TO_U32, FALSE_TO_U32, FALSE_TO_U32, FALSE_TO_U32); +} +PX_FORCE_INLINE BoolV BFFFT() +{ + return BoolV(FALSE_TO_U32, FALSE_TO_U32, FALSE_TO_U32, TRUE_TO_U32); +} +PX_FORCE_INLINE BoolV BFFTF() +{ + return BoolV(FALSE_TO_U32, FALSE_TO_U32, TRUE_TO_U32, FALSE_TO_U32); 
+} +PX_FORCE_INLINE BoolV BFFTT() +{ + return BoolV(FALSE_TO_U32, FALSE_TO_U32, TRUE_TO_U32, TRUE_TO_U32); +} +PX_FORCE_INLINE BoolV BFTFF() +{ + return BoolV(FALSE_TO_U32, TRUE_TO_U32, FALSE_TO_U32, FALSE_TO_U32); +} +PX_FORCE_INLINE BoolV BFTFT() +{ + return BoolV(FALSE_TO_U32, TRUE_TO_U32, FALSE_TO_U32, TRUE_TO_U32); +} +PX_FORCE_INLINE BoolV BFTTF() +{ + return BoolV(FALSE_TO_U32, TRUE_TO_U32, TRUE_TO_U32, FALSE_TO_U32); +} +PX_FORCE_INLINE BoolV BFTTT() +{ + return BoolV(FALSE_TO_U32, TRUE_TO_U32, TRUE_TO_U32, TRUE_TO_U32); +} +PX_FORCE_INLINE BoolV BTFFF() +{ + return BoolV(TRUE_TO_U32, FALSE_TO_U32, FALSE_TO_U32, FALSE_TO_U32); +} +PX_FORCE_INLINE BoolV BTFFT() +{ + return BoolV(TRUE_TO_U32, FALSE_TO_U32, FALSE_TO_U32, TRUE_TO_U32); +} +PX_FORCE_INLINE BoolV BTFTF() +{ + return BoolV(TRUE_TO_U32, FALSE_TO_U32, TRUE_TO_U32, FALSE_TO_U32); +} +PX_FORCE_INLINE BoolV BTFTT() +{ + return BoolV(TRUE_TO_U32, FALSE_TO_U32, TRUE_TO_U32, TRUE_TO_U32); +} +PX_FORCE_INLINE BoolV BTTFF() +{ + return BoolV(TRUE_TO_U32, TRUE_TO_U32, FALSE_TO_U32, FALSE_TO_U32); +} +PX_FORCE_INLINE BoolV BTTFT() +{ + return BoolV(TRUE_TO_U32, TRUE_TO_U32, FALSE_TO_U32, TRUE_TO_U32); +} +PX_FORCE_INLINE BoolV BTTTF() +{ + return BoolV(TRUE_TO_U32, TRUE_TO_U32, TRUE_TO_U32, FALSE_TO_U32); +} +PX_FORCE_INLINE BoolV BTTTT() +{ + return BoolV(TRUE_TO_U32, TRUE_TO_U32, TRUE_TO_U32, TRUE_TO_U32); +} + +PX_FORCE_INLINE BoolV BXMask() +{ + return BTFFF(); +} +PX_FORCE_INLINE BoolV BYMask() +{ + return BFTFF(); +} +PX_FORCE_INLINE BoolV BZMask() +{ + return BFFTF(); +} +PX_FORCE_INLINE BoolV BWMask() +{ + return BFFFT(); +} + +PX_FORCE_INLINE BoolV BGetX(const BoolV a) +{ + return BoolV(a.ux, a.ux, a.ux, a.ux); +} + +PX_FORCE_INLINE BoolV BGetY(const BoolV a) +{ + return BoolV(a.uy, a.uy, a.uy, a.uy); +} + +PX_FORCE_INLINE BoolV BGetZ(const BoolV a) +{ + return BoolV(a.uz, a.uz, a.uz, a.uz); +} + +PX_FORCE_INLINE BoolV BGetW(const BoolV a) +{ + return BoolV(a.uw, a.uw, a.uw, a.uw); +} + 
+PX_FORCE_INLINE BoolV BSetX(const BoolV v, const BoolV f) +{ + return BoolV(f.ux, v.uy, v.uz, v.uw); +} + +PX_FORCE_INLINE BoolV BSetY(const BoolV v, const BoolV f) +{ + return BoolV(v.ux, f.uy, v.uz, v.uw); +} + +PX_FORCE_INLINE BoolV BSetZ(const BoolV v, const BoolV f) +{ + return BoolV(v.ux, v.uy, f.uz, v.uw); +} + +PX_FORCE_INLINE BoolV BSetW(const BoolV v, const BoolV f) +{ + return BoolV(v.ux, v.uy, v.uz, f.uw); +} + +template <int index> +BoolV BSplatElement(BoolV a) +{ + PxU32* b = (PxU32*)&a; + return BoolV(b[index], b[index], b[index], b[index]); +} + +PX_FORCE_INLINE BoolV BAnd(const BoolV a, const BoolV b) +{ + return BoolV(BOOL_TO_U32(a.ux && b.ux), BOOL_TO_U32(a.uy && b.uy), BOOL_TO_U32(a.uz && b.uz), BOOL_TO_U32(a.uw && b.uw)); +} + +PX_FORCE_INLINE BoolV BAndNot(const BoolV a, const BoolV b) +{ + return BoolV(a.ux & ~b.ux, a.uy & ~b.uy, a.uz & ~b.uz, a.uw & ~b.uw); +} + +PX_FORCE_INLINE BoolV BNot(const BoolV a) +{ + return BoolV(~a.ux, ~a.uy, ~a.uz, ~a.uw); +} + +PX_FORCE_INLINE BoolV BOr(const BoolV a, const BoolV b) +{ + return BoolV(BOOL_TO_U32(a.ux || b.ux), BOOL_TO_U32(a.uy || b.uy), BOOL_TO_U32(a.uz || b.uz), BOOL_TO_U32(a.uw || b.uw)); +} + +PX_FORCE_INLINE PxU32 BAllEq(const BoolV a, const BoolV b) +{ + return (a.ux == b.ux && a.uy == b.uy && a.uz == b.uz && a.uw == b.uw ? TRUE_TO_U32 : FALSE_TO_U32); +} + +PX_FORCE_INLINE PxU32 BAllEqTTTT(const BoolV a) +{ + return BAllEq(a, BTTTT()); +} + +PX_FORCE_INLINE PxU32 BAllEqFFFF(const BoolV a) +{ + return BAllEq(a, BFFFF()); +} + +PX_FORCE_INLINE BoolV BAllTrue4(const BoolV a) +{ + return (a.ux & a.uy & a.uz & a.uw) ? BTTTT() : BFFFF(); +} + +PX_FORCE_INLINE BoolV BAnyTrue4(const BoolV a) +{ + return (a.ux | a.uy | a.uz | a.uw) ? BTTTT() : BFFFF(); +} + +PX_FORCE_INLINE BoolV BAllTrue3(const BoolV a) +{ + return (a.ux & a.uy & a.uz) ? BTTTT() : BFFFF(); +} + +PX_FORCE_INLINE BoolV BAnyTrue3(const BoolV a) +{ + return (a.ux | a.uy | a.uz) ? 
BTTTT() : BFFFF(); +} + +PX_FORCE_INLINE PxU32 BGetBitMask(const BoolV a) +{ + return (a.ux & 1) | (a.uy & 2) | (a.uz & 4) | (a.uw & 8); +} + +////////////////////////////////// +// MAT33V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V M33MulV3(const Mat33V& a, const Vec3V b) +{ + return Vec3V(a.col0.x * b.x + a.col1.x * b.y + a.col2.x * b.z, a.col0.y * b.x + a.col1.y * b.y + a.col2.y * b.z, + a.col0.z * b.x + a.col1.z * b.y + a.col2.z * b.z); +} + +PX_FORCE_INLINE Vec3V M33TrnspsMulV3(const Mat33V& a, const Vec3V b) +{ + return Vec3V(a.col0.x * b.x + a.col0.y * b.y + a.col0.z * b.z, a.col1.x * b.x + a.col1.y * b.y + a.col1.z * b.z, + a.col2.x * b.x + a.col2.y * b.y + a.col2.z * b.z); +} + +PX_FORCE_INLINE Vec3V M33MulV3AddV3(const Mat33V& A, const Vec3V b, const Vec3V c) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + Vec3V result = V3ScaleAdd(A.col0, x, c); + result = V3ScaleAdd(A.col1, y, result); + return V3ScaleAdd(A.col2, z, result); +} + +PX_FORCE_INLINE Mat33V M33MulM33(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(M33MulV3(a, b.col0), M33MulV3(a, b.col1), M33MulV3(a, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Add(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(V3Add(a.col0, b.col0), V3Add(a.col1, b.col1), V3Add(a.col2, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Scale(const Mat33V& a, const FloatV& b) +{ + return Mat33V(V3Scale(a.col0, b), V3Scale(a.col1, b), V3Scale(a.col2, b)); +} + +PX_FORCE_INLINE Mat33V M33Sub(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(V3Sub(a.col0, b.col0), V3Sub(a.col1, b.col1), V3Sub(a.col2, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Neg(const Mat33V& a) +{ + return Mat33V(V3Neg(a.col0), V3Neg(a.col1), V3Neg(a.col2)); +} + +PX_FORCE_INLINE Mat33V M33Abs(const Mat33V& a) +{ + return Mat33V(V3Abs(a.col0), V3Abs(a.col1), V3Abs(a.col2)); +} + +PX_FORCE_INLINE Mat33V M33Diagonal(const Vec3VArg d) +{ + const Vec3V x = V3Mul(V3UnitX(), d); + const Vec3V y = 
V3Mul(V3UnitY(), d); + const Vec3V z = V3Mul(V3UnitZ(), d); + return Mat33V(x, y, z); +} + +PX_FORCE_INLINE Mat33V M33Inverse(const Mat33V& a) +{ + const PxF32 det = a.col0.x * (a.col1.y * a.col2.z - a.col1.z * a.col2.y) - + a.col1.x * (a.col0.y * a.col2.z - a.col2.y * a.col0.z) + + a.col2.x * (a.col0.y * a.col1.z - a.col1.y * a.col0.z); + + const PxF32 invDet = 1.0f / det; + + Mat33V ret; + ret.col0.x = invDet * (a.col1.y * a.col2.z - a.col2.y * a.col1.z); + ret.col0.y = invDet * (a.col2.y * a.col0.z - a.col0.y * a.col2.z); + ret.col0.z = invDet * (a.col0.y * a.col1.z - a.col1.y * a.col0.z); + + ret.col1.x = invDet * (a.col2.x * a.col1.z - a.col1.x * a.col2.z); + ret.col1.y = invDet * (a.col0.x * a.col2.z - a.col2.x * a.col0.z); + ret.col1.z = invDet * (a.col1.x * a.col0.z - a.col0.x * a.col1.z); + + ret.col2.x = invDet * (a.col1.x * a.col2.y - a.col2.x * a.col1.y); + ret.col2.y = invDet * (a.col2.x * a.col0.y - a.col0.x * a.col2.y); + ret.col2.z = invDet * (a.col0.x * a.col1.y - a.col1.x * a.col0.y); + + return ret; +} + +PX_FORCE_INLINE Mat33V Mat33V_From_PxMat33(const PxMat33& m) +{ + return Mat33V(V3LoadU(m.column0), V3LoadU(m.column1), V3LoadU(m.column2)); +} + +PX_FORCE_INLINE void PxMat33_From_Mat33V(const Mat33V& m, PxMat33& out) +{ + PX_ASSERT((size_t(&out) & 15) == 0); + V3StoreU(m.col0, out.column0); + V3StoreU(m.col1, out.column1); + V3StoreU(m.col2, out.column2); +} + +PX_FORCE_INLINE Mat33V M33Trnsps(const Mat33V& a) +{ + return Mat33V(Vec3V(a.col0.x, a.col1.x, a.col2.x), Vec3V(a.col0.y, a.col1.y, a.col2.y), + Vec3V(a.col0.z, a.col1.z, a.col2.z)); +} + +PX_FORCE_INLINE Mat33V M33Identity() +{ + return Mat33V(V3UnitX(), V3UnitY(), V3UnitZ()); +} + +////////////////////////////////// +// MAT34V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V M34MulV3(const Mat34V& a, const Vec3V b) +{ + return Vec3V(a.col0.x * b.x + a.col1.x * b.y + a.col2.x * b.z + a.col3.x, + a.col0.y * b.x + a.col1.y * b.y + a.col2.y * b.z + a.col3.y, + a.col0.z * b.x + 
a.col1.z * b.y + a.col2.z * b.z + a.col3.z); +} + +PX_FORCE_INLINE Vec3V M34Mul33V3(const Mat34V& a, const Vec3V b) +{ + return Vec3V(a.col0.x * b.x + a.col1.x * b.y + a.col2.x * b.z, a.col0.y * b.x + a.col1.y * b.y + a.col2.y * b.z, + a.col0.z * b.x + a.col1.z * b.y + a.col2.z * b.z); +} + +PX_FORCE_INLINE Vec3V M34TrnspsMul33V3(const Mat34V& a, const Vec3V b) +{ + return Vec3V(a.col0.x * b.x + a.col0.y * b.y + a.col0.z * b.z, a.col1.x * b.x + a.col1.y * b.y + a.col1.z * b.z, + a.col2.x * b.x + a.col2.y * b.y + a.col2.z * b.z); +} + +PX_FORCE_INLINE Mat34V M34MulM34(const Mat34V& a, const Mat34V& b) +{ + return Mat34V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2), M34MulV3(a, b.col3)); +} + +PX_FORCE_INLINE Mat33V M34MulM33(const Mat34V& a, const Mat33V& b) +{ + return Mat33V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2)); +} + +PX_FORCE_INLINE Mat33V M34Mul33V3(const Mat34V& a, const Mat33V& b) +{ + return Mat33V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2)); +} + +PX_FORCE_INLINE Mat33V M34Mul33MM34(const Mat34V& a, const Mat34V& b) +{ + return Mat33V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2)); +} + +PX_FORCE_INLINE Mat34V M34Add(const Mat34V& a, const Mat34V& b) +{ + return Mat34V(V3Add(a.col0, b.col0), V3Add(a.col1, b.col1), V3Add(a.col2, b.col2), V3Add(a.col3, b.col3)); +} + +PX_FORCE_INLINE Mat33V M34Trnsps33(const Mat34V& a) +{ + return Mat33V(Vec3V(a.col0.x, a.col1.x, a.col2.x), Vec3V(a.col0.y, a.col1.y, a.col2.y), + Vec3V(a.col0.z, a.col1.z, a.col2.z)); +} + +////////////////////////////////// +// MAT44V +////////////////////////////////// + +PX_FORCE_INLINE Vec4V M44MulV4(const Mat44V& a, const Vec4V b) +{ + return Vec4V(a.col0.x * b.x + a.col1.x * b.y + a.col2.x * b.z + a.col3.x * b.w, + a.col0.y * b.x + a.col1.y * b.y + a.col2.y * b.z + a.col3.y * b.w, + a.col0.z * b.x + a.col1.z * b.y + a.col2.z * b.z + a.col3.z * b.w, + a.col0.w * b.x + a.col1.w * b.y + 
a.col2.w * b.z + a.col3.w * b.w); +} + +PX_FORCE_INLINE Vec4V M44TrnspsMulV4(const Mat44V& a, const Vec4V b) +{ + return Vec4V(a.col0.x * b.x + a.col0.y * b.y + a.col0.z * b.z + a.col0.w * b.w, + a.col1.x * b.x + a.col1.y * b.y + a.col1.z * b.z + a.col1.w * b.w, + a.col2.x * b.x + a.col2.y * b.y + a.col2.z * b.z + a.col2.w * b.w, + a.col3.x * b.x + a.col3.y * b.y + a.col3.z * b.z + a.col3.w * b.w); +} + +PX_FORCE_INLINE Mat44V M44MulM44(const Mat44V& a, const Mat44V& b) +{ + return Mat44V(M44MulV4(a, b.col0), M44MulV4(a, b.col1), M44MulV4(a, b.col2), M44MulV4(a, b.col3)); +} + +PX_FORCE_INLINE Mat44V M44Add(const Mat44V& a, const Mat44V& b) +{ + return Mat44V(V4Add(a.col0, b.col0), V4Add(a.col1, b.col1), V4Add(a.col2, b.col2), V4Add(a.col3, b.col3)); +} + +PX_FORCE_INLINE Mat44V M44Inverse(const Mat44V& a) +{ + PxF32 tmp[12]; + PxF32 dst[16]; + PxF32 det; + + const PxF32 src[16] = { a.col0.x, a.col0.y, a.col0.z, a.col0.w, a.col1.x, a.col1.y, a.col1.z, a.col1.w, + a.col2.x, a.col2.y, a.col2.z, a.col2.w, a.col3.x, a.col3.y, a.col3.z, a.col3.w }; + + tmp[0] = src[10] * src[15]; + tmp[1] = src[11] * src[14]; + tmp[2] = src[9] * src[15]; + tmp[3] = src[11] * src[13]; + tmp[4] = src[9] * src[14]; + tmp[5] = src[10] * src[13]; + tmp[6] = src[8] * src[15]; + tmp[7] = src[11] * src[12]; + tmp[8] = src[8] * src[14]; + tmp[9] = src[10] * src[12]; + tmp[10] = src[8] * src[13]; + tmp[11] = src[9] * src[12]; + + dst[0] = tmp[0] * src[5] + tmp[3] * src[6] + tmp[4] * src[7]; + dst[0] -= tmp[1] * src[5] + tmp[2] * src[6] + tmp[5] * src[7]; + dst[1] = tmp[1] * src[4] + tmp[6] * src[6] + tmp[9] * src[7]; + dst[1] -= tmp[0] * src[4] + tmp[7] * src[6] + tmp[8] * src[7]; + dst[2] = tmp[2] * src[4] + tmp[7] * src[5] + tmp[10] * src[7]; + dst[2] -= tmp[3] * src[4] + tmp[6] * src[5] + tmp[11] * src[7]; + dst[3] = tmp[5] * src[4] + tmp[8] * src[5] + tmp[11] * src[6]; + dst[3] -= tmp[4] * src[4] + tmp[9] * src[5] + tmp[10] * src[6]; + dst[4] = tmp[1] * src[1] + tmp[2] * src[2] + tmp[5] * 
src[3]; + dst[4] -= tmp[0] * src[1] + tmp[3] * src[2] + tmp[4] * src[3]; + dst[5] = tmp[0] * src[0] + tmp[7] * src[2] + tmp[8] * src[3]; + dst[5] -= tmp[1] * src[0] + tmp[6] * src[2] + tmp[9] * src[3]; + dst[6] = tmp[3] * src[0] + tmp[6] * src[1] + tmp[11] * src[3]; + dst[6] -= tmp[2] * src[0] + tmp[7] * src[1] + tmp[10] * src[3]; + dst[7] = tmp[4] * src[0] + tmp[9] * src[1] + tmp[10] * src[2]; + dst[7] -= tmp[5] * src[0] + tmp[8] * src[1] + tmp[11] * src[2]; + + tmp[0] = src[2] * src[7]; + tmp[1] = src[3] * src[6]; + tmp[2] = src[1] * src[7]; + tmp[3] = src[3] * src[5]; + tmp[4] = src[1] * src[6]; + tmp[5] = src[2] * src[5]; + tmp[6] = src[0] * src[7]; + tmp[7] = src[3] * src[4]; + tmp[8] = src[0] * src[6]; + tmp[9] = src[2] * src[4]; + tmp[10] = src[0] * src[5]; + tmp[11] = src[1] * src[4]; + + dst[8] = tmp[0] * src[13] + tmp[3] * src[14] + tmp[4] * src[15]; + dst[8] -= tmp[1] * src[13] + tmp[2] * src[14] + tmp[5] * src[15]; + dst[9] = tmp[1] * src[12] + tmp[6] * src[14] + tmp[9] * src[15]; + dst[9] -= tmp[0] * src[12] + tmp[7] * src[14] + tmp[8] * src[15]; + dst[10] = tmp[2] * src[12] + tmp[7] * src[13] + tmp[10] * src[15]; + dst[10] -= tmp[3] * src[12] + tmp[6] * src[13] + tmp[11] * src[15]; + dst[11] = tmp[5] * src[12] + tmp[8] * src[13] + tmp[11] * src[14]; + dst[11] -= tmp[4] * src[12] + tmp[9] * src[13] + tmp[10] * src[14]; + dst[12] = tmp[2] * src[10] + tmp[5] * src[11] + tmp[1] * src[9]; + dst[12] -= tmp[4] * src[11] + tmp[0] * src[9] + tmp[3] * src[10]; + dst[13] = tmp[8] * src[11] + tmp[0] * src[8] + tmp[7] * src[10]; + dst[13] -= tmp[6] * src[10] + tmp[9] * src[11] + tmp[1] * src[8]; + dst[14] = tmp[6] * src[9] + tmp[11] * src[11] + tmp[3] * src[8]; + dst[14] -= tmp[10] * src[11] + tmp[2] * src[8] + tmp[7] * src[9]; + dst[15] = tmp[10] * src[10] + tmp[4] * src[8] + tmp[9] * src[9]; + dst[15] -= tmp[8] * src[9] + tmp[11] * src[10] + tmp[5] * src[8]; + + det = src[0] * dst[0] + src[1] * dst[1] + src[2] * dst[2] + src[3] * dst[3]; + + det = 1.0f / det; + 
for(PxU32 j = 0; j < 16; j++) + { + dst[j] *= det; + } + + return Mat44V(Vec4V(dst[0], dst[4], dst[8], dst[12]), Vec4V(dst[1], dst[5], dst[9], dst[13]), + Vec4V(dst[2], dst[6], dst[10], dst[14]), Vec4V(dst[3], dst[7], dst[11], dst[15])); +} + +PX_FORCE_INLINE Mat44V M44Trnsps(const Mat44V& a) +{ + return Mat44V(Vec4V(a.col0.x, a.col1.x, a.col2.x, a.col3.x), Vec4V(a.col0.y, a.col1.y, a.col2.y, a.col3.y), + Vec4V(a.col0.z, a.col1.z, a.col2.z, a.col3.z), Vec4V(a.col0.w, a.col1.w, a.col2.w, a.col3.w)); +} + +PX_FORCE_INLINE Vec4V V4LoadXYZW(const PxF32& x, const PxF32& y, const PxF32& z, const PxF32& w) +{ + return Vec4V(x, y, z, w); +} + +/* +PX_FORCE_INLINE VecU16V V4U32PK(VecU32V a, VecU32V b) +{ + return VecU16V( + PxU16(PxClamp<PxU32>((a).u32[0], 0, 0xFFFF)), + PxU16(PxClamp<PxU32>((a).u32[1], 0, 0xFFFF)), + PxU16(PxClamp<PxU32>((a).u32[2], 0, 0xFFFF)), + PxU16(PxClamp<PxU32>((a).u32[3], 0, 0xFFFF)), + PxU16(PxClamp<PxU32>((b).u32[0], 0, 0xFFFF)), + PxU16(PxClamp<PxU32>((b).u32[1], 0, 0xFFFF)), + PxU16(PxClamp<PxU32>((b).u32[2], 0, 0xFFFF)), + PxU16(PxClamp<PxU32>((b).u32[3], 0, 0xFFFF))); +} +*/ + +PX_FORCE_INLINE VecU32V V4U32Sel(const BoolV c, const VecU32V a, const VecU32V b) +{ + return VecU32V(c.ux ? a.u32[0] : b.u32[0], c.uy ? a.u32[1] : b.u32[1], c.uz ? a.u32[2] : b.u32[2], + c.uw ? 
a.u32[3] : b.u32[3]); +} + +PX_FORCE_INLINE VecU32V V4U32or(VecU32V a, VecU32V b) +{ + return VecU32V((a).u32[0] | (b).u32[0], (a).u32[1] | (b).u32[1], (a).u32[2] | (b).u32[2], (a).u32[3] | (b).u32[3]); +} + +PX_FORCE_INLINE VecU32V V4U32xor(VecU32V a, VecU32V b) +{ + return VecU32V((a).u32[0] ^ (b).u32[0], (a).u32[1] ^ (b).u32[1], (a).u32[2] ^ (b).u32[2], (a).u32[3] ^ (b).u32[3]); +} + +PX_FORCE_INLINE VecU32V V4U32and(VecU32V a, VecU32V b) +{ + return VecU32V((a).u32[0] & (b).u32[0], (a).u32[1] & (b).u32[1], (a).u32[2] & (b).u32[2], (a).u32[3] & (b).u32[3]); +} + +PX_FORCE_INLINE VecU32V V4U32Andc(VecU32V a, VecU32V b) +{ + return VecU32V((a).u32[0] & ~(b).u32[0], (a).u32[1] & ~(b).u32[1], (a).u32[2] & ~(b).u32[2], + (a).u32[3] & ~(b).u32[3]); +} + +/* +PX_FORCE_INLINE VecU16V V4U16Or(VecU16V a, VecU16V b) +{ + return VecU16V( + (a).u16[0]|(b).u16[0], (a).u16[1]|(b).u16[1], (a).u16[2]|(b).u16[2], (a).u16[3]|(b).u16[3], + (a).u16[4]|(b).u16[4], (a).u16[5]|(b).u16[5], (a).u16[6]|(b).u16[6], (a).u16[7]|(b).u16[7]); +} +*/ + +/* +PX_FORCE_INLINE VecU16V V4U16And(VecU16V a, VecU16V b) +{ + return VecU16V( + (a).u16[0]&(b).u16[0], (a).u16[1]&(b).u16[1], (a).u16[2]&(b).u16[2], (a).u16[3]&(b).u16[3], + (a).u16[4]&(b).u16[4], (a).u16[5]&(b).u16[5], (a).u16[6]&(b).u16[6], (a).u16[7]&(b).u16[7]); +} +*/ + +/* +PX_FORCE_INLINE VecU16V V4U16Andc(VecU16V a, VecU16V b) +{ + return VecU16V( + (a).u16[0]&~(b).u16[0], (a).u16[1]&~(b).u16[1], (a).u16[2]&~(b).u16[2], (a).u16[3]&~(b).u16[3], + (a).u16[4]&~(b).u16[4], (a).u16[5]&~(b).u16[5], (a).u16[6]&~(b).u16[6], (a).u16[7]&~(b).u16[7]); +} +*/ + +/* +template<int a> PX_FORCE_INLINE VecI32V V4ISplat() +{ + return VecI32V(a, a, a, a); +} + +template<PxU32 a> PX_FORCE_INLINE VecU32V V4USplat() +{ + return VecU32V(a, a, a, a); +} +*/ + +/* +PX_FORCE_INLINE void V4U16StoreAligned(VecU16V val, VecU16V* address) +{ + *address = val; +} +*/ + +PX_FORCE_INLINE void V4U32StoreAligned(VecU32V val, VecU32V* address) +{ + *address = val; +} + 
+PX_FORCE_INLINE Vec4V V4Andc(const Vec4V a, const VecU32V b) +{ + VecU32V r = V4U32Andc(*reinterpret_cast<const VecU32V*>(&a), b); + return (*reinterpret_cast<const Vec4V*>(&r)); +} + +PX_FORCE_INLINE VecU32V V4IsGrtrV32u(const Vec4V a, const Vec4V b) +{ + return VecU32V(a.x > b.x ? 0xFFFFffff : 0, a.y > b.y ? 0xFFFFffff : 0, a.z > b.z ? 0xFFFFffff : 0, + a.w > b.w ? 0xFFFFffff : 0); +} + +PX_FORCE_INLINE VecU16V V4U16LoadAligned(VecU16V* addr) +{ + return *addr; +} + +PX_FORCE_INLINE VecU16V V4U16LoadUnaligned(VecU16V* addr) +{ + return *addr; +} + +PX_FORCE_INLINE VecU16V V4U16CompareGt(VecU16V a, VecU16V b) +{ + return VecU16V + ( + BOOL_TO_U16(a.u16[0] > b.u16[0]), BOOL_TO_U16(a.u16[1] > b.u16[1]), BOOL_TO_U16(a.u16[2] > b.u16[2]), BOOL_TO_U16(a.u16[3] > b.u16[3]), + BOOL_TO_U16(a.u16[4] > b.u16[4]), BOOL_TO_U16(a.u16[5] > b.u16[5]), BOOL_TO_U16(a.u16[6] > b.u16[6]), BOOL_TO_U16(a.u16[7] > b.u16[7]) + ); +} + +PX_FORCE_INLINE VecU16V V4I16CompareGt(VecU16V a, VecU16V b) +{ + return VecU16V + ( + BOOL_TO_U16(a.i16[0] > b.i16[0]), BOOL_TO_U16(a.i16[1] > b.i16[1]), BOOL_TO_U16(a.i16[2] > b.i16[2]), BOOL_TO_U16(a.i16[3] > b.i16[3]), + BOOL_TO_U16(a.i16[4] > b.i16[4]), BOOL_TO_U16(a.i16[5] > b.i16[5]), BOOL_TO_U16(a.i16[6] > b.i16[6]), BOOL_TO_U16(a.i16[7] > b.i16[7]) + ); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_VecU32V(VecU32V a) +{ + return Vec4V(PxF32((a).u32[0]), PxF32((a).u32[1]), PxF32((a).u32[2]), PxF32((a).u32[3])); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_VecI32V(VecI32V a) +{ + return Vec4V(PxF32((a).i32[0]), PxF32((a).i32[1]), PxF32((a).i32[2]), PxF32((a).i32[3])); +} + +PX_FORCE_INLINE VecI32V VecI32V_From_Vec4V(Vec4V a) +{ + float* data = (float*)&a; + return VecI32V(PxI32(data[0]), PxI32(data[1]), PxI32(data[2]), PxI32(data[3])); +} + +PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecU32V(VecU32V a) +{ + Vec4V b = *reinterpret_cast<Vec4V*>(&a); + return b; +} + +PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecI32V(VecI32V a) +{ + Vec4V b = 
*reinterpret_cast<Vec4V*>(&a); + return b; +} + +PX_FORCE_INLINE VecU32V VecU32V_ReinterpretFrom_Vec4V(Vec4V a) +{ + VecU32V b = *reinterpret_cast<VecU32V*>(&a); + return b; +} + +PX_FORCE_INLINE VecI32V VecI32V_ReinterpretFrom_Vec4V(Vec4V a) +{ + VecI32V b = *reinterpret_cast<VecI32V*>(&a); + return b; +} + +template <int index> +PX_FORCE_INLINE VecU32V V4U32SplatElement(VecU32V a) +{ + return VecU32V((a).u32[index], (a).u32[index], (a).u32[index], (a).u32[index]); +} + +template <int index> +PX_FORCE_INLINE VecU32V V4U32SplatElement(BoolV a) +{ + const PxU32 u = (&a.ux)[index]; + return VecU32V(u, u, u, u); +} + +template <int index> +PX_FORCE_INLINE Vec4V V4SplatElement(Vec4V a) +{ + float* data = (float*)&a; + return Vec4V(data[index], data[index], data[index], data[index]); +} + +PX_FORCE_INLINE VecU32V U4LoadXYZW(PxU32 x, PxU32 y, PxU32 z, PxU32 w) +{ + return VecU32V(x, y, z, w); +} + +PX_FORCE_INLINE Vec4V V4Abs(const Vec4V a) +{ + return V4Max(a, V4Neg(a)); +} + +PX_FORCE_INLINE BoolV V4IsEqU32(const VecU32V a, const VecU32V b) +{ + return BoolV(BOOL_TO_U32(a.u32[0] == b.u32[0]), BOOL_TO_U32(a.u32[1] == b.u32[1]), BOOL_TO_U32(a.u32[2] == b.u32[2]), BOOL_TO_U32(a.u32[3] == b.u32[3])); +} + +PX_FORCE_INLINE VecU32V U4Load(const PxU32 i) +{ + return VecU32V(i, i, i, i); +} + +PX_FORCE_INLINE VecU32V U4LoadU(const PxU32* i) +{ + return VecU32V(i[0], i[1], i[2], i[3]); +} + +PX_FORCE_INLINE VecU32V U4LoadA(const PxU32* i) +{ + return VecU32V(i[0], i[1], i[2], i[3]); +} + +PX_FORCE_INLINE VecI32V I4Load(const PxI32 i) +{ + return VecI32V(i, i, i, i); +} + +PX_FORCE_INLINE VecI32V I4LoadU(const PxI32* i) +{ + return VecI32V(i[0], i[1], i[2], i[3]); +} + +PX_FORCE_INLINE VecI32V I4LoadA(const PxI32* i) +{ + return VecI32V(i[0], i[1], i[2], i[3]); +} + +PX_FORCE_INLINE VecI32V VecI32V_Add(const VecI32VArg a, const VecI32VArg b) +{ + return VecI32V(a.i32[0] + b.i32[0], a.i32[1] + b.i32[1], a.i32[2] + b.i32[2], a.i32[3] + b.i32[3]); +} + +PX_FORCE_INLINE VecI32V 
VecI32V_Sub(const VecI32VArg a, const VecI32VArg b) +{ + return VecI32V(a.i32[0] - b.i32[0], a.i32[1] - b.i32[1], a.i32[2] - b.i32[2], a.i32[3] - b.i32[3]); +} + +PX_FORCE_INLINE BoolV VecI32V_IsGrtr(const VecI32VArg a, const VecI32VArg b) +{ + return BoolV(BOOL_TO_U32(a.i32[0] > b.i32[0]), BOOL_TO_U32(a.i32[1] > b.i32[1]), BOOL_TO_U32(a.i32[2] > b.i32[2]), BOOL_TO_U32(a.i32[3] > b.i32[3])); +} + +PX_FORCE_INLINE BoolV VecI32V_IsEq(const VecI32VArg a, const VecI32VArg b) +{ + return BoolV(BOOL_TO_U32(a.i32[0] == b.i32[0]), BOOL_TO_U32(a.i32[1] == b.i32[1]), BOOL_TO_U32(a.i32[2] == b.i32[2]), BOOL_TO_U32(a.i32[3] == b.i32[3])); +} + +PX_FORCE_INLINE VecI32V V4I32Sel(const BoolV c, const VecI32V a, const VecI32V b) +{ + return VecI32V(c.ux ? a.i32[0] : b.i32[0], c.uy ? a.i32[1] : b.i32[1], c.uz ? a.i32[2] : b.i32[2], + c.uw ? a.i32[3] : b.i32[3]); +} + +PX_FORCE_INLINE VecI32V VecI32V_Zero() +{ + return VecI32V(0, 0, 0, 0); +} + +PX_FORCE_INLINE VecI32V VecI32V_One() +{ + return VecI32V(1, 1, 1, 1); +} + +PX_FORCE_INLINE VecI32V VecI32V_Two() +{ + return VecI32V(2, 2, 2, 2); +} + +PX_FORCE_INLINE VecI32V VecI32V_MinusOne() +{ + return VecI32V(-1, -1, -1, -1); +} + +PX_FORCE_INLINE VecU32V U4Zero() +{ + return VecU32V(0, 0, 0, 0); +} + +PX_FORCE_INLINE VecU32V U4One() +{ + return VecU32V(1, 1, 1, 1); +} + +PX_FORCE_INLINE VecU32V U4Two() +{ + return VecU32V(2, 2, 2, 2); +} + +PX_FORCE_INLINE VecShiftV VecI32V_PrepareShift(const VecI32VArg shift) +{ + return shift; +} + +PX_FORCE_INLINE VecI32V VecI32V_LeftShift(const VecI32VArg a, const VecShiftVArg count) +{ + return VecI32V(a.i32[0] << count.i32[0], a.i32[1] << count.i32[1], a.i32[2] << count.i32[2], a.i32[3] + << count.i32[3]); +} + +PX_FORCE_INLINE VecI32V VecI32V_RightShift(const VecI32VArg a, const VecShiftVArg count) +{ + return VecI32V(a.i32[0] >> count.i32[0], a.i32[1] >> count.i32[1], a.i32[2] >> count.i32[2], + a.i32[3] >> count.i32[3]); +} + +PX_FORCE_INLINE VecI32V VecI32V_And(const VecI32VArg a, const 
VecI32VArg b) +{ + return VecI32V(a.i32[0] & b.i32[0], a.i32[1] & b.i32[1], a.i32[2] & b.i32[2], a.i32[3] & b.i32[3]); +} + +PX_FORCE_INLINE VecI32V VecI32V_Or(const VecI32VArg a, const VecI32VArg b) +{ + return VecI32V(a.i32[0] | b.i32[0], a.i32[1] | b.i32[1], a.i32[2] | b.i32[2], a.i32[3] | b.i32[3]); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetX(const VecI32VArg a) +{ + return VecI32V(a.i32[0], a.i32[0], a.i32[0], a.i32[0]); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetY(const VecI32VArg a) +{ + return VecI32V(a.i32[1], a.i32[1], a.i32[1], a.i32[1]); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetZ(const VecI32VArg a) +{ + return VecI32V(a.i32[2], a.i32[2], a.i32[2], a.i32[2]); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetW(const VecI32VArg a) +{ + return VecI32V(a.i32[3], a.i32[3], a.i32[3], a.i32[3]); +} + +PX_FORCE_INLINE VecI32V VecI32V_Sel(const BoolV c, const VecI32VArg a, const VecI32VArg b) +{ + return VecI32V(c.ux ? a.i32[0] : b.i32[0], c.uy ? a.i32[1] : b.i32[1], c.uz ? a.i32[2] : b.i32[2], + c.uw ? 
a.i32[3] : b.i32[3]); +} + +PX_FORCE_INLINE VecI32V VecI32V_Merge(const VecI32VArg a, const VecI32VArg b, const VecI32VArg c, const VecI32VArg d) +{ + return VecI32V(a.i32[0], b.i32[0], c.i32[0], d.i32[0]); +} + +PX_FORCE_INLINE void PxI32_From_VecI32V(const VecI32VArg a, PxI32* i) +{ + *i = a.i32[0]; +} + +PX_FORCE_INLINE VecI32V VecI32V_From_BoolV(const BoolVArg b) +{ + return VecI32V(PxI32(b.ux), PxI32(b.uy), PxI32(b.uz), PxI32(b.uw)); +} + +PX_FORCE_INLINE VecU32V VecU32V_From_BoolV(const BoolVArg b) +{ + return VecU32V(b.ux, b.uy, b.uz, b.uw); +} + +PX_FORCE_INLINE void QuatGetMat33V(const QuatVArg q, Vec3V& column0, Vec3V& column1, Vec3V& column2) +{ + const FloatV one = FOne(); + const FloatV x = V4GetX(q); + const FloatV y = V4GetY(q); + const FloatV z = V4GetZ(q); + const FloatV w = V4GetW(q); + + const FloatV x2 = FAdd(x, x); + const FloatV y2 = FAdd(y, y); + const FloatV z2 = FAdd(z, z); + + const FloatV xx = FMul(x2, x); + const FloatV yy = FMul(y2, y); + const FloatV zz = FMul(z2, z); + + const FloatV xy = FMul(x2, y); + const FloatV xz = FMul(x2, z); + const FloatV xw = FMul(x2, w); + + const FloatV yz = FMul(y2, z); + const FloatV yw = FMul(y2, w); + const FloatV zw = FMul(z2, w); + + const FloatV v = FSub(one, xx); + + column0 = V3Merge(FSub(FSub(one, yy), zz), FAdd(xy, zw), FSub(xz, yw)); + column1 = V3Merge(FSub(xy, zw), FSub(v, zz), FAdd(yz, xw)); + column2 = V3Merge(FAdd(xz, yw), FSub(yz, xw), FSub(v, yy)); +} + + +// not used + +/* +PX_FORCE_INLINE Vec4V V4LoadAligned(Vec4V* addr) +{ + return *addr; +} +*/ + +/* +PX_FORCE_INLINE Vec4V V4LoadUnaligned(Vec4V* addr) +{ + return *addr; +} +*/ + +/* +PX_FORCE_INLINE Vec4V V4Ceil(const Vec4V a) +{ + return Vec4V(PxCeil(a.x), PxCeil(a.y), PxCeil(a.z), PxCeil(a.w)); +} + +PX_FORCE_INLINE Vec4V V4Floor(const Vec4V a) +{ + return Vec4V(PxFloor(a.x), PxFloor(a.y), PxFloor(a.z), PxFloor(a.w)); +} +*/ + +/* +PX_FORCE_INLINE VecU32V V4ConvertToU32VSaturate(const Vec4V a, PxU32 power) +{ + PX_ASSERT(power == 
0 && "Non-zero power not supported in convertToU32VSaturate"); + PX_UNUSED(power); // prevent warning in release builds + PxF32 ffffFFFFasFloat = PxF32(0xFFFF0000); + return VecU32V( + PxU32(PxClamp<PxF32>((a).x, 0.0f, ffffFFFFasFloat)), + PxU32(PxClamp<PxF32>((a).y, 0.0f, ffffFFFFasFloat)), + PxU32(PxClamp<PxF32>((a).z, 0.0f, ffffFFFFasFloat)), + PxU32(PxClamp<PxF32>((a).w, 0.0f, ffffFFFFasFloat))); +} +*/ + +#endif // PSFOUNDATION_PSVECMATHAOSSCALARINLINE_H diff --git a/PxShared/src/foundation/include/PsVecMathSSE.h b/PxShared/src/foundation/include/PsVecMathSSE.h new file mode 100644 index 0000000..559fa68 --- /dev/null +++ b/PxShared/src/foundation/include/PsVecMathSSE.h @@ -0,0 +1,56 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. 
Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSVECMATHSSE_H +#define PSFOUNDATION_PSVECMATHSSE_H + +namespace +{ + const PX_ALIGN(16, PxF32) minus1w[4] = { 0.0f, 0.0f, 0.0f, -1.0f }; +} + +PX_FORCE_INLINE void QuatGetMat33V(const QuatVArg q, Vec3V& column0, Vec3V& column1, Vec3V& column2) +{ + const __m128 q2 = V4Add(q, q); + const __m128 qw2 = V4MulAdd(q2, V4GetW(q), _mm_load_ps(minus1w)); // (2wx, 2wy, 2wz, 2ww-1) + const __m128 nw2 = Vec3V_From_Vec4V(V4Neg(qw2)); // (-2wx, -2wy, -2wz, 0) + const __m128 v = Vec3V_From_Vec4V(q); + + const __m128 a0 = _mm_shuffle_ps(qw2, nw2, _MM_SHUFFLE(3, 1, 2, 3)); // (2ww-1, 2wz, -2wy, 0) + column0 = V4MulAdd(v, V4GetX(q2), a0); + + const __m128 a1 = _mm_shuffle_ps(qw2, nw2, _MM_SHUFFLE(3, 2, 0, 3)); // (2ww-1, 2wx, -2wz, 0) + column1 = V4MulAdd(v, V4GetY(q2), _mm_shuffle_ps(a1, a1, _MM_SHUFFLE(3, 1, 0, 2))); + + const __m128 a2 = _mm_shuffle_ps(qw2, nw2, _MM_SHUFFLE(3, 0, 1, 3)); // (2ww-1, 2wy, -2wx, 0) + column2 = V4MulAdd(v, V4GetZ(q2), _mm_shuffle_ps(a2, a2, _MM_SHUFFLE(3, 0, 2, 1))); +} + +#endif // PSFOUNDATION_PSVECMATHSSE_H + diff --git a/PxShared/src/foundation/include/PsVecMathUtilities.h b/PxShared/src/foundation/include/PsVecMathUtilities.h new file mode 100644 index 0000000..21bc50a --- /dev/null +++ b/PxShared/src/foundation/include/PsVecMathUtilities.h @@ -0,0 +1,57 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSVECMATHUTILITIES_H +#define PSFOUNDATION_PSVECMATHUTILITIES_H + +#include "PsVecMath.h" + +namespace physx +{ +namespace shdfnd +{ +namespace aos +{ +/*! 
+ Extend an edge along its length by a factor + */ +PX_FORCE_INLINE void makeFatEdge(Vec3V& p0, Vec3V& p1, const FloatVArg fatCoeff) +{ + const Vec3V delta = V3Sub(p1, p0); + const FloatV m = V3Length(delta); + const BoolV con = FIsGrtr(m, FZero()); + const Vec3V fatDelta = V3Scale(V3ScaleInv(delta, m), fatCoeff); + p0 = V3Sel(con, V3Sub(p0, fatDelta), p0); + p1 = V3Sel(con, V3Add(p1, fatDelta), p1); +} +} +} +} + +#endif diff --git a/PxShared/src/foundation/include/PsVecQuat.h b/PxShared/src/foundation/include/PsVecQuat.h new file mode 100644 index 0000000..c4726fc --- /dev/null +++ b/PxShared/src/foundation/include/PsVecQuat.h @@ -0,0 +1,455 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSVECQUAT_H +#define PSFOUNDATION_PSVECQUAT_H + +//#include "PsInlineAoS.h" +// Fallback value of pi/2, used only when PX_PIDIV2 has not been defined before this header is included. +#ifndef PX_PIDIV2 +#define PX_PIDIV2 1.570796327f +#endif + +////////////////////////////////// +// QuatV +////////////////////////////////// +PX_FORCE_INLINE QuatV QuatVLoadXYZW(const PxF32 x, const PxF32 y, const PxF32 z, const PxF32 w) +{ + return V4LoadXYZW(x, y, z, w); +} +// Builds a QuatV from the floats at v by forwarding to V4LoadU (presumably the unaligned load - confirm against V4LoadU). +PX_FORCE_INLINE QuatV QuatVLoadU(const PxF32* v) +{ + return V4LoadU(v); +} +// Builds a QuatV from the floats at v by forwarding to V4LoadA (presumably requires an aligned address - confirm against V4LoadA). +PX_FORCE_INLINE QuatV QuatVLoadA(const PxF32* v) +{ + return V4LoadA(v); +} +// Returns the quaternion (u * sin(a/2), cos(a/2)) for axis u and angle a; u is used as given (presumably unit length - confirm with callers). +PX_FORCE_INLINE QuatV QuatV_From_RotationAxisAngle(const Vec3V u, const FloatV a) +{ + // q = cos(a/2) + u*sin(a/2) + const FloatV half = FLoad(0.5f); + const FloatV hangle = FMul(a, half); + const FloatV piByTwo(FLoad(PX_PIDIV2)); + const FloatV PiByTwoMinHangle(FSub(piByTwo, hangle)); + const Vec4V hangle2(Vec4V_From_Vec3V(V3Merge(hangle, PiByTwoMinHangle, hangle))); + + /*const FloatV sina = FSin(hangle); + const FloatV cosa = FCos(hangle);*/ + // One V4Sin call over (a/2, pi/2 - a/2, a/2): X holds sin(a/2) and Y holds sin(pi/2 - a/2), which equals cos(a/2). + const Vec4V _sina = V4Sin(hangle2); + const FloatV sina = V4GetX(_sina); + const FloatV cosa = V4GetY(_sina); + + const Vec3V v = V3Scale(u, sina); + // return V4Sel(BTTTF(), Vec4V_From_Vec3V(v), V4Splat(cosa)); + return V4SetW(Vec4V_From_Vec3V(v), cosa); +} + +// Normalize +PX_FORCE_INLINE QuatV QuatNormalize(const QuatV q) +{ + return V4Normalize(q); +} +// Length of q treated as a 4-vector (forwards to V4Length). +PX_FORCE_INLINE FloatV QuatLength(const QuatV q) +{ + return V4Length(q); +} +// Squared length of q treated as a 4-vector (forwards to V4LengthSq). +PX_FORCE_INLINE FloatV QuatLengthSq(const QuatV q) +{ + return V4LengthSq(q); +} +
+PX_FORCE_INLINE FloatV QuatDot(const QuatV a, const QuatV b) // dot product of a and b (forwards to V4Dot) +{ + return V4Dot(a, b); +} + +PX_FORCE_INLINE QuatV QuatConjugate(const QuatV q) +{ + return V4SetW(V4Neg(q), V4GetW(q)); +} + +PX_FORCE_INLINE Vec3V QuatGetImaginaryPart(const QuatV q) +{ + return Vec3V_From_Vec4V(q); +} + +/** \brief computes rotation of x-axis */ +PX_FORCE_INLINE Vec3V QuatGetBasisVector0(const QuatV q) +{ + /*const PxF32 x2 = x*2.0f; + const PxF32 w2 = w*2.0f; + return PxVec3( (w * w2) - 1.0f + x*x2, + (z * w2) + y*x2, + (-y * w2) + z*x2);*/ + + const FloatV two = FLoad(2.f); + const FloatV w = V4GetW(q); + const Vec3V u = Vec3V_From_Vec4V(q); + + const FloatV x2 = FMul(V3GetX(u), two); + const FloatV w2 = FMul(w, two); + + const Vec3V a = V3Scale(u, x2); + const Vec3V tmp = V3Merge(w, V3GetZ(u), FNeg(V3GetY(u))); + // const Vec3V b = V3Scale(tmp, w2); + // const Vec3V ab = V3Add(a, b); + const Vec3V ab = V3ScaleAdd(tmp, w2, a); + return V3SetX(ab, FSub(V3GetX(ab), FOne())); +} + +/** \brief computes rotation of y-axis */ +PX_FORCE_INLINE Vec3V QuatGetBasisVector1(const QuatV q) +{ + /*const PxF32 y2 = y*2.0f; + const PxF32 w2 = w*2.0f; + return PxVec3( (-z * w2) + x*y2, + (w * w2) - 1.0f + y*y2, + (x * w2) + z*y2);*/ + + const FloatV two = FLoad(2.f); + const FloatV w = V4GetW(q); + const Vec3V u = Vec3V_From_Vec4V(q); + + const FloatV y2 = FMul(V3GetY(u), two); + const FloatV w2 = FMul(w, two); + + const Vec3V a = V3Scale(u, y2); + const Vec3V tmp = V3Merge(FNeg(V3GetZ(u)), w, V3GetX(u)); + // const Vec3V b = V3Scale(tmp, w2); + // const Vec3V ab = V3Add(a, b); + const Vec3V ab = V3ScaleAdd(tmp, w2, a); + return V3SetY(ab, FSub(V3GetY(ab), FOne())); +} + +/** \brief computes rotation of z-axis */ +PX_FORCE_INLINE Vec3V QuatGetBasisVector2(const QuatV q) +{ + /*const PxF32 z2 = z*2.0f; + const PxF32 w2 = w*2.0f; + return PxVec3( (y * w2) + x*z2, + (-x * w2) + y*z2, + (w * w2) - 1.0f + z*z2);*/ + + const FloatV two = FLoad(2.f); + const 
FloatV w = V4GetW(q); + const Vec3V u = Vec3V_From_Vec4V(q); + + const FloatV z2 = FMul(V3GetZ(u), two); + const FloatV w2 = FMul(w, two); + + const Vec3V a = V3Scale(u, z2); + const Vec3V tmp = V3Merge(V3GetY(u), FNeg(V3GetX(u)), w); + /*const Vec3V b = V3Scale(tmp, w2); + const Vec3V ab = V3Add(a, b);*/ + const Vec3V ab = V3ScaleAdd(tmp, w2, a); + return V3SetZ(ab, FSub(V3GetZ(ab), FOne())); +} + +PX_FORCE_INLINE Vec3V QuatRotate(const QuatV q, const Vec3V v) +{ + /* + const PxVec3 qv(x,y,z); + return (v*(w*w-0.5f) + (qv.cross(v))*w + qv*(qv.dot(v)))*2; + */ + + const FloatV two = FLoad(2.f); + // const FloatV half = FloatV_From_F32(0.5f); + const FloatV nhalf = FLoad(-0.5f); + const Vec3V u = Vec3V_From_Vec4V(q); + const FloatV w = V4GetW(q); + // const FloatV w2 = FSub(FMul(w, w), half); + const FloatV w2 = FScaleAdd(w, w, nhalf); + const Vec3V a = V3Scale(v, w2); + // const Vec3V b = V3Scale(V3Cross(u, v), w); + // const Vec3V c = V3Scale(u, V3Dot(u, v)); + // return V3Scale(V3Add(V3Add(a, b), c), two); + const Vec3V temp = V3ScaleAdd(V3Cross(u, v), w, a); + return V3Scale(V3ScaleAdd(u, V3Dot(u, v), temp), two); +} + +PX_FORCE_INLINE Vec3V QuatTransform(const QuatV q, const Vec3V p, const Vec3V v) +{ + // p + q.rotate(v) + const FloatV two = FLoad(2.f); + // const FloatV half = FloatV_From_F32(0.5f); + const FloatV nhalf = FLoad(-0.5f); + const Vec3V u = Vec3V_From_Vec4V(q); + const FloatV w = V4GetW(q); + // const FloatV w2 = FSub(FMul(w, w), half); + const FloatV w2 = FScaleAdd(w, w, nhalf); + const Vec3V a = V3Scale(v, w2); + /*const Vec3V b = V3Scale(V3Cross(u, v), w); + const Vec3V c = V3Scale(u, V3Dot(u, v)); + return V3ScaleAdd(V3Add(V3Add(a, b), c), two, p);*/ + const Vec3V temp = V3ScaleAdd(V3Cross(u, v), w, a); + const Vec3V z = V3ScaleAdd(u, V3Dot(u, v), temp); + return V3ScaleAdd(z, two, p); +} + +PX_FORCE_INLINE Vec3V QuatRotateInv(const QuatV q, const Vec3V v) +{ + + // const PxVec3 qv(x,y,z); + // return (v*(w*w-0.5f) - (qv.cross(v))*w + 
qv*(qv.dot(v)))*2; + + const FloatV two = FLoad(2.f); + const FloatV nhalf = FLoad(-0.5f); + const Vec3V u = Vec3V_From_Vec4V(q); + const FloatV w = V4GetW(q); + const FloatV w2 = FScaleAdd(w, w, nhalf); + const Vec3V a = V3Scale(v, w2); + /*const Vec3V b = V3Scale(V3Cross(u, v), w); + const Vec3V c = V3Scale(u, V3Dot(u, v)); + return V3Scale(V3Add(V3Sub(a, b), c), two);*/ + const Vec3V temp = V3NegScaleSub(V3Cross(u, v), w, a); + return V3Scale(V3ScaleAdd(u, V3Dot(u, v), temp), two); +} + +PX_FORCE_INLINE QuatV QuatMul(const QuatV a, const QuatV b) +{ + const Vec3V imagA = Vec3V_From_Vec4V(a); + const Vec3V imagB = Vec3V_From_Vec4V(b); + const FloatV rA = V4GetW(a); + const FloatV rB = V4GetW(b); + + const FloatV real = FSub(FMul(rA, rB), V3Dot(imagA, imagB)); + const Vec3V v0 = V3Scale(imagA, rB); + const Vec3V v1 = V3Scale(imagB, rA); + const Vec3V v2 = V3Cross(imagA, imagB); + const Vec3V imag = V3Add(V3Add(v0, v1), v2); + + return V4SetW(Vec4V_From_Vec3V(imag), real); +} + +PX_FORCE_INLINE QuatV QuatAdd(const QuatV a, const QuatV b) +{ + return V4Add(a, b); +} + +PX_FORCE_INLINE QuatV QuatNeg(const QuatV q) +{ + return V4Neg(q); +} + +PX_FORCE_INLINE QuatV QuatSub(const QuatV a, const QuatV b) +{ + return V4Sub(a, b); +} + +PX_FORCE_INLINE QuatV QuatScale(const QuatV a, const FloatV b) +{ + return V4Scale(a, b); +} + +PX_FORCE_INLINE QuatV QuatMerge(const FloatV* const floatVArray) +{ + return V4Merge(floatVArray); +} + +PX_FORCE_INLINE QuatV QuatMerge(const FloatVArg x, const FloatVArg y, const FloatVArg z, const FloatVArg w) +{ + return V4Merge(x, y, z, w); +} + +PX_FORCE_INLINE QuatV QuatIdentity() +{ + return V4SetW(V4Zero(), FOne()); +} + +PX_FORCE_INLINE bool isFiniteQuatV(const QuatV q) +{ + return isFiniteVec4V(q); +} + +PX_FORCE_INLINE bool isValidQuatV(const QuatV q) +{ + const FloatV unitTolerance = FLoad(1e-4f); + const FloatV tmp = FAbs(FSub(QuatLength(q), FOne())); + const BoolV con = FIsGrtr(unitTolerance, tmp); + return isFiniteVec4V(q) & 
(BAllEqTTTT(con) == 1); +} + +PX_FORCE_INLINE bool isSaneQuatV(const QuatV q) +{ + const FloatV unitTolerance = FLoad(1e-2f); + const FloatV tmp = FAbs(FSub(QuatLength(q), FOne())); + const BoolV con = FIsGrtr(unitTolerance, tmp); + return isFiniteVec4V(q) & (BAllEqTTTT(con) == 1); +} + +PX_FORCE_INLINE Mat33V QuatGetMat33V(const QuatVArg q) +{ + // const FloatV two = FloatV_From_F32(2.f); + // const FloatV one = FOne(); + + // const FloatV x = V4GetX(q); + // const FloatV y = V4GetY(q); + // const FloatV z = V4GetZ(q); + // const Vec4V _q = V4Mul(q, two); + // + ////const FloatV w = V4GetW(q); + + // const Vec4V t0 = V4Mul(_q, x); // 2xx, 2xy, 2xz, 2xw + // const Vec4V t1 = V4Mul(_q, y); // 2xy, 2yy, 2yz, 2yw + // const Vec4V t2 = V4Mul(_q, z); // 2xz, 2yz, 2zz, 2zw + ////const Vec4V t3 = V4Mul(_q, w); // 2xw, 2yw, 2zw, 2ww + + // const FloatV xx2 = V4GetX(t0); + // const FloatV xy2 = V4GetY(t0); + // const FloatV xz2 = V4GetZ(t0); + // const FloatV xw2 = V4GetW(t0); + + // const FloatV yy2 = V4GetY(t1); + // const FloatV yz2 = V4GetZ(t1); + // const FloatV yw2 = V4GetW(t1); + + // const FloatV zz2 = V4GetZ(t2); + // const FloatV zw2 = V4GetW(t2); + + ////const FloatV ww2 = V4GetW(t3); + + // const FloatV c00 = FSub(one, FAdd(yy2, zz2)); + // const FloatV c01 = FSub(xy2, zw2); + // const FloatV c02 = FAdd(xz2, yw2); + + // const FloatV c10 = FAdd(xy2, zw2); + // const FloatV c11 = FSub(one, FAdd(xx2, zz2)); + // const FloatV c12 = FSub(yz2, xw2); + + // const FloatV c20 = FSub(xz2, yw2); + // const FloatV c21 = FAdd(yz2, xw2); + // const FloatV c22 = FSub(one, FAdd(xx2, yy2)); + + // const Vec3V c0 = V3Merge(c00, c10, c20); + // const Vec3V c1 = V3Merge(c01, c11, c21); + // const Vec3V c2 = V3Merge(c02, c12, c22); + + // return Mat33V(c0, c1, c2); + + const FloatV one = FOne(); + const FloatV x = V4GetX(q); + const FloatV y = V4GetY(q); + const FloatV z = V4GetZ(q); + const FloatV w = V4GetW(q); + + const FloatV x2 = FAdd(x, x); + const FloatV y2 = FAdd(y, y); + 
const FloatV z2 = FAdd(z, z); + + const FloatV xx = FMul(x2, x); + const FloatV yy = FMul(y2, y); + const FloatV zz = FMul(z2, z); + + const FloatV xy = FMul(x2, y); + const FloatV xz = FMul(x2, z); + const FloatV xw = FMul(x2, w); + + const FloatV yz = FMul(y2, z); + const FloatV yw = FMul(y2, w); + const FloatV zw = FMul(z2, w); + + const FloatV v = FSub(one, xx); + + const Vec3V column0 = V3Merge(FSub(FSub(one, yy), zz), FAdd(xy, zw), FSub(xz, yw)); + const Vec3V column1 = V3Merge(FSub(xy, zw), FSub(v, zz), FAdd(yz, xw)); + const Vec3V column2 = V3Merge(FAdd(xz, yw), FSub(yz, xw), FSub(v, yy)); + return Mat33V(column0, column1, column2); +} + +PX_FORCE_INLINE QuatV Mat33GetQuatV(const Mat33V& a) +{ + const FloatV one = FOne(); + const FloatV zero = FZero(); + const FloatV half = FLoad(0.5f); + const FloatV two = FLoad(2.f); + const FloatV scale = FLoad(0.25f); + const FloatV a00 = V3GetX(a.col0); + const FloatV a11 = V3GetY(a.col1); + const FloatV a22 = V3GetZ(a.col2); + + const FloatV a21 = V3GetZ(a.col1); // row=2, col=1; + const FloatV a12 = V3GetY(a.col2); // row=1, col=2; + const FloatV a02 = V3GetX(a.col2); // row=0, col=2; + const FloatV a20 = V3GetZ(a.col0); // row=2, col=0; + const FloatV a10 = V3GetY(a.col0); // row=1, col=0; + const FloatV a01 = V3GetX(a.col1); // row=0, col=1; + + const Vec3V vec0 = V3Merge(a21, a02, a10); + const Vec3V vec1 = V3Merge(a12, a20, a01); + const Vec3V v = V3Sub(vec0, vec1); + const Vec3V g = V3Add(vec0, vec1); + + const FloatV trace = FAdd(a00, FAdd(a11, a22)); + + if(FAllGrtrOrEq(trace, zero)) + { + const FloatV h = FSqrt(FAdd(trace, one)); + const FloatV w = FMul(half, h); + const FloatV s = FMul(half, FRecip(h)); + const Vec3V u = V3Scale(v, s); + return V4SetW(Vec4V_From_Vec3V(u), w); + } + else + { + const FloatV ntrace = FNeg(trace); + const Vec3V d = V3Merge(a00, a11, a22); + const BoolV con0 = BAllTrue3(V3IsGrtrOrEq(V3Splat(a00), d)); + const BoolV con1 = BAllTrue3(V3IsGrtrOrEq(V3Splat(a11), d)); + + const FloatV 
t0 = FAdd(one, FScaleAdd(a00, two, ntrace)); + const FloatV t1 = FAdd(one, FScaleAdd(a11, two, ntrace)); + const FloatV t2 = FAdd(one, FScaleAdd(a22, two, ntrace)); + + const FloatV t = FSel(con0, t0, FSel(con1, t1, t2)); + + const FloatV h = FMul(two, FSqrt(t)); + const FloatV s = FRecip(h); + const FloatV g0 = FMul(scale, h); + const Vec3V vs = V3Scale(v, s); + const Vec3V gs = V3Scale(g, s); + const FloatV gsx = V3GetX(gs); + const FloatV gsy = V3GetY(gs); + const FloatV gsz = V3GetZ(gs); + // vs.x= (a21 - a12)*s; vs.y=(a02 - a20)*s; vs.z=(a10 - a01)*s; + // gs.x= (a21 + a12)*s; gs.y=(a02 + a20)*s; gs.z=(a10 + a01)*s; + const Vec4V v0 = V4Merge(g0, gsz, gsy, V3GetX(vs)); + const Vec4V v1 = V4Merge(gsz, g0, gsx, V3GetY(vs)); + const Vec4V v2 = V4Merge(gsy, gsx, g0, V3GetZ(vs)); + return V4Sel(con0, v0, V4Sel(con1, v1, v2)); + } +} + +#endif diff --git a/PxShared/src/foundation/include/PsVecTransform.h b/PxShared/src/foundation/include/PsVecTransform.h new file mode 100644 index 0000000..5c16339 --- /dev/null +++ b/PxShared/src/foundation/include/PsVecTransform.h @@ -0,0 +1,283 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. 
+// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSVECTRANSFORM_H +#define PSFOUNDATION_PSVECTRANSFORM_H + +#include "PsVecMath.h" +#include "foundation/PxTransform.h" + +namespace physx +{ +namespace shdfnd +{ +namespace aos +{ + +class PsTransformV +{ + public: + QuatV q; + Vec3V p; + + PX_FORCE_INLINE PsTransformV(const PxTransform& orientation) + { + // const PxQuat oq = orientation.q; + // const PxF32 f[4] = {oq.x, oq.y, oq.z, oq.w}; + q = QuatVLoadXYZW(orientation.q.x, orientation.q.y, orientation.q.z, orientation.q.w); + // q = QuatV_From_F32Array(&oq.x); + p = V3LoadU(orientation.p); + } + + PX_FORCE_INLINE PsTransformV(const Vec3VArg p0 = V3Zero(), const QuatVArg q0 = QuatIdentity()) : q(q0), p(p0) + { + PX_ASSERT(isSaneQuatV(q0)); + } + + PX_FORCE_INLINE PsTransformV operator*(const PsTransformV& x) const + { + PX_ASSERT(x.isSane()); + return transform(x); + } + + PX_FORCE_INLINE PsTransformV getInverse() const + { + PX_ASSERT(isFinite()); + // return PxTransform(q.rotateInv(-p),q.getConjugate()); + return PsTransformV(QuatRotateInv(q, V3Neg(p)), QuatConjugate(q)); + } + + 
PX_FORCE_INLINE void normalize() + { + p = V3Zero(); + q = QuatIdentity(); + } + + PX_FORCE_INLINE void Invalidate() + { + p = V3Splat(FMax()); + q = QuatIdentity(); + } + + PX_FORCE_INLINE Vec3V transform(const Vec3VArg input) const + { + PX_ASSERT(isFinite()); + // return q.rotate(input) + p; + return QuatTransform(q, p, input); + } + + PX_FORCE_INLINE Vec3V transformInv(const Vec3VArg input) const + { + PX_ASSERT(isFinite()); + // return q.rotateInv(input-p); + return QuatRotateInv(q, V3Sub(input, p)); + } + + PX_FORCE_INLINE Vec3V rotate(const Vec3VArg input) const + { + PX_ASSERT(isFinite()); + // return q.rotate(input); + return QuatRotate(q, input); + } + + PX_FORCE_INLINE Vec3V rotateInv(const Vec3VArg input) const + { + PX_ASSERT(isFinite()); + // return q.rotateInv(input); + return QuatRotateInv(q, input); + } + + //! Transform transform to parent (returns compound transform: first src, then *this) + PX_FORCE_INLINE PsTransformV transform(const PsTransformV& src) const + { + PX_ASSERT(src.isSane()); + PX_ASSERT(isSane()); + // src = [srct, srcr] -> [r*srct + t, r*srcr] + // return PxTransform(q.rotate(src.p) + p, q*src.q); + return PsTransformV(V3Add(QuatRotate(q, src.p), p), QuatMul(q, src.q)); + } + + /** + \brief returns true if finite and q is a unit quaternion + */ + + PX_FORCE_INLINE bool isValid() const + { + // return p.isFinite() && q.isFinite() && q.isValid(); + return isFiniteVec3V(p) & isFiniteQuatV(q) & isValidQuatV(q); + } + + /** + \brief returns true if finite and quat magnitude is reasonably close to unit to allow for some accumulation of error + vs isValid + */ + + PX_FORCE_INLINE bool isSane() const + { + // return isFinite() && q.isSane(); + return isFinite() & isSaneQuatV(q); + } + + /** + \brief returns true if all elems are finite (not NAN or INF, etc.) + */ + PX_FORCE_INLINE bool isFinite() const + { + // return p.isFinite() && q.isFinite(); + return isFiniteVec3V(p) & isFiniteQuatV(q); + } + + //! 
Transform transform from parent (returns compound transform: first src, then this->inverse) + PX_FORCE_INLINE PsTransformV transformInv(const PsTransformV& src) const + { + PX_ASSERT(src.isSane()); + PX_ASSERT(isFinite()); + // src = [srct, srcr] -> [r^-1*(srct-t), r^-1*srcr] + /*PxQuat qinv = q.getConjugate(); + return PxTransform(qinv.rotate(src.p - p), qinv*src.q);*/ + const QuatV qinv = QuatConjugate(q); + const Vec3V v = QuatRotate(qinv, V3Sub(src.p, p)); + const QuatV rot = QuatMul(qinv, src.q); + return PsTransformV(v, rot); + } + + static PX_FORCE_INLINE PsTransformV createIdentity() + { + return PsTransformV(V3Zero()); + } +}; + +PX_FORCE_INLINE PsTransformV loadTransformA(const PxTransform& transform) +{ + const QuatV q0 = QuatVLoadA(&transform.q.x); + const Vec3V p0 = V3LoadA(&transform.p.x); + + return PsTransformV(p0, q0); +} + +PX_FORCE_INLINE PsTransformV loadTransformU(const PxTransform& transform) +{ + const QuatV q0 = QuatVLoadU(&transform.q.x); + const Vec3V p0 = V3LoadU(&transform.p.x); + + return PsTransformV(p0, q0); +} + +class PsMatTransformV +{ + public: + Mat33V rot; + Vec3V p; + + PX_FORCE_INLINE PsMatTransformV() + { + p = V3Zero(); + rot = M33Identity(); + } + PX_FORCE_INLINE PsMatTransformV(const Vec3VArg _p, const Mat33V& _rot) + { + p = _p; + rot = _rot; + } + + PX_FORCE_INLINE PsMatTransformV(const PsTransformV& other) + { + p = other.p; + QuatGetMat33V(other.q, rot.col0, rot.col1, rot.col2); + } + + PX_FORCE_INLINE PsMatTransformV(const Vec3VArg _p, const QuatV& quat) + { + p = _p; + QuatGetMat33V(quat, rot.col0, rot.col1, rot.col2); + } + + PX_FORCE_INLINE Vec3V getCol0() const + { + return rot.col0; + } + + PX_FORCE_INLINE Vec3V getCol1() const + { + return rot.col1; + } + + PX_FORCE_INLINE Vec3V getCol2() const + { + return rot.col2; + } + + PX_FORCE_INLINE void setCol0(const Vec3VArg col0) + { + rot.col0 = col0; + } + + PX_FORCE_INLINE void setCol1(const Vec3VArg col1) + { + rot.col1 = col1; + } + + PX_FORCE_INLINE void 
setCol2(const Vec3VArg col2) + { + rot.col2 = col2; + } + + PX_FORCE_INLINE Vec3V transform(const Vec3VArg input) const + { + return V3Add(p, M33MulV3(rot, input)); + } + + PX_FORCE_INLINE Vec3V transformInv(const Vec3VArg input) const + { + return M33TrnspsMulV3(rot, V3Sub(input, p)); // QuatRotateInv(q, V3Sub(input, p)); + } + + PX_FORCE_INLINE Vec3V rotate(const Vec3VArg input) const + { + return M33MulV3(rot, input); + } + + PX_FORCE_INLINE Vec3V rotateInv(const Vec3VArg input) const + { + return M33TrnspsMulV3(rot, input); + } + + PX_FORCE_INLINE PsMatTransformV transformInv(const PsMatTransformV& src) const + { + + const Vec3V v = M33TrnspsMulV3(rot, V3Sub(src.p, p)); + const Mat33V mat = M33MulM33(M33Trnsps(rot), src.rot); + return PsMatTransformV(v, mat); + } +}; +} +} +} + +#endif diff --git a/PxShared/src/foundation/include/nx/PsNXAbort.h b/PxShared/src/foundation/include/nx/PsNXAbort.h new file mode 100644 index 0000000..3b0413e --- /dev/null +++ b/PxShared/src/foundation/include/nx/PsNXAbort.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PX_FOUNDATION_PX_NX_ABORT_H +#define PX_FOUNDATION_PX_NX_ABORT_H + +#include "foundation/PxPreprocessor.h" +#include "nn/nn_Assert.h" +#include "nn/nn_Log.h" + +void abort(const char* message) +{ + NN_LOG(message); + NN_ASSERT(message == NULL); +} + +#endif // PX_FOUNDATION_PX_NX_ABORT_H diff --git a/PxShared/src/foundation/include/nx/PsNXIntrinsics.h b/PxShared/src/foundation/include/nx/PsNXIntrinsics.h new file mode 100644 index 0000000..789e39c --- /dev/null +++ b/PxShared/src/foundation/include/nx/PsNXIntrinsics.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef PX_FOUNDATION_PS_NX_INTRINSICS_H +#define PX_FOUNDATION_PS_NX_INTRINSICS_H + +#include "Ps.h" +#include "foundation/PxAssert.h" + +// this file is for internal intrinsics - that is, intrinsics that are used in +// cross platform code but do not appear in the API + +#if !PX_NX + #error "This file should only be included by NX builds!!" +#endif + +#include <math.h> + +namespace physx +{ +namespace shdfnd +{ + /* + * Implements a memory barrier + */ + PX_FORCE_INLINE void memoryBarrier() + { + __sync_synchronize(); + } + + /*! + Returns the index of the highest set bit. Not valid for zero arg. 
+ */ + PX_FORCE_INLINE PxU32 highestSetBitUnsafe(PxU32 v) + { + // http://graphics.stanford.edu/~seander/bithacks.html + static const PxU32 MultiplyDeBruijnBitPosition[32] = + { + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 + }; + + v |= v >> 1; // first round up to one less than a power of 2 + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + + return MultiplyDeBruijnBitPosition[(PxU32)(v * 0x07C4ACDDU) >> 27]; + } + + /*! + Returns the index of the highest set bit. Undefined for zero arg. + */ + PX_FORCE_INLINE PxU32 lowestSetBitUnsafe(PxU32 v) + { + // http://graphics.stanford.edu/~seander/bithacks.html + static const PxU32 MultiplyDeBruijnBitPosition[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + PxI32 w = v; + return MultiplyDeBruijnBitPosition[(PxU32)((w & -w) * 0x077CB531U) >> 27]; + } + + /*! + Returns the number of leading zeros in v. Returns 32 for v=0. + */ + PX_FORCE_INLINE PxU32 countLeadingZeros(PxU32 v) + { + PxI32 result = 0; + PxU32 testBit = (1<<31); + while ((v & testBit) == 0 && testBit != 0) + result ++, testBit >>= 1; + return result; + } + + /*! + Prefetch aligned cache size around \c ptr+offset. + */ + PX_FORCE_INLINE void prefetchLine(const void* ptr, PxU32 offset = 0) + { + __builtin_prefetch((char* PX_RESTRICT)(ptr) + offset, 0, 3); + } + + /*! + Prefetch \c count bytes starting at \c ptr. + */ + PX_FORCE_INLINE void prefetch(const void* ptr, PxU32 count = 1) + { + const char* cp = (char*)ptr; + PxU64 p = size_t(ptr); + PxU64 startLine = p>>6, endLine = (p+count-1)>>6; + PxU64 lines = endLine - startLine + 1; + do + { + prefetchLine(cp); + cp+=64; + } while(--lines); + } + + //! \brief platform-specific reciprocal + PX_CUDA_CALLABLE PX_FORCE_INLINE float recipFast(float a) { return 1.0f/a; } + + //! 
\brief platform-specific fast reciprocal square root + PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrtFast(float a) { return 1.0f/::sqrtf(a); } + + //! \brief platform-specific floor + PX_CUDA_CALLABLE PX_FORCE_INLINE float floatFloor(float x) + { + return ::floorf(x); + } + + #define PX_PRINTF printf + #define PX_EXPECT_TRUE(x) x + #define PX_EXPECT_FALSE(x) x + +} // namespace shdfnd +} // namespace physx + +#define PX_EXPECT_TRUE(x) x +#define PX_EXPECT_FALSE(x) x + +#endif diff --git a/PxShared/src/foundation/include/unix/PsUnixAoS.h b/PxShared/src/foundation/include/unix/PsUnixAoS.h new file mode 100644 index 0000000..122879f --- /dev/null +++ b/PxShared/src/foundation/include/unix/PsUnixAoS.h @@ -0,0 +1,47 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. 
Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSUNIXAOS_H +#define PSFOUNDATION_PSUNIXAOS_H + +// no includes here! this file should be included from PxcVecMath.h only!!! + +#if !COMPILE_VECTOR_INTRINSICS +#error Vector intrinsics should not be included when using scalar implementation. +#endif + +#if PX_INTEL_FAMILY +#include "sse2/PsUnixSse2AoS.h" +#elif PX_NEON +#include "neon/PsUnixNeonAoS.h" +#else +#error No SIMD implementation for this unix platform. +#endif + +#endif // PSFOUNDATION_PSUNIXAOS_H diff --git a/PxShared/src/foundation/include/unix/PsUnixFPU.h b/PxShared/src/foundation/include/unix/PsUnixFPU.h new file mode 100644 index 0000000..edd5522 --- /dev/null +++ b/PxShared/src/foundation/include/unix/PsUnixFPU.h @@ -0,0 +1,69 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PSFOUNDATION_PSUNIXFPU_H +#define PSFOUNDATION_PSUNIXFPU_H + +#include "foundation/PxPreprocessor.h" + +#if PX_LINUX || PX_PS4 || PX_OSX + +#if PX_X86 || PX_X64 +#if PX_EMSCRIPTEN +#include <emmintrin.h> +#endif +#include <xmmintrin.h> +#elif PX_NEON +#include <arm_neon.h> +#endif + + +PX_INLINE physx::shdfnd::SIMDGuard::SIMDGuard() +{ +#if !PX_EMSCRIPTEN && (PX_X86 || PX_X64) + mControlWord = _mm_getcsr(); + // set default (disable exceptions: _MM_MASK_MASK) and FTZ (_MM_FLUSH_ZERO_ON), DAZ (_MM_DENORMALS_ZERO_ON: (1<<6)) + _mm_setcsr(_MM_MASK_MASK | _MM_FLUSH_ZERO_ON | (1 << 6)); +#endif +} + +PX_INLINE physx::shdfnd::SIMDGuard::~SIMDGuard() +{ +#if !PX_EMSCRIPTEN && (PX_X86 || PX_X64) + // restore control word and clear exception flags + // (setting exception state flags cause exceptions on the first following fp operation) + _mm_setcsr(mControlWord & ~_MM_EXCEPT_MASK); +#endif +} + +#else +#error No SIMD implementation for this unix platform. +#endif // PX_LINUX || PX_PS4 || PX_OSX + +#endif // #ifndef PSFOUNDATION_PSUNIXFPU_H diff --git a/PxShared/src/foundation/include/unix/PsUnixInlineAoS.h b/PxShared/src/foundation/include/unix/PsUnixInlineAoS.h new file mode 100644 index 0000000..e54f2c8 --- /dev/null +++ b/PxShared/src/foundation/include/unix/PsUnixInlineAoS.h @@ -0,0 +1,48 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSUNIXINLINEAOS_H +#define PSFOUNDATION_PSUNIXINLINEAOS_H + +#if !COMPILE_VECTOR_INTRINSICS +#error Vector intrinsics should not be included when using scalar implementation. +#endif + +// Remove this define when all platforms use simd solver. +#define PX_SUPPORT_SIMD + +#if PX_INTEL_FAMILY +#include "sse2/PsUnixSse2InlineAoS.h" +#elif PX_NEON +#include "neon/PsUnixNeonInlineAoS.h" +#else +#error No SIMD implementation for this unix platform. 
+#endif + +#endif // PSFOUNDATION_PSUNIXINLINEAOS_H diff --git a/PxShared/src/foundation/include/unix/PsUnixIntrinsics.h b/PxShared/src/foundation/include/unix/PsUnixIntrinsics.h new file mode 100644 index 0000000..4c6c892 --- /dev/null +++ b/PxShared/src/foundation/include/unix/PsUnixIntrinsics.h @@ -0,0 +1,153 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSUNIXINTRINSICS_H +#define PSFOUNDATION_PSUNIXINTRINSICS_H + +#include "Ps.h" +#include "foundation/PxAssert.h" +#include <math.h> + +#if PX_ANDROID +#include <signal.h> // for Ns::debugBreak() { raise(SIGTRAP); } +#endif + +#if 0 +#include <libkern/OSAtomic.h> +#endif + +// this file is for internal intrinsics - that is, intrinsics that are used in +// cross platform code but do not appear in the API + +#if !(PX_LINUX || PX_ANDROID || PX_PS4 || PX_APPLE_FAMILY) +#error "This file should only be included by unix builds!!" +#endif + +namespace physx +{ +namespace shdfnd +{ + +PX_FORCE_INLINE void memoryBarrier() +{ + __sync_synchronize(); +} + +/*! +Return the index of the highest set bit. Undefined for zero arg. +*/ +PX_INLINE uint32_t highestSetBitUnsafe(uint32_t v) +{ + + return 31 - __builtin_clz(v); +} + +/*! +Return the index of the highest set bit. Undefined for zero arg. +*/ +PX_INLINE int32_t lowestSetBitUnsafe(uint32_t v) +{ + return __builtin_ctz(v); +} + +/*! +Returns the index of the highest set bit. Returns 32 for v=0. +*/ +PX_INLINE uint32_t countLeadingZeros(uint32_t v) +{ + if(v) + return __builtin_clz(v); + else + return 32; +} + +/*! +Prefetch aligned 64B x86, 32b ARM around \c ptr+offset. +*/ +PX_FORCE_INLINE void prefetchLine(const void* ptr, uint32_t offset = 0) +{ + __builtin_prefetch(reinterpret_cast<const char* PX_RESTRICT>(ptr) + offset, 0, 3); +} + +/*! +Prefetch \c count bytes starting at \c ptr. 
+*/ +#if PX_ANDROID || PX_IOS +PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) +{ + const char* cp = static_cast<const char*>(ptr); + size_t p = reinterpret_cast<size_t>(ptr); + uint32_t startLine = uint32_t(p >> 5), endLine = uint32_t((p + count - 1) >> 5); + uint32_t lines = endLine - startLine + 1; + do + { + prefetchLine(cp); + cp += 32; + } while(--lines); +} +#else +PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) +{ + const char* cp = reinterpret_cast<const char*>(ptr); + uint64_t p = size_t(ptr); + uint64_t startLine = p >> 6, endLine = (p + count - 1) >> 6; + uint64_t lines = endLine - startLine + 1; + do + { + prefetchLine(cp); + cp += 64; + } while(--lines); +} +#endif + +//! \brief platform-specific reciprocal +PX_CUDA_CALLABLE PX_FORCE_INLINE float recipFast(float a) +{ + return 1.0f / a; +} + +//! \brief platform-specific fast reciprocal square root +PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrtFast(float a) +{ + return 1.0f / ::sqrtf(a); +} + +//! \brief platform-specific floor +PX_CUDA_CALLABLE PX_FORCE_INLINE float floatFloor(float x) +{ + return ::floorf(x); +} + +#define NS_EXPECT_TRUE(x) x +#define NS_EXPECT_FALSE(x) x + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSUNIXINTRINSICS_H diff --git a/PxShared/src/foundation/include/unix/PsUnixTrigConstants.h b/PxShared/src/foundation/include/unix/PsUnixTrigConstants.h new file mode 100644 index 0000000..7f54733 --- /dev/null +++ b/PxShared/src/foundation/include/unix/PsUnixTrigConstants.h @@ -0,0 +1,82 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef PSFOUNDATION_PSUNIXTRIGCONSTANTS_H
#define PSFOUNDATION_PSUNIXTRIGCONSTANTS_H

// Every table below is declared "extern const" with the weak attribute; this is the unix
// spelling of the __declspec(selectany) form kept in the commented-out line.
//#define PX_GLOBALCONST extern const __declspec(selectany)
#define PX_GLOBALCONST extern const __attribute__((weak))

// Four floats with 16-byte alignment; the storage type of every coefficient table below.
PX_ALIGN_PREFIX(16)
struct PX_VECTORF32
{
    float f[4];
} PX_ALIGN_SUFFIX(16);

// Sine coefficients, split across three 4-float tables. The first entries are
// 1, -1/3!, 1/5!, -1/7!, i.e. the leading Taylor-series terms.
PX_GLOBALCONST PX_VECTORF32 g_PXSinCoefficients0 = { { 1.0f, -0.166666667f, 8.333333333e-3f, -1.984126984e-4f } };
PX_GLOBALCONST PX_VECTORF32
g_PXSinCoefficients1 = { { 2.755731922e-6f, -2.505210839e-8f, 1.605904384e-10f, -7.647163732e-13f } };
PX_GLOBALCONST PX_VECTORF32
g_PXSinCoefficients2 = { { 2.811457254e-15f, -8.220635247e-18f, 1.957294106e-20f, -3.868170171e-23f } };
// Cosine coefficients; the first entries are 1, -1/2!, 1/4!, -1/6!.
PX_GLOBALCONST PX_VECTORF32 g_PXCosCoefficients0 = { { 1.0f, -0.5f, 4.166666667e-2f, -1.388888889e-3f } };
PX_GLOBALCONST PX_VECTORF32
g_PXCosCoefficients1 = { { 2.480158730e-5f, -2.755731922e-7f, 2.087675699e-9f, -1.147074560e-11f } };
PX_GLOBALCONST PX_VECTORF32
g_PXCosCoefficients2 = { { 4.779477332e-14f, -1.561920697e-16f, 4.110317623e-19f, -8.896791392e-22f } };
// Tangent coefficients; the first entries are 1, 1/3, 2/15, 17/315.
PX_GLOBALCONST PX_VECTORF32 g_PXTanCoefficients0 = { { 1.0f, 0.333333333f, 0.133333333f, 5.396825397e-2f } };
PX_GLOBALCONST PX_VECTORF32
g_PXTanCoefficients1 = { { 2.186948854e-2f, 8.863235530e-3f, 3.592128167e-3f, 1.455834485e-3f } };
PX_GLOBALCONST PX_VECTORF32
g_PXTanCoefficients2 = { { 5.900274264e-4f, 2.391290764e-4f, 9.691537707e-5f, 3.927832950e-5f } };
// Arcsine coefficients (presumably a fitted polynomial rather than a Taylor series -- the
// values do not follow the series pattern; confirm against the consumer).
PX_GLOBALCONST PX_VECTORF32
g_PXASinCoefficients0 = { { -0.05806367563904f, -0.41861972469416f, 0.22480114791621f, 2.17337241360606f } };
PX_GLOBALCONST PX_VECTORF32
g_PXASinCoefficients1 = { { 0.61657275907170f, 4.29696498283455f, -1.18942822255452f, -6.53784832094831f } };
PX_GLOBALCONST PX_VECTORF32
g_PXASinCoefficients2 = { { -1.36926553863413f, -4.48179294237210f, 1.41810672941833f, 5.48179257935713f } };
// Arctangent coefficients stored as magnitudes 1, 1/3, 1/5, 1/7, ...; no signs are stored
// here, so any alternation must be applied by the code that consumes the table.
PX_GLOBALCONST PX_VECTORF32 g_PXATanCoefficients0 = { { 1.0f, 0.333333334f, 0.2f, 0.142857143f } };
PX_GLOBALCONST PX_VECTORF32
g_PXATanCoefficients1 = { { 1.111111111e-1f, 9.090909091e-2f, 7.692307692e-2f, 6.666666667e-2f } };
PX_GLOBALCONST PX_VECTORF32
g_PXATanCoefficients2 = { { 5.882352941e-2f, 5.263157895e-2f, 4.761904762e-2f, 4.347826087e-2f } };
// Single-table coefficient sets for the "Est" variants (presumably the lower-precision
// estimate routines -- confirm against their users).
PX_GLOBALCONST PX_VECTORF32
g_PXSinEstCoefficients = { { 1.0f, -1.66521856991541e-1f, 8.199913018755e-3f, -1.61475937228e-4f } };
PX_GLOBALCONST PX_VECTORF32
g_PXCosEstCoefficients = { { 1.0f, -4.95348008918096e-1f, 3.878259962881e-2f, -9.24587976263e-4f } };
PX_GLOBALCONST PX_VECTORF32 g_PXTanEstCoefficients = { { 2.484f, -1.954923183e-1f, 2.467401101f, PxInvPi } };
PX_GLOBALCONST PX_VECTORF32
g_PXATanEstCoefficients = { { 7.689891418951e-1f, 1.104742493348f, 8.661844266006e-1f, PxPiDivTwo } };
PX_GLOBALCONST PX_VECTORF32
g_PXASinEstCoefficients = { { -1.36178272886711f, 2.37949493464538f, -8.08228565650486e-1f, 2.78440142746736e-1f } };
PX_GLOBALCONST PX_VECTORF32 g_PXASinEstConstants = { { 1.00000011921f, PxPiDivTwo, 0.0f, 0.0f } };
// Pi-related constants: one mixed table, then 1/(2*pi) and 2*pi replicated in all four lanes.
PX_GLOBALCONST PX_VECTORF32 g_PXPiConstants0 = { { PxPi, PxTwoPi, PxInvPi, PxInvTwoPi } };
PX_GLOBALCONST PX_VECTORF32 g_PXReciprocalTwoPi = { { PxInvTwoPi, PxInvTwoPi, PxInvTwoPi, PxInvTwoPi } };
PX_GLOBALCONST PX_VECTORF32 g_PXTwoPi = { { PxTwoPi, PxTwoPi, PxTwoPi, PxTwoPi } };

#endif
diff --git a/PxShared/src/foundation/include/unix/neon/PsUnixNeonAoS.h b/PxShared/src/foundation/include/unix/neon/PsUnixNeonAoS.h new file mode 100644 index 0000000..60a5be8 --- /dev/null +++ b/PxShared/src/foundation/include/unix/neon/PsUnixNeonAoS.h @@ -0,0 +1,129 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto.
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSUNIXNEONAOS_H +#define PSFOUNDATION_PSUNIXNEONAOS_H + +// no includes here! this file should be included from PxcVecMath.h only!!! + +#if !COMPILE_VECTOR_INTRINSICS +#error Vector intrinsics should not be included when using scalar implementation. 
#endif

// only ARM NEON compatible platforms should reach this
#include <arm_neon.h>

// SIMD value types. FloatV is the 2-lane float vector; Vec3V, Vec4V and QuatV all share
// the 4-lane float vector; BoolV is a 4-lane 32-bit mask.
typedef float32x2_t FloatV;
typedef float32x4_t Vec3V;
typedef float32x4_t Vec4V;
typedef uint32x4_t BoolV;
typedef float32x4_t QuatV;

// Integer vector types, 128 bits each.
typedef uint32x4_t VecU32V;
typedef int32x4_t VecI32V;
typedef uint16x8_t VecU16V;
typedef int16x8_t VecI16V;
typedef uint8x16_t VecU8V;

// Parameter-passing spellings: on this platform every *Arg expands to a reference, so
// "const Vec3VArg x" declares "const Vec3V & x".
#define FloatVArg FloatV &
#define Vec3VArg Vec3V &
#define Vec4VArg Vec4V &
#define BoolVArg BoolV &
#define VecU32VArg VecU32V &
#define VecI32VArg VecI32V &
#define VecU16VArg VecU16V &
#define VecI16VArg VecI16V &
#define VecU8VArg VecU8V &
#define QuatVArg QuatV &

// KS - TODO - make an actual VecCrossV type for NEON
#define VecCrossV Vec3V

typedef VecI32V VecShiftV;
#define VecShiftVArg VecShiftV &

// 3 columns of Vec3V, each 16-byte aligned. The default constructor leaves the columns
// uninitialised.
PX_ALIGN_PREFIX(16)
struct Mat33V
{
    Mat33V()
    {
    }
    Mat33V(const Vec3V& c0, const Vec3V& c1, const Vec3V& c2) : col0(c0), col1(c1), col2(c2)
    {
    }
    Vec3V PX_ALIGN(16, col0);
    Vec3V PX_ALIGN(16, col1);
    Vec3V PX_ALIGN(16, col2);
} PX_ALIGN_SUFFIX(16);

// 4 columns of Vec3V, each 16-byte aligned. The default constructor leaves the columns
// uninitialised.
PX_ALIGN_PREFIX(16)
struct Mat34V
{
    Mat34V()
    {
    }
    Mat34V(const Vec3V& c0, const Vec3V& c1, const Vec3V& c2, const Vec3V& c3) : col0(c0), col1(c1), col2(c2), col3(c3)
    {
    }
    Vec3V PX_ALIGN(16, col0);
    Vec3V PX_ALIGN(16, col1);
    Vec3V PX_ALIGN(16, col2);
    Vec3V PX_ALIGN(16, col3);
} PX_ALIGN_SUFFIX(16);

// 3 columns of Vec4V, each 16-byte aligned. The default constructor leaves the columns
// uninitialised.
PX_ALIGN_PREFIX(16)
struct Mat43V
{
    Mat43V()
    {
    }
    Mat43V(const Vec4V& c0, const Vec4V& c1, const Vec4V& c2) : col0(c0), col1(c1), col2(c2)
    {
    }
    Vec4V PX_ALIGN(16, col0);
    Vec4V PX_ALIGN(16, col1);
    Vec4V PX_ALIGN(16, col2);
} PX_ALIGN_SUFFIX(16);

// 4 columns of Vec4V, each 16-byte aligned. The default constructor leaves the columns
// uninitialised.
PX_ALIGN_PREFIX(16)
struct Mat44V
{
    Mat44V()
    {
    }
    Mat44V(const Vec4V& c0, const Vec4V& c1, const Vec4V& c2, const Vec4V& c3) : col0(c0), col1(c1), col2(c2), col3(c3)
    {
    }
    Vec4V PX_ALIGN(16, col0);
    Vec4V PX_ALIGN(16, col1);
    Vec4V PX_ALIGN(16, col2);
    Vec4V PX_ALIGN(16, col3);
} PX_ALIGN_SUFFIX(16);

#endif // PSFOUNDATION_PSUNIXNEONAOS_H
diff --git a/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h b/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h new file mode 100644 index 0000000..2a0578d --- /dev/null +++ b/PxShared/src/foundation/include/unix/neon/PsUnixNeonInlineAoS.h @@ -0,0 +1,3582 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSUNIXNEONINLINEAOS_H +#define PSFOUNDATION_PSUNIXNEONINLINEAOS_H + +#if !COMPILE_VECTOR_INTRINSICS +#error Vector intrinsics should not be included when using scalar implementation. +#endif + +// improved estimates +#define VRECIPEQ recipq_newton<1> +#define VRECIPE recip_newton<1> +#define VRECIPSQRTEQ rsqrtq_newton<1> +#define VRECIPSQRTE rsqrt_newton<1> + +// "exact" +#define VRECIPQ recipq_newton<4> +#if PX_NX +// StabilizationTests.AveragePoint needs more precision to succeed. +#define VRECIP recip_newton<5> +#else +#define VRECIP recip_newton<4> +#endif +#define VRECIPSQRTQ rsqrtq_newton<4> +#define VRECIPSQRT rsqrt_newton<4> + +#define VECMATH_AOS_EPSILON (1e-3f) + +// Remove this define when all platforms use simd solver. +#define PX_SUPPORT_SIMD + +////////////////////////////////////////////////////////////////////// +//Test that Vec3V and FloatV are legal +////////////////////////////////// + +#define FLOAT_COMPONENTS_EQUAL_THRESHOLD 0.01f +PX_FORCE_INLINE bool isValidFloatV(const FloatV a) +{ + /* + PX_ALIGN(16, PxF32) data[4]; + vst1_f32(reinterpret_cast<float32_t*>(data), a); + return + PxU32* intData = reinterpret_cast<PxU32*>(data); + return intData[0] == intData[1]; + */ + PX_ALIGN(16, PxF32) data[4]; + vst1_f32(reinterpret_cast<float32_t*>(data), a); + const float32_t x = data[0]; + const float32_t y = data[1]; + + if (PxAbs(x - y) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) + { + return true; + } + + if (PxAbs((x - y) / x) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) + { + return true; + } + + return false; +} + +PX_FORCE_INLINE bool isValidVec3V(const Vec3V a) +{ + const float32_t w = vgetq_lane_f32(a, 3); + return (0.0f == w); + //const PxU32* intData = reinterpret_cast<const PxU32*>(&w); + //return *intData == 0; +} + +PX_FORCE_INLINE bool isAligned16(const void* a) +{ + return(0 == (size_t(a) & 0x0f)); 
+} + +// The validity and alignment assertions expand to PX_ASSERT checks only when PX_DEBUG is set; otherwise they expand to nothing. +#if PX_DEBUG +#define ASSERT_ISVALIDVEC3V(a) PX_ASSERT(isValidVec3V(a)) +#define ASSERT_ISVALIDFLOATV(a) PX_ASSERT(isValidFloatV(a)) +#define ASSERT_ISALIGNED16(a) PX_ASSERT(isAligned16(static_cast<const void*>(a))) +#else +#define ASSERT_ISVALIDVEC3V(a) +#define ASSERT_ISVALIDFLOATV(a) +#define ASSERT_ISALIGNED16(a) +#endif + +namespace internalUnitNeonSimd +{ +// Reduces a BoolV to a scalar: the lanes are narrowed twice (32 to 16 to 8 bits) and lane 0 of the packed 32-bit view is tested. Returns 1 when that view equals 0xffffFFFF, else 0. +PX_FORCE_INLINE PxU32 BAllTrue4_R(const BoolV a) +{ + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + const uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + return PxU32(vget_lane_u32(finalReduce, 0) == 0xffffFFFF); +} + +// As BAllTrue4_R, but only the low 24 bits of the packed view are tested. +PX_FORCE_INLINE PxU32 BAllTrue3_R(const BoolV a) +{ + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + const uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + return PxU32((vget_lane_u32(finalReduce, 0) & 0xffFFff) == 0xffFFff); +} + +// Returns 1 when the packed view is non-zero, else 0. +PX_FORCE_INLINE PxU32 BAnyTrue4_R(const BoolV a) +{ + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + const uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + return PxU32(vget_lane_u32(finalReduce, 0) != 0x0); +} + +// As BAnyTrue4_R, restricted to the low 24 bits of the packed view. +PX_FORCE_INLINE PxU32 BAnyTrue3_R(const BoolV a) +{ + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + const uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + return PxU32((vget_lane_u32(finalReduce, 0) & 0xffFFff) != 0); +} +} + +namespace _VecMathTests +{ +// PT: this function returns an invalid Vec3V (W!=0.0f) just for unit-testing 'isValidVec3V' +PX_FORCE_INLINE Vec3V getInvalidVec3V() +{ + PX_ALIGN(16, 
PxF32) data[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; + return V4LoadA(data); +} + +// Exact comparison of lane 0 of a and b. +PX_FORCE_INLINE bool allElementsEqualFloatV(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vget_lane_u32(vceq_f32(a, b), 0) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVec3V(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return V3AllEq(a, b) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVec4V(const Vec4V a, const Vec4V b) +{ + return V4AllEq(a, b) != 0; +} + +// True when BAllTrue4_R reports every lane of the per-lane equality mask as set. +PX_FORCE_INLINE bool allElementsEqualBoolV(const BoolV a, const BoolV b) +{ + return internalUnitNeonSimd::BAllTrue4_R(vceqq_u32(a, b)) != 0; +} + +PX_FORCE_INLINE PxU32 V4U32AllEq(const VecU32V a, const VecU32V b) +{ + return internalUnitNeonSimd::BAllTrue4_R(V4IsEqU32(a, b)); +} + +PX_FORCE_INLINE bool allElementsEqualVecU32V(const VecU32V a, const VecU32V b) +{ + return V4U32AllEq(a, b) != 0; +} + +// Per-lane signed 32-bit equality mask. +PX_FORCE_INLINE BoolV V4IsEqI32(const VecI32V a, const VecI32V b) +{ + return vceqq_s32(a, b); +} + +PX_FORCE_INLINE PxU32 V4I32AllEq(const VecI32V a, const VecI32V b) +{ + return internalUnitNeonSimd::BAllTrue4_R(V4IsEqI32(a, b)); +} + +PX_FORCE_INLINE bool allElementsEqualVecI32V(const VecI32V a, const VecI32V b) +{ + return V4I32AllEq(a, b) != 0; +} + +// True when the absolute difference between a and b is below VECMATH_AOS_EPSILON in both lanes. +PX_FORCE_INLINE bool allElementsNearEqualFloatV(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + + const float32x2_t c = vsub_f32(a, b); + const float32x2_t error = vdup_n_f32(VECMATH_AOS_EPSILON); +// absolute compare abs(error) > abs(c) + const uint32x2_t greater = vcagt_f32(error, c); + const uint32x2_t min = vpmin_u32(greater, greater); + return vget_lane_u32(min, 0) != 0x0; +} + +// Same tolerance test on a Vec3V pair; the reduction uses BAllTrue3_R. +PX_FORCE_INLINE bool allElementsNearEqualVec3V(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + const float32x4_t c = vsubq_f32(a, b); + const float32x4_t error = vdupq_n_f32(VECMATH_AOS_EPSILON); +// absolute compare abs(error) > 
abs(c) + const uint32x4_t greater = vcagtq_f32(error, c); + return internalUnitNeonSimd::BAllTrue3_R(greater) != 0; +} + +// Same tolerance test reduced over all four lanes with BAllTrue4_R. +PX_FORCE_INLINE bool allElementsNearEqualVec4V(const Vec4V a, const Vec4V b) +{ + const float32x4_t c = vsubq_f32(a, b); + const float32x4_t error = vdupq_n_f32(VECMATH_AOS_EPSILON); +// absolute compare abs(error) > abs(c) + const uint32x4_t greater = vcagtq_f32(error, c); + return internalUnitNeonSimd::BAllTrue4_R(greater) != 0x0; +} +} + +#if 0 // debugging printfs +#include <stdio.h> +// Compiled out by the surrounding '#if 0'. Each printVec overload stores the vector to a local array and prints its lanes. +PX_FORCE_INLINE void printVec(const float32x4_t& v, const char* name) +{ + PX_ALIGN(16, float32_t) data[4]; + vst1q_f32(data, v); + printf("%s: (%f, %f, %f, %f)\n", name, data[0], data[1], data[2], data[3]); +} + +PX_FORCE_INLINE void printVec(const float32x2_t& v, const char* name) +{ + PX_ALIGN(16, float32_t) data[2]; + vst1_f32(data, v); + printf("%s: (%f, %f)\n", name, data[0], data[1]); +} + +PX_FORCE_INLINE void printVec(const uint32x4_t& v, const char* name) +{ + PX_ALIGN(16, uint32_t) data[4]; + vst1q_u32(data, v); + printf("%s: (0x%x, 0x%x, 0x%x, 0x%x)\n", name, data[0], data[1], data[2], data[3]); +} + +PX_FORCE_INLINE void printVec(const uint16x8_t& v, const char* name) +{ + PX_ALIGN(16, uint16_t) data[8]; + vst1q_u16(data, v); + printf("%s: (0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x)\n", name, data[0], data[1], data[2], data[3], + data[4], data[5], data[6], data[7]); +} + +PX_FORCE_INLINE void printVec(const int32x4_t& v, const char* name) +{ + PX_ALIGN(16, int32_t) data[4]; + vst1q_s32(data, v); + printf("%s: (0x%x, 0x%x, 0x%x, 0x%x)\n", name, data[0], data[1], data[2], data[3]); +} + +PX_FORCE_INLINE void printVec(const int16x8_t& v, const char* name) +{ + PX_ALIGN(16, int16_t) data[8]; + vst1q_s16(data, v); + printf("%s: (0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x)\n", name, data[0], data[1], data[2], data[3], + data[4], data[5], data[6], data[7]); +} + +PX_FORCE_INLINE void printVec(const uint16x4_t& v, const char* name) +{ + 
PX_ALIGN(16, uint16_t) data[4]; + vst1_u16(data, v); + printf("%s: (0x%x, 0x%x, 0x%x, 0x%x)\n", name, data[0], data[1], data[2], data[3]); +} + +PX_FORCE_INLINE void printVec(const uint32x2_t& v, const char* name) +{ + PX_ALIGN(16, uint32_t) data[2]; + vst1_u32(data, v); + printf("%s: (0x%x, 0x%x)\n", name, data[0], data[1]); +} + +PX_FORCE_INLINE void printVar(const PxU32 v, const char* name) +{ + printf("%s: 0x%x\n", name, v); +} + +PX_FORCE_INLINE void printVar(const PxF32 v, const char* name) +{ + printf("%s: %f\n", name, v); +} + +// Convenience wrappers that pass the stringised expression as the label. +#define PRINT_VAR(X) printVar((X), #X) +#define PRINT_VEC(X) printVec((X), #X) +#define PRINT_VEC_TITLE(TITLE, X) printVec((X), TITLE #X) +#endif // debugging printf + +///////////////////////////////////////////////////////////////////// +////FUNCTIONS USED ONLY FOR ASSERTS IN VECTORISED IMPLEMENTATIONS +///////////////////////////////////////////////////////////////////// + +// Checks lanes 0 and 1 with PxIsFinite. +PX_FORCE_INLINE bool isFiniteFloatV(const FloatV a) +{ + PX_ALIGN(16, PxF32) data[4]; + vst1_f32(reinterpret_cast<float32_t*>(data), a); + return PxIsFinite(data[0]) && PxIsFinite(data[1]); +} + +// Checks x, y and z with PxIsFinite; data[3] (w) is not examined. +PX_FORCE_INLINE bool isFiniteVec3V(const Vec3V a) +{ + PX_ALIGN(16, PxF32) data[4]; + vst1q_f32(reinterpret_cast<float32_t*>(data), a); + return PxIsFinite(data[0]) && PxIsFinite(data[1]) && PxIsFinite(data[2]); +} + +// Checks all four lanes with PxIsFinite. +PX_FORCE_INLINE bool isFiniteVec4V(const Vec4V a) +{ + PX_ALIGN(16, PxF32) data[4]; + vst1q_f32(reinterpret_cast<float32_t*>(data), a); + return PxIsFinite(data[0]) && PxIsFinite(data[1]) && PxIsFinite(data[2]) && PxIsFinite(data[3]); +} + +// Compares the raw 32-bit pattern of lane 0 with 0, so a negative zero (sign bit set) does not match. +PX_FORCE_INLINE bool hasZeroElementinFloatV(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return vget_lane_u32(vreinterpret_u32_f32(a), 0) == 0; +} + +// Raw-bit zero test on x and y (via pairwise unsigned minimum) and on z (lane 2); w is ignored. +PX_FORCE_INLINE bool hasZeroElementInVec3V(const Vec3V a) +{ + const uint32x2_t dLow = vget_low_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t dMin = vpmin_u32(dLow, dLow); + + return vget_lane_u32(dMin, 0) == 0 || vgetq_lane_u32(vreinterpretq_u32_f32(a), 2) == 0; 
+} + +// Raw-bit zero test over all four lanes using unsigned minimum reductions. +PX_FORCE_INLINE bool hasZeroElementInVec4V(const Vec4V a) +{ + const uint32x2_t dHigh = vget_high_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t dLow = vget_low_u32(vreinterpretq_u32_f32(a)); + + const uint32x2_t dMin = vmin_u32(dHigh, dLow); + const uint32x2_t pairMin = vpmin_u32(dMin, dMin); + return vget_lane_u32(pairMin, 0) == 0; +} + +///////////////////////////////////////////////////////////////////// +////VECTORISED FUNCTION IMPLEMENTATIONS +///////////////////////////////////////////////////////////////////// + +// Broadcasts the scalar f to both lanes of a FloatV. +PX_FORCE_INLINE FloatV FLoad(const PxF32 f) +{ + return vdup_n_f32(reinterpret_cast<const float32_t&>(f)); +} + +// Loads a FloatV from f; the 16-byte alignment of f is asserted in debug builds. +PX_FORCE_INLINE FloatV FLoadA(const PxF32* const f) +{ + ASSERT_ISALIGNED16(f); + return vld1_f32(reinterpret_cast<const float32_t*>(f)); +} + +// Builds (f, f, f, 0). +PX_FORCE_INLINE Vec3V V3Load(const PxF32 f) +{ + PX_ALIGN(16, PxF32) data[4] = { f, f, f, 0.0f }; + return V4LoadA(data); +} + +// Broadcasts the scalar f to all four lanes. +PX_FORCE_INLINE Vec4V V4Load(const PxF32 f) +{ + return vdupq_n_f32(reinterpret_cast<const float32_t&>(f)); +} + +// Negating the 0/1 integer value of f yields 0 or 0xffffffff, which is broadcast to all four lanes. +PX_FORCE_INLINE BoolV BLoad(const bool f) +{ + const PxU32 i = static_cast<PxU32>(-(static_cast<PxI32>(f))); + return vdupq_n_u32(i); +} + +// The V3Load* variants below copy three floats into an aligned temporary with w set to 0 and load that temporary. +PX_FORCE_INLINE Vec3V V3LoadA(const PxVec3& f) +{ + ASSERT_ISALIGNED16(&f); + PX_ALIGN(16, PxF32) data[4] = { f.x, f.y, f.z, 0.0f }; + return V4LoadA(data); +} + +PX_FORCE_INLINE Vec3V V3LoadU(const PxVec3& f) +{ + PX_ALIGN(16, PxF32) data[4] = { f.x, f.y, f.z, 0.0f }; + return V4LoadA(data); +} + +PX_FORCE_INLINE Vec3V V3LoadUnsafeA(const PxVec3& f) +{ + ASSERT_ISALIGNED16(&f); + PX_ALIGN(16, PxF32) data[4] = { f.x, f.y, f.z, 0.0f }; + return V4LoadA(data); +} + +PX_FORCE_INLINE Vec3V V3LoadA(const PxF32* f) +{ + ASSERT_ISALIGNED16(f); + PX_ALIGN(16, PxF32) data[4] = { f[0], f[1], f[2], 0.0f }; + return V4LoadA(data); +} + +PX_FORCE_INLINE Vec3V V3LoadU(const PxF32* f) +{ + PX_ALIGN(16, PxF32) data[4] = { f[0], f[1], f[2], 0.0f }; + return V4LoadA(data); +} + +// Returns v with lane 3 overwritten by 0.0f. +PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V(Vec4V v) +{ + 
return vsetq_lane_f32(0.0f, v, 3); +} + +// Returns v unchanged; lane 3 keeps whatever value it held. +PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V_WUndefined(Vec4V v) +{ + return v; +} + +PX_FORCE_INLINE Vec4V Vec4V_From_Vec3V(Vec3V f) +{ + return f; // ok if it is implemented as the same type. +} + +// Places f in both the low and the high half of the result. +PX_FORCE_INLINE Vec4V Vec4V_From_FloatV(FloatV f) +{ + return vcombine_f32(f, f); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_FloatV(FloatV f) +{ + return Vec3V_From_Vec4V(Vec4V_From_FloatV(f)); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_FloatV_WUndefined(FloatV f) +{ + return Vec3V_From_Vec4V_WUndefined(Vec4V_From_FloatV(f)); +} + +// Note: this implementation writes 0.0f into w even though the name says the value is undefined. +PX_FORCE_INLINE Vec4V Vec4V_From_PxVec3_WUndefined(const PxVec3& f) +{ + PX_ALIGN(16, PxF32) data[4] = { f.x, f.y, f.z, 0.0f }; + return V4LoadA(data); +} + +// Converts column by column with the unaligned Vec3V load. +PX_FORCE_INLINE Mat33V Mat33V_From_PxMat33(const PxMat33& m) +{ + return Mat33V(V3LoadU(m.column0), V3LoadU(m.column1), V3LoadU(m.column2)); +} + +// Converts column by column with the unaligned Vec3V store. +PX_FORCE_INLINE void PxMat33_From_Mat33V(const Mat33V& m, PxMat33& out) +{ + V3StoreU(m.col0, out.column0); + V3StoreU(m.col1, out.column1); + V3StoreU(m.col2, out.column2); +} + +// Loads four floats from f; the 16-byte alignment of f is asserted in debug builds. +PX_FORCE_INLINE Vec4V V4LoadA(const PxF32* const f) +{ + ASSERT_ISALIGNED16(f); + return vld1q_f32(reinterpret_cast<const float32_t*>(f)); +} + +PX_FORCE_INLINE void V4StoreA(Vec4V a, PxF32* f) +{ + ASSERT_ISALIGNED16(f); + vst1q_f32(reinterpret_cast<float32_t*>(f), a); +} + +// Stores through an aligned temporary and copies the four floats one by one, so f is not required to be 16-byte aligned. +PX_FORCE_INLINE void V4StoreU(const Vec4V a, PxF32* f) +{ + PX_ALIGN(16, PxF32) f2[4]; + vst1q_f32(reinterpret_cast<float32_t*>(f2), a); + f[0] = f2[0]; + f[1] = f2[1]; + f[2] = f2[2]; + f[3] = f2[3]; +} + +PX_FORCE_INLINE void BStoreA(const BoolV a, PxU32* u) +{ + ASSERT_ISALIGNED16(u); + vst1q_u32(reinterpret_cast<uint32_t*>(u), a); +} + +PX_FORCE_INLINE void U4StoreA(const VecU32V uv, PxU32* u) +{ + ASSERT_ISALIGNED16(u); + vst1q_u32(reinterpret_cast<uint32_t*>(u), uv); +} + +PX_FORCE_INLINE void I4StoreA(const VecI32V iv, PxI32* i) +{ + ASSERT_ISALIGNED16(i); + vst1q_s32(reinterpret_cast<int32_t*>(i), iv); +} + +// Loads four floats from f without the alignment assertion used by V4LoadA. +PX_FORCE_INLINE Vec4V V4LoadU(const PxF32* const f) 
+{ + return vld1q_f32(reinterpret_cast<const float32_t*>(f)); +} + +// Reads f[0] to f[3]; each true entry becomes 0xffffffff and each false entry 0. +PX_FORCE_INLINE BoolV BLoad(const bool* const f) +{ + const PX_ALIGN(16, PxU32) b[4] = { static_cast<PxU32>(-static_cast<PxI32>(f[0])), + static_cast<PxU32>(-static_cast<PxI32>(f[1])), + static_cast<PxU32>(-static_cast<PxI32>(f[2])), + static_cast<PxU32>(-static_cast<PxI32>(f[3])) }; + return vld1q_u32(b); +} + +// Writes lane 0 of a to *f using a scalar store. +PX_FORCE_INLINE void FStore(const FloatV a, PxF32* PX_RESTRICT f) +{ + ASSERT_ISVALIDFLOATV(a); + // vst1q_lane_f32(f, a, 0); // causes vst1 alignment bug + *f = vget_lane_f32(a, 0); +} + +// Writes lane 0 of a to *f. +PX_FORCE_INLINE void Store_From_BoolV(const BoolV a, PxU32* PX_RESTRICT f) +{ + *f = vget_lane_u32(vget_low_u32(a), 0); +} + +// Writes x, y and z of a into f through an aligned temporary; w is dropped. +PX_FORCE_INLINE void V3StoreA(const Vec3V a, PxVec3& f) +{ + ASSERT_ISALIGNED16(&f); + PX_ALIGN(16, PxF32) f2[4]; + vst1q_f32(reinterpret_cast<float32_t*>(f2), a); + f = PxVec3(f2[0], f2[1], f2[2]); +} + +// Same as V3StoreA without the alignment assertion on f. +PX_FORCE_INLINE void V3StoreU(const Vec3V a, PxVec3& f) +{ + PX_ALIGN(16, PxF32) f2[4]; + vst1q_f32(reinterpret_cast<float32_t*>(f2), a); + f = PxVec3(f2[0], f2[1], f2[2]); +} + +////////////////////////////////// +// FLOATV +////////////////////////////////// + +PX_FORCE_INLINE FloatV FZero() +{ + return FLoad(0.0f); +} + +PX_FORCE_INLINE FloatV FOne() +{ + return FLoad(1.0f); +} + +PX_FORCE_INLINE FloatV FHalf() +{ + return FLoad(0.5f); +} + +PX_FORCE_INLINE FloatV FEps() +{ + return FLoad(PX_EPS_REAL); +} + +PX_FORCE_INLINE FloatV FEps6() +{ + return FLoad(1e-6f); +} + +PX_FORCE_INLINE FloatV FMax() +{ + return FLoad(PX_MAX_REAL); +} + +PX_FORCE_INLINE FloatV FNegMax() +{ + return FLoad(-PX_MAX_REAL); +} + +// IZero to IFour return a FloatV whose raw bits hold the integer 0 to 4 in each lane (a bit reinterpretation, not a numeric conversion). +PX_FORCE_INLINE FloatV IZero() +{ + return vreinterpret_f32_u32(vdup_n_u32(0)); +} + +PX_FORCE_INLINE FloatV IOne() +{ + return vreinterpret_f32_u32(vdup_n_u32(1)); +} + +PX_FORCE_INLINE FloatV ITwo() +{ + return vreinterpret_f32_u32(vdup_n_u32(2)); +} + +PX_FORCE_INLINE FloatV IThree() +{ + return vreinterpret_f32_u32(vdup_n_u32(3)); +} + +PX_FORCE_INLINE FloatV IFour() +{ 
+ return vreinterpret_f32_u32(vdup_n_u32(4)); +} + +PX_FORCE_INLINE FloatV FNeg(const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return vneg_f32(f); +} + +PX_FORCE_INLINE FloatV FAdd(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vadd_f32(a, b); +} + +PX_FORCE_INLINE FloatV FSub(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vsub_f32(a, b); +} + +PX_FORCE_INLINE FloatV FMul(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vmul_f32(a, b); +} + +// The four helpers below start from the hardware estimate (vrecpe / vrsqrte) and apply n refinement steps using the matching step intrinsic (vrecps / vrsqrts). +// They back the VRECIP* and VRECIPSQRT* macros defined at the top of this file. +template <int n> +PX_FORCE_INLINE float32x2_t recip_newton(const float32x2_t& in) +{ + float32x2_t recip = vrecpe_f32(in); + for(int i = 0; i < n; ++i) + recip = vmul_f32(recip, vrecps_f32(in, recip)); + return recip; +} + +// Four-lane variant of recip_newton. +template <int n> +PX_FORCE_INLINE float32x4_t recipq_newton(const float32x4_t& in) +{ + float32x4_t recip = vrecpeq_f32(in); + for(int i = 0; i < n; ++i) + recip = vmulq_f32(recip, vrecpsq_f32(recip, in)); + return recip; +} + +// Reciprocal square root: each step feeds the squared current estimate and the input to vrsqrts. +template <int n> +PX_FORCE_INLINE float32x2_t rsqrt_newton(const float32x2_t& in) +{ + float32x2_t rsqrt = vrsqrte_f32(in); + for(int i = 0; i < n; ++i) + rsqrt = vmul_f32(rsqrt, vrsqrts_f32(vmul_f32(rsqrt, rsqrt), in)); + return rsqrt; +} + +// Four-lane variant of rsqrt_newton. +template <int n> +PX_FORCE_INLINE float32x4_t rsqrtq_newton(const float32x4_t& in) +{ + float32x4_t rsqrt = vrsqrteq_f32(in); + for(int i = 0; i < n; ++i) + rsqrt = vmulq_f32(rsqrt, vrsqrtsq_f32(vmulq_f32(rsqrt, rsqrt), in)); + return rsqrt; +} + +// Division is implemented as a multiplication by the refined reciprocal of b (VRECIP). +PX_FORCE_INLINE FloatV FDiv(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vmul_f32(a, VRECIP(b)); +} + +// As FDiv, but with the single-iteration reciprocal VRECIPE. +PX_FORCE_INLINE FloatV FDivFast(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vmul_f32(a, VRECIPE(b)); +} + +PX_FORCE_INLINE FloatV FRecip(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return VRECIP(a); +} + +PX_FORCE_INLINE FloatV FRecipFast(const 
FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return VRECIPE(a); +} + +PX_FORCE_INLINE FloatV FRsqrt(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return VRECIPSQRT(a); +} + +// Computes a times rsqrt(a); FSel returns a itself when a equals zero instead of that product. +PX_FORCE_INLINE FloatV FSqrt(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return FSel(FIsEq(a, FZero()), a, vmul_f32(a, VRECIPSQRT(a))); +} + +PX_FORCE_INLINE FloatV FRsqrtFast(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return VRECIPSQRTE(a); +} + +// Returns vmla_f32(c, a, b), i.e. c plus a times b. +PX_FORCE_INLINE FloatV FScaleAdd(const FloatV a, const FloatV b, const FloatV c) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDFLOATV(c); + return vmla_f32(c, a, b); +} + +// Returns vmls_f32(c, a, b), i.e. c minus a times b. +PX_FORCE_INLINE FloatV FNegScaleSub(const FloatV a, const FloatV b, const FloatV c) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDFLOATV(c); + return vmls_f32(c, a, b); +} + +PX_FORCE_INLINE FloatV FAbs(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return vabs_f32(a); +} + +// Selects a where c is set and b elsewhere; c is asserted to be all-true or all-false, and only its low half is used. +PX_FORCE_INLINE FloatV FSel(const BoolV c, const FloatV a, const FloatV b) +{ + PX_ASSERT( _VecMathTests::allElementsEqualBoolV(c, BTTTT()) || + _VecMathTests::allElementsEqualBoolV(c, BFFFF())); + ASSERT_ISVALIDFLOATV(vbsl_f32(vget_low_u32(c), a, b)); + return vbsl_f32(vget_low_u32(c), a, b); +} + +// The FIs* comparisons broadcast the lane 0 result to all four lanes of the returned BoolV. +PX_FORCE_INLINE BoolV FIsGrtr(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vdupq_lane_u32(vcgt_f32(a, b), 0); +} + +PX_FORCE_INLINE BoolV FIsGrtrOrEq(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vdupq_lane_u32(vcge_f32(a, b), 0); +} + +PX_FORCE_INLINE BoolV FIsEq(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vdupq_lane_u32(vceq_f32(a, b), 0); +} + +// Per-lane maximum; the validity assertions are commented out in this function. +PX_FORCE_INLINE FloatV FMax(const FloatV a, const FloatV b) +{ + //ASSERT_ISVALIDFLOATV(a); + //ASSERT_ISVALIDFLOATV(b); + return vmax_f32(a, b); +} + +// Per-lane minimum; the validity assertions are commented out in this function. +PX_FORCE_INLINE FloatV FMin(const FloatV a, const FloatV b) +{ + //ASSERT_ISVALIDFLOATV(a); + 
//ASSERT_ISVALIDFLOATV(b); + return vmin_f32(a, b); +} + +// Clamps a to the range from minV to maxV; only the two bounds are asserted to be valid. +PX_FORCE_INLINE FloatV FClamp(const FloatV a, const FloatV minV, const FloatV maxV) +{ + ASSERT_ISVALIDFLOATV(minV); + ASSERT_ISVALIDFLOATV(maxV); + return vmax_f32(vmin_f32(a, maxV), minV); +} + +// The FAll* functions return the lane 0 comparison mask as a PxU32 (zero when the comparison is false). +PX_FORCE_INLINE PxU32 FAllGrtr(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vget_lane_u32(vcgt_f32(a, b), 0); +} + +PX_FORCE_INLINE PxU32 FAllGrtrOrEq(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vget_lane_u32(vcge_f32(a, b), 0); +} + +PX_FORCE_INLINE PxU32 FAllEq(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return vget_lane_u32(vceq_f32(a, b), 0); +} + +// Adds 0.5f, subtracts 1.0f when the sign bit of a is set, then converts to int32 and back to float. +PX_FORCE_INLINE FloatV FRound(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + + // truncate(a + (0.5f - sign(a))) + const float32x2_t half = vdup_n_f32(0.5f); + const float32x2_t sign = vcvt_f32_u32((vshr_n_u32(vreinterpret_u32_f32(a), 31))); + const float32x2_t aPlusHalf = vadd_f32(a, half); + const float32x2_t aRound = vsub_f32(aPlusHalf, sign); + int32x2_t tmp = vcvt_s32_f32(aRound); + return vcvt_f32_s32(tmp); +} + +// Subtracts from a the multiple of two pi given by FRound(a / (2 pi)) and evaluates the odd power series spelled out below on the remainder V1. +PX_FORCE_INLINE FloatV FSin(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const FloatV recipTwoPi = FLoadA(g_PXReciprocalTwoPi.f); + const FloatV twoPi = FLoadA(g_PXTwoPi.f); + const FloatV tmp = FMul(a, recipTwoPi); + const FloatV b = FRound(tmp); + const FloatV V1 = FNegScaleSub(twoPi, b, a); + + // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - + // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! 
(for -PI <= V < PI) + // Odd powers of V1, each obtained from the previous one by a multiplication with V2. + const FloatV V2 = FMul(V1, V1); + const FloatV V3 = FMul(V2, V1); + const FloatV V5 = FMul(V3, V2); + const FloatV V7 = FMul(V5, V2); + const FloatV V9 = FMul(V7, V2); + const FloatV V11 = FMul(V9, V2); + const FloatV V13 = FMul(V11, V2); + const FloatV V15 = FMul(V13, V2); + const FloatV V17 = FMul(V15, V2); + const FloatV V19 = FMul(V17, V2); + const FloatV V21 = FMul(V19, V2); + const FloatV V23 = FMul(V21, V2); + + const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f); + const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f); + const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f); + + // S1..S11 are read starting at the Y lane of the first table; the X lane of sinCoefficients0 is not used here. + const FloatV S1 = V4GetY(sinCoefficients0); + const FloatV S2 = V4GetZ(sinCoefficients0); + const FloatV S3 = V4GetW(sinCoefficients0); + const FloatV S4 = V4GetX(sinCoefficients1); + const FloatV S5 = V4GetY(sinCoefficients1); + const FloatV S6 = V4GetZ(sinCoefficients1); + const FloatV S7 = V4GetW(sinCoefficients1); + const FloatV S8 = V4GetX(sinCoefficients2); + const FloatV S9 = V4GetY(sinCoefficients2); + const FloatV S10 = V4GetZ(sinCoefficients2); + const FloatV S11 = V4GetW(sinCoefficients2); + + // Accumulate the series: the sum starts at V1 and each step adds one coefficient times the matching power. + FloatV Result; + Result = FScaleAdd(S1, V3, V1); + Result = FScaleAdd(S2, V5, Result); + Result = FScaleAdd(S3, V7, Result); + Result = FScaleAdd(S4, V9, Result); + Result = FScaleAdd(S5, V11, Result); + Result = FScaleAdd(S6, V13, Result); + Result = FScaleAdd(S7, V15, Result); + Result = FScaleAdd(S8, V17, Result); + Result = FScaleAdd(S9, V19, Result); + Result = FScaleAdd(S10, V21, Result); + Result = FScaleAdd(S11, V23, Result); + + return Result; +} + +// Subtracts from a the multiple of two pi given by FRound(a / (2 pi)) and evaluates the even power series spelled out below on the remainder V1. +PX_FORCE_INLINE FloatV FCos(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const FloatV recipTwoPi = FLoadA(g_PXReciprocalTwoPi.f); + const FloatV twoPi = FLoadA(g_PXTwoPi.f); + const FloatV tmp = FMul(a, recipTwoPi); + const FloatV b = FRound(tmp); + const FloatV V1 = FNegScaleSub(twoPi, b, a); + 
+ // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! - + // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) + // Even powers of V1, built from products of lower powers. + const FloatV V2 = FMul(V1, V1); + const FloatV V4 = FMul(V2, V2); + const FloatV V6 = FMul(V4, V2); + const FloatV V8 = FMul(V4, V4); + const FloatV V10 = FMul(V6, V4); + const FloatV V12 = FMul(V6, V6); + const FloatV V14 = FMul(V8, V6); + const FloatV V16 = FMul(V8, V8); + const FloatV V18 = FMul(V10, V8); + const FloatV V20 = FMul(V10, V10); + const FloatV V22 = FMul(V12, V10); + + const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f); + const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f); + const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f); + + // C1..C11 are read starting at the Y lane of the first table; the X lane of cosCoefficients0 is not used here. + const FloatV C1 = V4GetY(cosCoefficients0); + const FloatV C2 = V4GetZ(cosCoefficients0); + const FloatV C3 = V4GetW(cosCoefficients0); + const FloatV C4 = V4GetX(cosCoefficients1); + const FloatV C5 = V4GetY(cosCoefficients1); + const FloatV C6 = V4GetZ(cosCoefficients1); + const FloatV C7 = V4GetW(cosCoefficients1); + const FloatV C8 = V4GetX(cosCoefficients2); + const FloatV C9 = V4GetY(cosCoefficients2); + const FloatV C10 = V4GetZ(cosCoefficients2); + const FloatV C11 = V4GetW(cosCoefficients2); + + // Accumulate the series: the sum starts at FOne() and each step adds one coefficient times the matching power. + FloatV Result; + Result = FScaleAdd(C1, V2, FOne()); + Result = FScaleAdd(C2, V4, Result); + Result = FScaleAdd(C3, V6, Result); + Result = FScaleAdd(C4, V8, Result); + Result = FScaleAdd(C5, V10, Result); + Result = FScaleAdd(C6, V12, Result); + Result = FScaleAdd(C7, V14, Result); + Result = FScaleAdd(C8, V16, Result); + Result = FScaleAdd(C9, V18, Result); + Result = FScaleAdd(C10, V20, Result); + Result = FScaleAdd(C11, V22, Result); + + return Result; +} + +// Builds the mask (a > max) or (min > a) and returns the negation of BAllEqFFFF on it (presumably 1 when a lies outside the range; BAllEqFFFF is defined elsewhere). +PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV min, const FloatV max) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(min); + ASSERT_ISVALIDFLOATV(max); + + const BoolV c = BOr(FIsGrtr(a, max), FIsGrtr(min, a)); + return 
PxU32(!BAllEqFFFF(c)); +} + +// Builds the mask (a >= min) and (max >= a) and reduces it with BAllEqTTTT (defined elsewhere). +PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV min, const FloatV max) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(min); + ASSERT_ISVALIDFLOATV(max); + + const BoolV c = BAnd(FIsGrtrOrEq(a, min), FIsGrtrOrEq(max, a)); + return PxU32(BAllEqTTTT(c)); +} + +// Absolute-value compare on lane 0: non-zero when |a| > |bounds|. +PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV bounds) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(bounds); + const uint32x2_t greater = vcagt_f32(a, bounds); + return vget_lane_u32(greater, 0); +} + +// Absolute-value compare on lane 0: non-zero when |bounds| >= |a|. +PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV bounds) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(bounds); + const uint32x2_t geq = vcage_f32(bounds, a); + return vget_lane_u32(geq, 0); +} + +////////////////////////////////// +// VEC3V +////////////////////////////////// + +// Builds (f, f, f, 0): the high half is f with its second lane masked to zero. +PX_FORCE_INLINE Vec3V V3Splat(const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + + const uint32x2_t mask = { 0xffffFFFF, 0x0 }; + + const uint32x2_t uHigh = vreinterpret_u32_f32(f); + const float32x2_t dHigh = vreinterpret_f32_u32(vand_u32(uHigh, mask)); + + return vcombine_f32(f, dHigh); +} + +// Builds (x, y, z, 0) from three FloatV values: vext pairs a lane of x with a lane of y, and z is masked so that w becomes zero. +PX_FORCE_INLINE Vec3V V3Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z) +{ + ASSERT_ISVALIDFLOATV(x); + ASSERT_ISVALIDFLOATV(y); + ASSERT_ISVALIDFLOATV(z); + + const uint32x2_t mask = { 0xffffFFFF, 0x0 }; + + const uint32x2_t dHigh = vand_u32(vreinterpret_u32_f32(z), mask); + const uint32x2_t dLow = vext_u32(vreinterpret_u32_f32(x), vreinterpret_u32_f32(y), 1); + return vreinterpretq_f32_u32(vcombine_u32(dLow, dHigh)); +} + +PX_FORCE_INLINE Vec3V V3UnitX() +{ + const float32x4_t x = { 1.0f, 0.0f, 0.0f, 0.0f }; + return x; +} + +PX_FORCE_INLINE Vec3V V3UnitY() +{ + const float32x4_t y = { 0, 1.0f, 0, 0 }; + return y; +} + +PX_FORCE_INLINE Vec3V V3UnitZ() +{ + const float32x4_t z = { 0, 0, 1.0f, 0 }; + return z; +} + +// Returns the x component duplicated into both lanes of a FloatV. +PX_FORCE_INLINE FloatV V3GetX(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + const float32x2_t fLow = vget_low_f32(f); + return 
vdup_lane_f32(fLow, 0); +} + +// Returns the y component duplicated into both lanes of a FloatV. +PX_FORCE_INLINE FloatV V3GetY(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + const float32x2_t fLow = vget_low_f32(f); + return vdup_lane_f32(fLow, 1); +} + +// Returns the z component duplicated into both lanes of a FloatV. +PX_FORCE_INLINE FloatV V3GetZ(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + const float32x2_t fhigh = vget_high_f32(f); + return vdup_lane_f32(fhigh, 0); +} + +// V3SetX/Y/Z combine v with a four-lane copy of f through V4Sel and a constant lane mask (BFTTT, BTFTT, BTTFT; all defined elsewhere). +PX_FORCE_INLINE Vec3V V3SetX(const Vec3V v, const FloatV f) +{ + ASSERT_ISVALIDVEC3V(v); + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BFTTT(), v, vcombine_f32(f, f)); +} + +PX_FORCE_INLINE Vec3V V3SetY(const Vec3V v, const FloatV f) +{ + ASSERT_ISVALIDVEC3V(v); + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTFTT(), v, vcombine_f32(f, f)); +} + +PX_FORCE_INLINE Vec3V V3SetZ(const Vec3V v, const FloatV f) +{ + ASSERT_ISVALIDVEC3V(v); + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTTFT(), v, vcombine_f32(f, f)); +} + +// Returns (a.x, b.x, c.x, 0). +PX_FORCE_INLINE Vec3V V3ColX(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + + const float32x2_t aLow = vget_low_f32(a); + const float32x2_t bLow = vget_low_f32(b); + const float32x2_t cLow = vget_low_f32(c); + const float32x2_t zero = vdup_n_f32(0.0f); + + const float32x2x2_t zipL = vzip_f32(aLow, bLow); + const float32x2x2_t zipH = vzip_f32(cLow, zero); + + return vcombine_f32(zipL.val[0], zipH.val[0]); +} + +// Returns (a.y, b.y, c.y, 0). +PX_FORCE_INLINE Vec3V V3ColY(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + + const float32x2_t aLow = vget_low_f32(a); + const float32x2_t bLow = vget_low_f32(b); + const float32x2_t cLow = vget_low_f32(c); + const float32x2_t zero = vdup_n_f32(0.0f); + + const float32x2x2_t zipL = vzip_f32(aLow, bLow); + const float32x2x2_t zipH = vzip_f32(cLow, zero); + + return vcombine_f32(zipL.val[1], zipH.val[1]); +} + +// Returns (a.z, b.z, c.z, c.w); the last lane is zero while c satisfies the asserted w == 0 invariant. +PX_FORCE_INLINE Vec3V V3ColZ(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + 
ASSERT_ISVALIDVEC3V(c); + + const float32x2_t aHi = vget_high_f32(a); + const float32x2_t bHi = vget_high_f32(b); + const float32x2_t cHi = vget_high_f32(c); + + const float32x2x2_t zipL = vzip_f32(aHi, bHi); + + return vcombine_f32(zipL.val[0], cHi); +} + +PX_FORCE_INLINE Vec3V V3Zero() +{ + return vdupq_n_f32(0.0f); +} + +PX_FORCE_INLINE Vec3V V3Eps() +{ + return V3Load(PX_EPS_REAL); +} + +PX_FORCE_INLINE Vec3V V3One() +{ + return V3Load(1.0f); +} + +// Negates all lanes and then writes 0.0f back into lane 3. +PX_FORCE_INLINE Vec3V V3Neg(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + const float32x4_t tmp = vnegq_f32(f); + return vsetq_lane_f32(0.0f, tmp, 3); +} + +PX_FORCE_INLINE Vec3V V3Add(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return vaddq_f32(a, b); +} + +// Adds b to x, y and z; Vec3V_From_FloatV supplies 0 for lane 3. +PX_FORCE_INLINE Vec3V V3Add(const Vec3V a, const FloatV b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + return vaddq_f32(a, Vec3V_From_FloatV(b)); +} + +PX_FORCE_INLINE Vec3V V3Sub(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return vsubq_f32(a, b); +} + +// Subtracts b from x, y and z; Vec3V_From_FloatV supplies 0 for lane 3. +PX_FORCE_INLINE Vec3V V3Sub(const Vec3V a, const FloatV b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + return vsubq_f32(a, Vec3V_From_FloatV(b)); +} + +// Multiplies every lane by lane 0 of b and then writes 0.0f into lane 3. +PX_FORCE_INLINE Vec3V V3Scale(const Vec3V a, const FloatV b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + const float32x4_t tmp = vmulq_lane_f32(a, b, 0); + return vsetq_lane_f32(0.0f, tmp, 3); +} + +PX_FORCE_INLINE Vec3V V3Mul(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return vmulq_f32(a, b); +} + +// Multiplies every lane by lane 0 of the refined reciprocal of b (VRECIP) and then writes 0.0f into lane 3. +PX_FORCE_INLINE Vec3V V3ScaleInv(const Vec3V a, const FloatV b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + const float32x2_t invB = VRECIP(b); + const float32x4_t tmp = vmulq_lane_f32(a, invB, 0); + return vsetq_lane_f32(0.0f, tmp, 3); +} + +// Per-lane division via the refined reciprocal of b; lane 3 of the reciprocal is zeroed before the multiplication. +PX_FORCE_INLINE Vec3V V3Div(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + float32x4_t invB = VRECIPQ(b); + 
invB = vsetq_lane_f32(0.0f, invB, 3); + return vmulq_f32(a, invB); +} + +// As V3ScaleInv, but with the single-iteration reciprocal VRECIPE. +PX_FORCE_INLINE Vec3V V3ScaleInvFast(const Vec3V a, const FloatV b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + const float32x2_t invB = VRECIPE(b); + const float32x4_t tmp = vmulq_lane_f32(a, invB, 0); + return vsetq_lane_f32(0.0f, tmp, 3); +} + +// As V3Div, but with the single-iteration reciprocal VRECIPEQ. +PX_FORCE_INLINE Vec3V V3DivFast(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + float32x4_t invB = VRECIPEQ(b); + invB = vsetq_lane_f32(0.0f, invB, 3); + return vmulq_f32(a, invB); +} + +// Per-lane reciprocal with lane 3 reset to 0.0f afterwards. +PX_FORCE_INLINE Vec3V V3Recip(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const float32x4_t recipA = VRECIPQ(a); + return vsetq_lane_f32(0.0f, recipA, 3); +} + +PX_FORCE_INLINE Vec3V V3RecipFast(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const float32x4_t recipA = VRECIPEQ(a); + return vsetq_lane_f32(0.0f, recipA, 3); +} + +// Per-lane reciprocal square root with lane 3 reset to 0.0f afterwards. +PX_FORCE_INLINE Vec3V V3Rsqrt(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const float32x4_t rSqrA = VRECIPSQRTQ(a); + return vsetq_lane_f32(0.0f, rSqrA, 3); +} + +PX_FORCE_INLINE Vec3V V3RsqrtFast(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const float32x4_t rSqrA = VRECIPSQRTEQ(a); + return vsetq_lane_f32(0.0f, rSqrA, 3); +} + +// Returns c plus a scaled by lane 0 of b, with lane 3 cleared through the half-register workaround described below. +PX_FORCE_INLINE Vec3V V3ScaleAdd(const Vec3V a, const FloatV b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDVEC3V(c); + + float32x4_t tmp = vmlaq_lane_f32(c, a, b, 0); + // using vsetq_lane_f32 resulted in failures, + // probably related to a compiler bug on + // ndk r9d-win32, gcc 4.8, cardhu/shield + + // code with issue + // return vsetq_lane_f32(0.0f, tmp, 3); + + // workaround + float32x2_t w_z = vget_high_f32(tmp); + float32x2_t y_x = vget_low_f32(tmp); + w_z = vset_lane_f32(0.0f, w_z, 1); + return vcombine_f32(y_x, w_z); +} + +// Returns c minus a scaled by lane 0 of b, with lane 3 cleared through the same half-register workaround. +PX_FORCE_INLINE Vec3V V3NegScaleSub(const Vec3V a, const FloatV b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDVEC3V(c); + + float32x4_t tmp = 
vmlsq_lane_f32(c, a, b, 0); + // using vsetq_lane_f32 resulted in failures, + // probably related to a compiler bug on + // ndk r9d-win32, gcc 4.8, cardhu/shield + + // code with issue + // return vsetq_lane_f32(0.0f, tmp, 3); + + // workaround + float32x2_t w_z = vget_high_f32(tmp); + float32x2_t y_x = vget_low_f32(tmp); + w_z = vset_lane_f32(0.0f, w_z, 1); + return vcombine_f32(y_x, w_z); +} + +// Returns vmlaq_f32(c, a, b), i.e. c plus the per-lane product of a and b. +PX_FORCE_INLINE Vec3V V3MulAdd(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + return vmlaq_f32(c, a, b); +} + +// Returns vmlsq_f32(c, a, b), i.e. c minus the per-lane product of a and b. +PX_FORCE_INLINE Vec3V V3NegMulSub(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + return vmlsq_f32(c, a, b); +} + +PX_FORCE_INLINE Vec3V V3Abs(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return vabsq_f32(a); +} + +// Multiplies per lane and sums all four products with two pairwise adds; the sum is returned in both lanes. +// Lane 3 is not masked (the mask code is commented out), so the result relies on the asserted w == 0 invariant of the inputs. +PX_FORCE_INLINE FloatV V3Dot(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + + // const uint32x2_t mask = {0xffffFFFF, 0x0}; + const float32x4_t tmp = vmulq_f32(a, b); + + const float32x2_t low = vget_low_f32(tmp); + const float32x2_t high = vget_high_f32(tmp); + // const float32x2_t high = vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(high_), mask)); + + const float32x2_t sumTmp = vpadd_f32(low, high); // = {0+z, x+y} + const float32x2_t sum0ZYX = vpadd_f32(sumTmp, sumTmp); // = {x+y+z, x+y+z} + + return sum0ZYX; +} + +// Cross product of a and b computed on 64-bit halves; lane 3 of the result is masked to zero with TF. +// The trailing comments list lanes in (high, low) order. +PX_FORCE_INLINE Vec3V V3Cross(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + + const uint32x2_t TF = { 0xffffFFFF, 0x0 }; + const float32x2_t ay_ax = vget_low_f32(a); // d2 + const float32x2_t aw_az = vget_high_f32(a); // d3 + const float32x2_t by_bx = vget_low_f32(b); // d4 + const float32x2_t bw_bz = vget_high_f32(b); // d5 + // Hi, Lo + const float32x2_t bz_by = vext_f32(by_bx, bw_bz, 1); // bz, by + const float32x2_t az_ay = vext_f32(ay_ax, aw_az, 1); // az, ay + + const float32x2_t azbx = 
vmul_f32(aw_az, by_bx); // 0, az*bx + const float32x2_t aybz_axby = vmul_f32(ay_ax, bz_by); // ay*bz, ax*by + + const float32x2_t azbxSUBaxbz = vmls_f32(azbx, bw_bz, ay_ax); // 0, az*bx-ax*bz + const float32x2_t aybzSUBazby_axbySUBaybx = vmls_f32(aybz_axby, by_bx, az_ay); // ay*bz-az*by, ax*by-ay*bx + + const float32x2_t retLow = vext_f32(aybzSUBazby_axbySUBaybx, azbxSUBaxbz, 1); // az*bx-ax*bz, ay*bz-az*by + const uint32x2_t retHigh = vand_u32(TF, vreinterpret_u32_f32(aybzSUBazby_axbySUBaybx)); // 0, ax*by-ay*bx + + return vcombine_f32(retLow, vreinterpret_f32_u32(retHigh)); +} + +PX_FORCE_INLINE VecCrossV V3PrepareCross(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return a; +} + +PX_FORCE_INLINE FloatV V3Length(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + // const uint32x2_t mask = {0xffffFFFF, 0x0}; + + const float32x4_t tmp = vmulq_f32(a, a); + const float32x2_t low = vget_low_f32(tmp); + const float32x2_t high = vget_high_f32(tmp); + // const float32x2_t high = vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(high_), mask)); + + const float32x2_t sumTmp = vpadd_f32(low, high); // = {0+z, x+y} + const float32x2_t sum0ZYX = vpadd_f32(sumTmp, sumTmp); // = {x+y+z, x+y+z} + + return FSqrt(sum0ZYX); +} + +PX_FORCE_INLINE FloatV V3LengthSq(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return V3Dot(a, a); +} + +PX_FORCE_INLINE Vec3V V3Normalize(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + //PX_ASSERT(!FAllEq(V4LengthSq(a), FZero())); + return V3ScaleInv(a, V3Length(a)); +} + +PX_FORCE_INLINE Vec3V V3NormalizeFast(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + //PX_ASSERT(!FAllEq(V4LengthSq(a), FZero())); + return V3Scale(a, VRECIPSQRTE(V3Dot(a, a))); +} + +PX_FORCE_INLINE Vec3V V3NormalizeSafe(const Vec3V a, const Vec3V unsafeReturnValue) +{ + ASSERT_ISVALIDVEC3V(a); + const FloatV zero = vdup_n_f32(0.0f); + const FloatV length = V3Length(a); + const uint32x4_t isGreaterThanZero = FIsGrtr(length, zero); + return V3Sel(isGreaterThanZero, V3ScaleInv(a, length), 
unsafeReturnValue); +} + +PX_FORCE_INLINE Vec3V V3Sel(const BoolV c, const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V( vbslq_f32(c, a, b)); + return vbslq_f32(c, a, b); +} + +PX_FORCE_INLINE BoolV V3IsGrtr(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return vcgtq_f32(a, b); +} + +PX_FORCE_INLINE BoolV V3IsGrtrOrEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return vcgeq_f32(a, b); +} + +PX_FORCE_INLINE BoolV V3IsEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return vceqq_f32(a, b); +} + +PX_FORCE_INLINE Vec3V V3Max(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return vmaxq_f32(a, b); +} + +PX_FORCE_INLINE Vec3V V3Min(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return vminq_f32(a, b); +} + +PX_FORCE_INLINE FloatV V3ExtractMax(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + const float32x2_t low = vget_low_f32(a); + const float32x2_t high = vget_high_f32(a); + + const float32x2_t zz = vdup_lane_f32(high, 0); + const float32x2_t max0 = vpmax_f32(zz, low); + const float32x2_t max1 = vpmax_f32(max0, max0); + + return max1; +} + +PX_FORCE_INLINE FloatV V3ExtractMin(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + const float32x2_t low = vget_low_f32(a); + const float32x2_t high = vget_high_f32(a); + + const float32x2_t zz = vdup_lane_f32(high, 0); + const float32x2_t min0 = vpmin_f32(zz, low); + const float32x2_t min1 = vpmin_f32(min0, min0); + + return min1; +} + +// return (a >= 0.0f) ? 
1.0f : -1.0f; +PX_FORCE_INLINE Vec3V V3Sign(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const Vec3V zero = V3Zero(); + const Vec3V one = V3One(); + const Vec3V none = V3Neg(one); + return V3Sel(V3IsGrtrOrEq(a, zero), one, none); +} + +PX_FORCE_INLINE Vec3V V3Clamp(const Vec3V a, const Vec3V minV, const Vec3V maxV) +{ + ASSERT_ISVALIDVEC3V(minV); + ASSERT_ISVALIDVEC3V(maxV); + return V3Max(V3Min(a, maxV), minV); +} + +PX_FORCE_INLINE PxU32 V3AllGrtr(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return internalUnitNeonSimd::BAllTrue3_R(V4IsGrtr(a, b)); +} + +PX_FORCE_INLINE PxU32 V3AllGrtrOrEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return internalUnitNeonSimd::BAllTrue3_R(V4IsGrtrOrEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V3AllEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return internalUnitNeonSimd::BAllTrue3_R(V4IsEq(a, b)); +} + +PX_FORCE_INLINE Vec3V V3Round(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + // truncate(a + (0.5f - sign(a))) + const Vec3V half = V3Load(0.5f); + const float32x4_t sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(a), 31))); + const Vec3V aPlusHalf = V3Add(a, half); + const Vec3V aRound = V3Sub(aPlusHalf, sign); + return vcvtq_f32_s32(vcvtq_s32_f32(aRound)); +} + +PX_FORCE_INLINE Vec3V V3Sin(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec3V tmp = V4Mul(a, recipTwoPi); + const Vec3V b = V3Round(tmp); + const Vec3V V1 = V4NegMulSub(twoPi, b, a); + + // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - + // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! 
(for -PI <= V < PI) + const Vec3V V2 = V3Mul(V1, V1); + const Vec3V V3 = V3Mul(V2, V1); + const Vec3V V5 = V3Mul(V3, V2); + const Vec3V V7 = V3Mul(V5, V2); + const Vec3V V9 = V3Mul(V7, V2); + const Vec3V V11 = V3Mul(V9, V2); + const Vec3V V13 = V3Mul(V11, V2); + const Vec3V V15 = V3Mul(V13, V2); + const Vec3V V17 = V3Mul(V15, V2); + const Vec3V V19 = V3Mul(V17, V2); + const Vec3V V21 = V3Mul(V19, V2); + const Vec3V V23 = V3Mul(V21, V2); + + const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f); + const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f); + const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f); + + const FloatV S1 = V4GetY(sinCoefficients0); + const FloatV S2 = V4GetZ(sinCoefficients0); + const FloatV S3 = V4GetW(sinCoefficients0); + const FloatV S4 = V4GetX(sinCoefficients1); + const FloatV S5 = V4GetY(sinCoefficients1); + const FloatV S6 = V4GetZ(sinCoefficients1); + const FloatV S7 = V4GetW(sinCoefficients1); + const FloatV S8 = V4GetX(sinCoefficients2); + const FloatV S9 = V4GetY(sinCoefficients2); + const FloatV S10 = V4GetZ(sinCoefficients2); + const FloatV S11 = V4GetW(sinCoefficients2); + + Vec3V Result; + Result = V4ScaleAdd(V3, S1, V1); + Result = V4ScaleAdd(V5, S2, Result); + Result = V4ScaleAdd(V7, S3, Result); + Result = V4ScaleAdd(V9, S4, Result); + Result = V4ScaleAdd(V11, S5, Result); + Result = V4ScaleAdd(V13, S6, Result); + Result = V4ScaleAdd(V15, S7, Result); + Result = V4ScaleAdd(V17, S8, Result); + Result = V4ScaleAdd(V19, S9, Result); + Result = V4ScaleAdd(V21, S10, Result); + Result = V4ScaleAdd(V23, S11, Result); + + return Result; +} + +PX_FORCE_INLINE Vec3V V3Cos(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec3V tmp = V4Mul(a, recipTwoPi); + const Vec3V b = V3Round(tmp); + const Vec3V V1 = V4NegMulSub(twoPi, b, 
a); + + // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! - + // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) + const Vec3V V2 = V3Mul(V1, V1); + const Vec3V V4 = V3Mul(V2, V2); + const Vec3V V6 = V3Mul(V4, V2); + const Vec3V V8 = V3Mul(V4, V4); + const Vec3V V10 = V3Mul(V6, V4); + const Vec3V V12 = V3Mul(V6, V6); + const Vec3V V14 = V3Mul(V8, V6); + const Vec3V V16 = V3Mul(V8, V8); + const Vec3V V18 = V3Mul(V10, V8); + const Vec3V V20 = V3Mul(V10, V10); + const Vec3V V22 = V3Mul(V12, V10); + + const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f); + const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f); + const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f); + + const FloatV C1 = V4GetY(cosCoefficients0); + const FloatV C2 = V4GetZ(cosCoefficients0); + const FloatV C3 = V4GetW(cosCoefficients0); + const FloatV C4 = V4GetX(cosCoefficients1); + const FloatV C5 = V4GetY(cosCoefficients1); + const FloatV C6 = V4GetZ(cosCoefficients1); + const FloatV C7 = V4GetW(cosCoefficients1); + const FloatV C8 = V4GetX(cosCoefficients2); + const FloatV C9 = V4GetY(cosCoefficients2); + const FloatV C10 = V4GetZ(cosCoefficients2); + const FloatV C11 = V4GetW(cosCoefficients2); + + Vec3V Result; + Result = V4ScaleAdd(V2, C1, V4One()); + Result = V4ScaleAdd(V4, C2, Result); + Result = V4ScaleAdd(V6, C3, Result); + Result = V4ScaleAdd(V8, C4, Result); + Result = V4ScaleAdd(V10, C5, Result); + Result = V4ScaleAdd(V12, C6, Result); + Result = V4ScaleAdd(V14, C7, Result); + Result = V4ScaleAdd(V16, C8, Result); + Result = V4ScaleAdd(V18, C9, Result); + Result = V4ScaleAdd(V20, C10, Result); + Result = V4ScaleAdd(V22, C11, Result); + + return V4ClearW(Result); +} + +PX_FORCE_INLINE Vec3V V3PermYZZ(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const float32x2_t xy = vget_low_f32(a); + const float32x2_t zw = vget_high_f32(a); + const float32x2_t yz = vext_f32(xy, zw, 1); + return 
vcombine_f32(yz, zw); +} + +PX_FORCE_INLINE Vec3V V3PermXYX(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const uint32x2_t mask = { 0xffffFFFF, 0x0 }; + + const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t xw = vand_u32(xy, mask); + return vreinterpretq_f32_u32(vcombine_u32(xy, xw)); +} + +PX_FORCE_INLINE Vec3V V3PermYZX(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const uint32x2_t mask = { 0xffffFFFF, 0x0 }; + + const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t yz = vext_u32(xy, zw, 1); + const uint32x2_t xw = vand_u32(xy, mask); + return vreinterpretq_f32_u32(vcombine_u32(yz, xw)); +} + +PX_FORCE_INLINE Vec3V V3PermZXY(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t wz = vrev64_u32(zw); + + const uint32x2_t zx = vext_u32(wz, xy, 1); + const uint32x2_t yw = vext_u32(xy, wz, 1); + + return vreinterpretq_f32_u32(vcombine_u32(zx, yw)); +} + +PX_FORCE_INLINE Vec3V V3PermZZY(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(a)); + + const uint32x2_t wz = vrev64_u32(zw); + const uint32x2_t yw = vext_u32(xy, wz, 1); + const uint32x2_t zz = vdup_lane_u32(wz, 1); + + return vreinterpretq_f32_u32(vcombine_u32(zz, yw)); +} + +PX_FORCE_INLINE Vec3V V3PermYXX(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + const uint32x2_t mask = { 0xffffFFFF, 0x0 }; + + const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t yx = vrev64_u32(xy); + const uint32x2_t xw = vand_u32(xy, mask); + return vreinterpretq_f32_u32(vcombine_u32(yx, xw)); +} + +PX_FORCE_INLINE Vec3V V3Perm_Zero_1Z_0Y(const Vec3V v0, const Vec3V v1) +{ + ASSERT_ISVALIDVEC3V(v0); + ASSERT_ISVALIDVEC3V(v1); + + 
const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(v0)); + const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(v1)); + const uint32x2_t wz = vrev64_u32(zw); + const uint32x2_t yw = vext_u32(xy, wz, 1); + + return vreinterpretq_f32_u32(vcombine_u32(wz, yw)); +} + +PX_FORCE_INLINE Vec3V V3Perm_0Z_Zero_1X(const Vec3V v0, const Vec3V v1) +{ + ASSERT_ISVALIDVEC3V(v0); + ASSERT_ISVALIDVEC3V(v1); + + const uint32x2_t mask = { 0xffffFFFF, 0x0 }; + + const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(v0)); + const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(v1)); + const uint32x2_t xw = vand_u32(xy, mask); + + return vreinterpretq_f32_u32(vcombine_u32(zw, xw)); +} + +PX_FORCE_INLINE Vec3V V3Perm_1Y_0X_Zero(const Vec3V v0, const Vec3V v1) +{ + ASSERT_ISVALIDVEC3V(v0); + ASSERT_ISVALIDVEC3V(v1); + + const uint32x2_t axy = vget_low_u32(vreinterpretq_u32_f32(v0)); + const uint32x2_t bxy = vget_low_u32(vreinterpretq_u32_f32(v1)); + const uint32x2_t byax = vext_u32(bxy, axy, 1); + const uint32x2_t ww = vdup_n_u32(0); + + return vreinterpretq_f32_u32(vcombine_u32(byax, ww)); +} + +PX_FORCE_INLINE FloatV V3SumElems(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + // const uint32x2_t mask = {0xffffFFFF, 0x0}; + + const float32x2_t low = vget_low_f32(a); + const float32x2_t high = vget_high_f32(a); + // const float32x2_t high = vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(high_), mask)); + + const float32x2_t sumTmp = vpadd_f32(low, high); // = {0+z, x+y} + const float32x2_t sum0ZYX = vpadd_f32(sumTmp, sumTmp); // = {x+y+z, x+y+z} + + return sum0ZYX; +} + +PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V min, const Vec3V max) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(min); + ASSERT_ISVALIDVEC3V(max); + + const BoolV c = BOr(V3IsGrtr(a, max), V3IsGrtr(min, a)); + return internalUnitNeonSimd::BAnyTrue3_R(c); +} + +PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V min, const Vec3V max) +{ + ASSERT_ISVALIDVEC3V(a); + 
ASSERT_ISVALIDVEC3V(min); + ASSERT_ISVALIDVEC3V(max); + + const BoolV c = BAnd(V3IsGrtrOrEq(a, min), V3IsGrtrOrEq(max, a)); + return internalUnitNeonSimd::BAllTrue4_R(c); +} + +PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V bounds) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(bounds); + + const BoolV greater = V3IsGrtr(V3Abs(a), bounds); + return internalUnitNeonSimd::BAnyTrue3_R(greater); +} + +PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V bounds) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(bounds); + + const BoolV greaterOrEq = V3IsGrtrOrEq(bounds, V3Abs(a)); + return internalUnitNeonSimd::BAllTrue4_R(greaterOrEq); +} + +PX_FORCE_INLINE void V3Transpose(Vec3V& col0, Vec3V& col1, Vec3V& col2) +{ + ASSERT_ISVALIDVEC3V(col0); + ASSERT_ISVALIDVEC3V(col1); + ASSERT_ISVALIDVEC3V(col2); + + Vec3V col3 = V3Zero(); + const float32x4x2_t v0v1 = vzipq_f32(col0, col2); + const float32x4x2_t v2v3 = vzipq_f32(col1, col3); + const float32x4x2_t zip0 = vzipq_f32(v0v1.val[0], v2v3.val[0]); + const float32x4x2_t zip1 = vzipq_f32(v0v1.val[1], v2v3.val[1]); + col0 = zip0.val[0]; + col1 = zip0.val[1]; + col2 = zip1.val[0]; + // col3 = zip1.val[1]; +} + +////////////////////////////////// +// VEC4V +////////////////////////////////// + +PX_FORCE_INLINE Vec4V V4Splat(const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return vcombine_f32(f, f); +} + +PX_FORCE_INLINE Vec4V V4Merge(const FloatV* const floatVArray) +{ + ASSERT_ISVALIDFLOATV(floatVArray[0]); + ASSERT_ISVALIDFLOATV(floatVArray[1]); + ASSERT_ISVALIDFLOATV(floatVArray[2]); + ASSERT_ISVALIDFLOATV(floatVArray[3]); + + const uint32x2_t xLow = vreinterpret_u32_f32(floatVArray[0]); + const uint32x2_t yLow = vreinterpret_u32_f32(floatVArray[1]); + const uint32x2_t zLow = vreinterpret_u32_f32(floatVArray[2]); + const uint32x2_t wLow = vreinterpret_u32_f32(floatVArray[3]); + + const uint32x2_t dLow = vext_u32(xLow, yLow, 1); + const uint32x2_t dHigh = vext_u32(zLow, wLow, 1); + + return 
vreinterpretq_f32_u32(vcombine_u32(dLow, dHigh)); +} + +PX_FORCE_INLINE Vec4V V4Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z, const FloatVArg w) +{ + ASSERT_ISVALIDFLOATV(x); + ASSERT_ISVALIDFLOATV(y); + ASSERT_ISVALIDFLOATV(z); + ASSERT_ISVALIDFLOATV(w); + + const uint32x2_t xLow = vreinterpret_u32_f32(x); + const uint32x2_t yLow = vreinterpret_u32_f32(y); + const uint32x2_t zLow = vreinterpret_u32_f32(z); + const uint32x2_t wLow = vreinterpret_u32_f32(w); + + const uint32x2_t dLow = vext_u32(xLow, yLow, 1); + const uint32x2_t dHigh = vext_u32(zLow, wLow, 1); + + return vreinterpretq_f32_u32(vcombine_u32(dLow, dHigh)); +} + +PX_FORCE_INLINE Vec4V V4MergeW(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const float32x2_t xx = vget_high_f32(x); + const float32x2_t yy = vget_high_f32(y); + const float32x2_t zz = vget_high_f32(z); + const float32x2_t ww = vget_high_f32(w); + + const float32x2x2_t zipL = vzip_f32(xx, yy); + const float32x2x2_t zipH = vzip_f32(zz, ww); + + return vcombine_f32(zipL.val[1], zipH.val[1]); +} + +PX_FORCE_INLINE Vec4V V4MergeZ(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const float32x2_t xx = vget_high_f32(x); + const float32x2_t yy = vget_high_f32(y); + const float32x2_t zz = vget_high_f32(z); + const float32x2_t ww = vget_high_f32(w); + + const float32x2x2_t zipL = vzip_f32(xx, yy); + const float32x2x2_t zipH = vzip_f32(zz, ww); + + return vcombine_f32(zipL.val[0], zipH.val[0]); +} + +PX_FORCE_INLINE Vec4V V4MergeY(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const float32x2_t xx = vget_low_f32(x); + const float32x2_t yy = vget_low_f32(y); + const float32x2_t zz = vget_low_f32(z); + const float32x2_t ww = vget_low_f32(w); + + const float32x2x2_t zipL = vzip_f32(xx, yy); + const float32x2x2_t zipH = vzip_f32(zz, ww); + + return vcombine_f32(zipL.val[1], zipH.val[1]); +} + +PX_FORCE_INLINE Vec4V V4MergeX(const Vec4VArg x, const 
Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const float32x2_t xx = vget_low_f32(x); + const float32x2_t yy = vget_low_f32(y); + const float32x2_t zz = vget_low_f32(z); + const float32x2_t ww = vget_low_f32(w); + + const float32x2x2_t zipL = vzip_f32(xx, yy); + const float32x2x2_t zipH = vzip_f32(zz, ww); + + return vcombine_f32(zipL.val[0], zipH.val[0]); +} + +PX_FORCE_INLINE Vec4V V4UnpackXY(const Vec4VArg a, const Vec4VArg b) +{ + return vzipq_f32(a, b).val[0]; +} + +PX_FORCE_INLINE Vec4V V4UnpackZW(const Vec4VArg a, const Vec4VArg b) +{ + return vzipq_f32(a, b).val[1]; +} + +PX_FORCE_INLINE Vec4V V4UnitW() +{ + const float32x2_t zeros = vreinterpret_f32_u32(vmov_n_u32(0)); + const float32x2_t ones = vmov_n_f32(1.0f); + const float32x2_t zo = vext_f32(zeros, ones, 1); + return vcombine_f32(zeros, zo); +} + +PX_FORCE_INLINE Vec4V V4UnitX() +{ + const float32x2_t zeros = vreinterpret_f32_u32(vmov_n_u32(0)); + const float32x2_t ones = vmov_n_f32(1.0f); + const float32x2_t oz = vext_f32(ones, zeros, 1); + return vcombine_f32(oz, zeros); +} + +PX_FORCE_INLINE Vec4V V4UnitY() +{ + const float32x2_t zeros = vreinterpret_f32_u32(vmov_n_u32(0)); + const float32x2_t ones = vmov_n_f32(1.0f); + const float32x2_t zo = vext_f32(zeros, ones, 1); + return vcombine_f32(zo, zeros); +} + +PX_FORCE_INLINE Vec4V V4UnitZ() +{ + const float32x2_t zeros = vreinterpret_f32_u32(vmov_n_u32(0)); + const float32x2_t ones = vmov_n_f32(1.0f); + const float32x2_t oz = vext_f32(ones, zeros, 1); + return vcombine_f32(zeros, oz); +} + +PX_FORCE_INLINE FloatV V4GetW(const Vec4V f) +{ + const float32x2_t fhigh = vget_high_f32(f); + return vdup_lane_f32(fhigh, 1); +} + +PX_FORCE_INLINE FloatV V4GetX(const Vec4V f) +{ + const float32x2_t fLow = vget_low_f32(f); + return vdup_lane_f32(fLow, 0); +} + +PX_FORCE_INLINE FloatV V4GetY(const Vec4V f) +{ + const float32x2_t fLow = vget_low_f32(f); + return vdup_lane_f32(fLow, 1); +} + +PX_FORCE_INLINE FloatV V4GetZ(const Vec4V f) +{ + const 
float32x2_t fhigh = vget_high_f32(f); + return vdup_lane_f32(fhigh, 0); +} + +PX_FORCE_INLINE Vec4V V4SetW(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTTTF(), v, vcombine_f32(f, f)); +} + +PX_FORCE_INLINE Vec4V V4SetX(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BFTTT(), v, vcombine_f32(f, f)); +} + +PX_FORCE_INLINE Vec4V V4SetY(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTFTT(), v, vcombine_f32(f, f)); +} + +PX_FORCE_INLINE Vec4V V4SetZ(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTTFT(), v, vcombine_f32(f, f)); +} + +PX_FORCE_INLINE Vec4V V4ClearW(const Vec4V v) +{ + return V4Sel(BTTTF(), v, V4Zero()); +} + +PX_FORCE_INLINE Vec4V V4PermYXWZ(const Vec4V a) +{ + const float32x2_t xy = vget_low_f32(a); + const float32x2_t zw = vget_high_f32(a); + const float32x2_t yx = vext_f32(xy, xy, 1); + const float32x2_t wz = vext_f32(zw, zw, 1); + return vcombine_f32(yx, wz); +} + +PX_FORCE_INLINE Vec4V V4PermXZXZ(const Vec4V a) +{ + const float32x2_t xy = vget_low_f32(a); + const float32x2_t zw = vget_high_f32(a); + const float32x2x2_t xzyw = vzip_f32(xy, zw); + return vcombine_f32(xzyw.val[0], xzyw.val[0]); +} + +PX_FORCE_INLINE Vec4V V4PermYWYW(const Vec4V a) +{ + const float32x2_t xy = vget_low_f32(a); + const float32x2_t zw = vget_high_f32(a); + const float32x2x2_t xzyw = vzip_f32(xy, zw); + return vcombine_f32(xzyw.val[1], xzyw.val[1]); +} + +PX_FORCE_INLINE Vec4V V4PermYZXW(const Vec4V a) +{ + const uint32x2_t xy = vget_low_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t zw = vget_high_u32(vreinterpretq_u32_f32(a)); + const uint32x2_t yz = vext_u32(xy, zw, 1); + const uint32x2_t xw = vrev64_u32(vext_u32(zw, xy, 1)); + return vreinterpretq_f32_u32(vcombine_u32(yz, xw)); +} + +template <PxU8 E0, PxU8 E1, PxU8 E2, PxU8 E3> +PX_FORCE_INLINE Vec4V V4Perm(const Vec4V V) +{ + static const uint32_t ControlElement[4] = + { +#if 1 + 0x03020100, // 
XM_SWIZZLE_X + 0x07060504, // XM_SWIZZLE_Y + 0x0B0A0908, // XM_SWIZZLE_Z + 0x0F0E0D0C, // XM_SWIZZLE_W +#else + 0x00010203, // XM_SWIZZLE_X + 0x04050607, // XM_SWIZZLE_Y + 0x08090A0B, // XM_SWIZZLE_Z + 0x0C0D0E0F, // XM_SWIZZLE_W +#endif + }; + + uint8x8x2_t tbl; + tbl.val[0] = vreinterpret_u8_f32(vget_low_f32(V)); + tbl.val[1] = vreinterpret_u8_f32(vget_high_f32(V)); + + uint8x8_t idx = + vcreate_u8(static_cast<uint64_t>(ControlElement[E0]) | (static_cast<uint64_t>(ControlElement[E1]) << 32)); + const uint8x8_t rL = vtbl2_u8(tbl, idx); + idx = vcreate_u8(static_cast<uint64_t>(ControlElement[E2]) | (static_cast<uint64_t>(ControlElement[E3]) << 32)); + const uint8x8_t rH = vtbl2_u8(tbl, idx); + return vreinterpretq_f32_u8(vcombine_u8(rL, rH)); +} + +// PT: this seems measurably slower than the hardcoded version +/*PX_FORCE_INLINE Vec4V V4PermYZXW(const Vec4V a) +{ + return V4Perm<1, 2, 0, 3>(a); +}*/ + +PX_FORCE_INLINE Vec4V V4Zero() +{ + return vreinterpretq_f32_u32(vmovq_n_u32(0)); + // return vmovq_n_f32(0.0f); +} + +PX_FORCE_INLINE Vec4V V4One() +{ + return vmovq_n_f32(1.0f); +} + +PX_FORCE_INLINE Vec4V V4Eps() +{ + // return vmovq_n_f32(PX_EPS_REAL); + return V4Load(PX_EPS_REAL); +} + +PX_FORCE_INLINE Vec4V V4Neg(const Vec4V f) +{ + return vnegq_f32(f); +} + +PX_FORCE_INLINE Vec4V V4Add(const Vec4V a, const Vec4V b) +{ + return vaddq_f32(a, b); +} + +PX_FORCE_INLINE Vec4V V4Sub(const Vec4V a, const Vec4V b) +{ + return vsubq_f32(a, b); +} + +PX_FORCE_INLINE Vec4V V4Scale(const Vec4V a, const FloatV b) +{ + return vmulq_lane_f32(a, b, 0); +} + +PX_FORCE_INLINE Vec4V V4Mul(const Vec4V a, const Vec4V b) +{ + return vmulq_f32(a, b); +} + +PX_FORCE_INLINE Vec4V V4ScaleInv(const Vec4V a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(b); + const float32x2_t invB = VRECIP(b); + return vmulq_lane_f32(a, invB, 0); +} + +PX_FORCE_INLINE Vec4V V4Div(const Vec4V a, const Vec4V b) +{ + const float32x4_t invB = VRECIPQ(b); + return vmulq_f32(a, invB); +} + +PX_FORCE_INLINE Vec4V 
V4ScaleInvFast(const Vec4V a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(b); + const float32x2_t invB = VRECIPE(b); + return vmulq_lane_f32(a, invB, 0); +} + +PX_FORCE_INLINE Vec4V V4DivFast(const Vec4V a, const Vec4V b) +{ + const float32x4_t invB = VRECIPEQ(b); + return vmulq_f32(a, invB); +} + +PX_FORCE_INLINE Vec4V V4Recip(const Vec4V a) +{ + return VRECIPQ(a); +} + +PX_FORCE_INLINE Vec4V V4RecipFast(const Vec4V a) +{ + return VRECIPEQ(a); +} + +PX_FORCE_INLINE Vec4V V4Rsqrt(const Vec4V a) +{ + return VRECIPSQRTQ(a); +} + +PX_FORCE_INLINE Vec4V V4RsqrtFast(const Vec4V a) +{ + return VRECIPSQRTEQ(a); +} + +PX_FORCE_INLINE Vec4V V4Sqrt(const Vec4V a) +{ + return V4Sel(V4IsEq(a, V4Zero()), a, V4Mul(a, VRECIPSQRTQ(a))); +} + +PX_FORCE_INLINE Vec4V V4ScaleAdd(const Vec4V a, const FloatV b, const Vec4V c) +{ + ASSERT_ISVALIDFLOATV(b); + return vmlaq_lane_f32(c, a, b, 0); +} + +PX_FORCE_INLINE Vec4V V4NegScaleSub(const Vec4V a, const FloatV b, const Vec4V c) +{ + ASSERT_ISVALIDFLOATV(b); + return vmlsq_lane_f32(c, a, b, 0); +} + +PX_FORCE_INLINE Vec4V V4MulAdd(const Vec4V a, const Vec4V b, const Vec4V c) +{ + return vmlaq_f32(c, a, b); +} + +PX_FORCE_INLINE Vec4V V4NegMulSub(const Vec4V a, const Vec4V b, const Vec4V c) +{ + return vmlsq_f32(c, a, b); +} + +PX_FORCE_INLINE Vec4V V4Abs(const Vec4V a) +{ + return vabsq_f32(a); +} + +PX_FORCE_INLINE FloatV V4SumElements(const Vec4V a) +{ + const Vec4V xy = V4UnpackXY(a, a); // x,x,y,y + const Vec4V zw = V4UnpackZW(a, a); // z,z,w,w + const Vec4V xz_yw = V4Add(xy, zw); // x+z,x+z,y+w,y+w + const FloatV xz = V4GetX(xz_yw); // x+z + const FloatV yw = V4GetZ(xz_yw); // y+w + return FAdd(xz, yw); // sum +} + +PX_FORCE_INLINE FloatV V4Dot(const Vec4V a, const Vec4V b) +{ + const float32x4_t tmp = vmulq_f32(a, b); + const float32x2_t low = vget_low_f32(tmp); + const float32x2_t high = vget_high_f32(tmp); + + const float32x2_t sumTmp = vpadd_f32(low, high); // = {z+w, x+y} + const float32x2_t sumWZYX = vpadd_f32(sumTmp, sumTmp); 
// = {x+y+z+w, x+y+z+w} + return sumWZYX; +} + +PX_FORCE_INLINE FloatV V4Dot3(const Vec4V aa, const Vec4V bb) +{ + // PT: the V3Dot code relies on the fact that W=0 so we can't reuse it as-is, we need to clear W first. + // TODO: find a better implementation that does not need to clear W. + const Vec4V a = V4ClearW(aa); + const Vec4V b = V4ClearW(bb); + + const float32x4_t tmp = vmulq_f32(a, b); + const float32x2_t low = vget_low_f32(tmp); + const float32x2_t high = vget_high_f32(tmp); + + const float32x2_t sumTmp = vpadd_f32(low, high); // = {0+z, x+y} + const float32x2_t sum0ZYX = vpadd_f32(sumTmp, sumTmp); // = {x+y+z, x+y+z} + return sum0ZYX; +} + +PX_FORCE_INLINE Vec4V V4Cross(const Vec4V a, const Vec4V b) +{ + const uint32x2_t TF = { 0xffffFFFF, 0x0 }; + const float32x2_t ay_ax = vget_low_f32(a); // d2 + const float32x2_t aw_az = vget_high_f32(a); // d3 + const float32x2_t by_bx = vget_low_f32(b); // d4 + const float32x2_t bw_bz = vget_high_f32(b); // d5 + // Hi, Lo + const float32x2_t bz_by = vext_f32(by_bx, bw_bz, 1); // bz, by + const float32x2_t az_ay = vext_f32(ay_ax, aw_az, 1); // az, ay + + const float32x2_t azbx = vmul_f32(aw_az, by_bx); // 0, az*bx + const float32x2_t aybz_axby = vmul_f32(ay_ax, bz_by); // ay*bz, ax*by + + const float32x2_t azbxSUBaxbz = vmls_f32(azbx, bw_bz, ay_ax); // 0, az*bx-ax*bz + const float32x2_t aybzSUBazby_axbySUBaybx = vmls_f32(aybz_axby, by_bx, az_ay); // ay*bz-az*by, ax*by-ay*bx + + const float32x2_t retLow = vext_f32(aybzSUBazby_axbySUBaybx, azbxSUBaxbz, 1); // az*bx-ax*bz, ay*bz-az*by + const uint32x2_t retHigh = vand_u32(TF, vreinterpret_u32_f32(aybzSUBazby_axbySUBaybx)); // 0, ax*by-ay*bx + + return vcombine_f32(retLow, vreinterpret_f32_u32(retHigh)); +} + +PX_FORCE_INLINE FloatV V4Length(const Vec4V a) +{ + const float32x4_t tmp = vmulq_f32(a, a); + const float32x2_t low = vget_low_f32(tmp); + const float32x2_t high = vget_high_f32(tmp); + + const float32x2_t sumTmp = vpadd_f32(low, high); // = {0+z, x+y} + const 
float32x2_t sumWZYX = vpadd_f32(sumTmp, sumTmp); // = {x+y+z, x+y+z} + return FSqrt(sumWZYX); +} + +PX_FORCE_INLINE FloatV V4LengthSq(const Vec4V a) +{ + return V4Dot(a, a); +} + +PX_FORCE_INLINE Vec4V V4Normalize(const Vec4V a) +{ + //PX_ASSERT(!FAllEq(V4LengthSq(a), FZero())); + return V4ScaleInv(a, V4Length(a)); +} + +PX_FORCE_INLINE Vec4V V4NormalizeFast(const Vec4V a) +{ + //PX_ASSERT(!FAllEq(V4LengthSq(a), FZero())); + return V4Scale(a, FRsqrtFast(V4Dot(a, a))); +} + +PX_FORCE_INLINE Vec4V V4NormalizeSafe(const Vec4V a, const Vec4V unsafeReturnValue) +{ + const FloatV zero = FZero(); + const FloatV length = V4Length(a); + const uint32x4_t isGreaterThanZero = FIsGrtr(length, zero); + return V4Sel(isGreaterThanZero, V4ScaleInv(a, length), unsafeReturnValue); +} + +PX_FORCE_INLINE BoolV V4IsEqU32(const VecU32V a, const VecU32V b) +{ + return vceqq_u32(a, b); +} + +PX_FORCE_INLINE Vec4V V4Sel(const BoolV c, const Vec4V a, const Vec4V b) +{ + return vbslq_f32(c, a, b); +} + +PX_FORCE_INLINE BoolV V4IsGrtr(const Vec4V a, const Vec4V b) +{ + return vcgtq_f32(a, b); +} + +PX_FORCE_INLINE BoolV V4IsGrtrOrEq(const Vec4V a, const Vec4V b) +{ + return vcgeq_f32(a, b); +} + +PX_FORCE_INLINE BoolV V4IsEq(const Vec4V a, const Vec4V b) +{ + return vceqq_f32(a, b); +} + +PX_FORCE_INLINE Vec4V V4Max(const Vec4V a, const Vec4V b) +{ + return vmaxq_f32(a, b); +} + +PX_FORCE_INLINE Vec4V V4Min(const Vec4V a, const Vec4V b) +{ + return vminq_f32(a, b); +} + +PX_FORCE_INLINE FloatV V4ExtractMax(const Vec4V a) +{ + const float32x2_t low = vget_low_f32(a); + const float32x2_t high = vget_high_f32(a); + + const float32x2_t max0 = vpmax_f32(high, low); + const float32x2_t max1 = vpmax_f32(max0, max0); + + return max1; +} + +PX_FORCE_INLINE FloatV V4ExtractMin(const Vec4V a) +{ + const float32x2_t low = vget_low_f32(a); + const float32x2_t high = vget_high_f32(a); + + const float32x2_t min0 = vpmin_f32(high, low); + const float32x2_t min1 = vpmin_f32(min0, min0); + + return min1; +} + 
+PX_FORCE_INLINE Vec4V V4Clamp(const Vec4V a, const Vec4V minV, const Vec4V maxV) +{ + return V4Max(V4Min(a, maxV), minV); +} + +PX_FORCE_INLINE PxU32 V4AllGrtr(const Vec4V a, const Vec4V b) +{ + return internalUnitNeonSimd::BAllTrue4_R(V4IsGrtr(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq(const Vec4V a, const Vec4V b) +{ + return internalUnitNeonSimd::BAllTrue4_R(V4IsGrtrOrEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq3(const Vec4V a, const Vec4V b) +{ + return internalUnitNeonSimd::BAllTrue3_R(V4IsGrtrOrEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllEq(const Vec4V a, const Vec4V b) +{ + return internalUnitNeonSimd::BAllTrue4_R(V4IsEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AnyGrtr3(const Vec4V a, const Vec4V b) +{ + return internalUnitNeonSimd::BAnyTrue3_R(V4IsGrtr(a, b)); +} + +PX_FORCE_INLINE Vec4V V4Round(const Vec4V a) +{ + // truncate(a + (0.5f - sign(a))) + const Vec4V half = V4Load(0.5f); + const float32x4_t sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(a), 31))); + const Vec4V aPlusHalf = V4Add(a, half); + const Vec4V aRound = V4Sub(aPlusHalf, sign); + return vcvtq_f32_s32(vcvtq_s32_f32(aRound)); +} + +PX_FORCE_INLINE Vec4V V4Sin(const Vec4V a) +{ + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec4V tmp = V4Mul(a, recipTwoPi); + const Vec4V b = V4Round(tmp); + const Vec4V V1 = V4NegMulSub(twoPi, b, a); + + // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - + // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! 
(for -PI <= V < PI) + const Vec4V V2 = V4Mul(V1, V1); + const Vec4V V3 = V4Mul(V2, V1); + const Vec4V V5 = V4Mul(V3, V2); + const Vec4V V7 = V4Mul(V5, V2); + const Vec4V V9 = V4Mul(V7, V2); + const Vec4V V11 = V4Mul(V9, V2); + const Vec4V V13 = V4Mul(V11, V2); + const Vec4V V15 = V4Mul(V13, V2); + const Vec4V V17 = V4Mul(V15, V2); + const Vec4V V19 = V4Mul(V17, V2); + const Vec4V V21 = V4Mul(V19, V2); + const Vec4V V23 = V4Mul(V21, V2); + + const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f); + const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f); + const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f); + + const FloatV S1 = V4GetY(sinCoefficients0); + const FloatV S2 = V4GetZ(sinCoefficients0); + const FloatV S3 = V4GetW(sinCoefficients0); + const FloatV S4 = V4GetX(sinCoefficients1); + const FloatV S5 = V4GetY(sinCoefficients1); + const FloatV S6 = V4GetZ(sinCoefficients1); + const FloatV S7 = V4GetW(sinCoefficients1); + const FloatV S8 = V4GetX(sinCoefficients2); + const FloatV S9 = V4GetY(sinCoefficients2); + const FloatV S10 = V4GetZ(sinCoefficients2); + const FloatV S11 = V4GetW(sinCoefficients2); + + Vec4V Result; + Result = V4ScaleAdd(V3, S1, V1); + Result = V4ScaleAdd(V5, S2, Result); + Result = V4ScaleAdd(V7, S3, Result); + Result = V4ScaleAdd(V9, S4, Result); + Result = V4ScaleAdd(V11, S5, Result); + Result = V4ScaleAdd(V13, S6, Result); + Result = V4ScaleAdd(V15, S7, Result); + Result = V4ScaleAdd(V17, S8, Result); + Result = V4ScaleAdd(V19, S9, Result); + Result = V4ScaleAdd(V21, S10, Result); + Result = V4ScaleAdd(V23, S11, Result); + + return Result; +} + +PX_FORCE_INLINE Vec4V V4Cos(const Vec4V a) +{ + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec4V tmp = V4Mul(a, recipTwoPi); + const Vec4V b = V4Round(tmp); + const Vec4V V1 = V4NegMulSub(twoPi, b, a); + + // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! 
- + // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) + const Vec4V V2 = V4Mul(V1, V1); + const Vec4V V4 = V4Mul(V2, V2); + const Vec4V V6 = V4Mul(V4, V2); + const Vec4V V8 = V4Mul(V4, V4); + const Vec4V V10 = V4Mul(V6, V4); + const Vec4V V12 = V4Mul(V6, V6); + const Vec4V V14 = V4Mul(V8, V6); + const Vec4V V16 = V4Mul(V8, V8); + const Vec4V V18 = V4Mul(V10, V8); + const Vec4V V20 = V4Mul(V10, V10); + const Vec4V V22 = V4Mul(V12, V10); + + const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f); + const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f); + const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f); + + const FloatV C1 = V4GetY(cosCoefficients0); + const FloatV C2 = V4GetZ(cosCoefficients0); + const FloatV C3 = V4GetW(cosCoefficients0); + const FloatV C4 = V4GetX(cosCoefficients1); + const FloatV C5 = V4GetY(cosCoefficients1); + const FloatV C6 = V4GetZ(cosCoefficients1); + const FloatV C7 = V4GetW(cosCoefficients1); + const FloatV C8 = V4GetX(cosCoefficients2); + const FloatV C9 = V4GetY(cosCoefficients2); + const FloatV C10 = V4GetZ(cosCoefficients2); + const FloatV C11 = V4GetW(cosCoefficients2); + + Vec4V Result; + Result = V4ScaleAdd(V2, C1, V4One()); + Result = V4ScaleAdd(V4, C2, Result); + Result = V4ScaleAdd(V6, C3, Result); + Result = V4ScaleAdd(V8, C4, Result); + Result = V4ScaleAdd(V10, C5, Result); + Result = V4ScaleAdd(V12, C6, Result); + Result = V4ScaleAdd(V14, C7, Result); + Result = V4ScaleAdd(V16, C8, Result); + Result = V4ScaleAdd(V18, C9, Result); + Result = V4ScaleAdd(V20, C10, Result); + Result = V4ScaleAdd(V22, C11, Result); + + return Result; +} + +PX_FORCE_INLINE void V4Transpose(Vec4V& col0, Vec4V& col1, Vec4V& col2, Vec4V& col3) +{ + const float32x4x2_t v0v1 = vzipq_f32(col0, col2); + const float32x4x2_t v2v3 = vzipq_f32(col1, col3); + const float32x4x2_t zip0 = vzipq_f32(v0v1.val[0], v2v3.val[0]); + const float32x4x2_t zip1 = vzipq_f32(v0v1.val[1], v2v3.val[1]); + col0 
= zip0.val[0]; + col1 = zip0.val[1]; + col2 = zip1.val[0]; + col3 = zip1.val[1]; +} + +////////////////////////////////// +// VEC4V +////////////////////////////////// + +PX_FORCE_INLINE BoolV BFFFF() +{ + return vmovq_n_u32(0); +} + +PX_FORCE_INLINE BoolV BFFFT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + return vcombine_u32(zeros, zo); +} + +PX_FORCE_INLINE BoolV BFFTF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(zeros, oz); +} + +PX_FORCE_INLINE BoolV BFFTT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + return vcombine_u32(zeros, ones); +} + +PX_FORCE_INLINE BoolV BFTFF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + return vcombine_u32(zo, zeros); +} + +PX_FORCE_INLINE BoolV BFTFT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + return vcombine_u32(zo, zo); +} + +PX_FORCE_INLINE BoolV BFTTF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(zo, oz); +} + +PX_FORCE_INLINE BoolV BFTTT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + return vcombine_u32(zo, ones); +} + +PX_FORCE_INLINE BoolV BTFFF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + // const uint32x2_t zo = vext_u32(zeros, ones, 1); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(oz, zeros); +} 
+ +PX_FORCE_INLINE BoolV BTFFT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(oz, zo); +} + +PX_FORCE_INLINE BoolV BTFTF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(oz, oz); +} + +PX_FORCE_INLINE BoolV BTFTT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(oz, ones); +} + +PX_FORCE_INLINE BoolV BTTFF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + return vcombine_u32(ones, zeros); +} + +PX_FORCE_INLINE BoolV BTTFT() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t zo = vext_u32(zeros, ones, 1); + return vcombine_u32(ones, zo); +} + +PX_FORCE_INLINE BoolV BTTTF() +{ + const uint32x2_t zeros = vmov_n_u32(0); + const uint32x2_t ones = vmov_n_u32(0xffffFFFF); + const uint32x2_t oz = vext_u32(ones, zeros, 1); + return vcombine_u32(ones, oz); +} + +PX_FORCE_INLINE BoolV BTTTT() +{ + return vmovq_n_u32(0xffffFFFF); +} + +PX_FORCE_INLINE BoolV BXMask() +{ + return BTFFF(); +} + +PX_FORCE_INLINE BoolV BYMask() +{ + return BFTFF(); +} + +PX_FORCE_INLINE BoolV BZMask() +{ + return BFFTF(); +} + +PX_FORCE_INLINE BoolV BWMask() +{ + return BFFFT(); +} + +PX_FORCE_INLINE BoolV BGetX(const BoolV f) +{ + const uint32x2_t fLow = vget_low_u32(f); + return vdupq_lane_u32(fLow, 0); +} + +PX_FORCE_INLINE BoolV BGetY(const BoolV f) +{ + const uint32x2_t fLow = vget_low_u32(f); + return vdupq_lane_u32(fLow, 1); +} + +PX_FORCE_INLINE BoolV BGetZ(const BoolV f) +{ + const uint32x2_t fHigh = vget_high_u32(f); + return vdupq_lane_u32(fHigh, 0); +} + +PX_FORCE_INLINE 
BoolV BGetW(const BoolV f) +{ + const uint32x2_t fHigh = vget_high_u32(f); + return vdupq_lane_u32(fHigh, 1); /* broadcast the w lane to all four lanes */ +} + +PX_FORCE_INLINE BoolV BSetX(const BoolV v, const BoolV f) /* returns v with its x lane replaced by the x lane of f */ +{ + return vbslq_u32(BFTTT(), v, f); /* bit select: mask FTTT takes y, z, w from v and x from f */ +} + +PX_FORCE_INLINE BoolV BSetY(const BoolV v, const BoolV f) /* returns v with its y lane replaced by the y lane of f */ +{ + return vbslq_u32(BTFTT(), v, f); +} + +PX_FORCE_INLINE BoolV BSetZ(const BoolV v, const BoolV f) /* returns v with its z lane replaced by the z lane of f */ +{ + return vbslq_u32(BTTFT(), v, f); +} + +PX_FORCE_INLINE BoolV BSetW(const BoolV v, const BoolV f) /* returns v with its w lane replaced by the w lane of f */ +{ + return vbslq_u32(BTTTF(), v, f); +} + +PX_FORCE_INLINE BoolV BAnd(const BoolV a, const BoolV b) +{ + return vandq_u32(a, b); +} + +PX_FORCE_INLINE BoolV BNot(const BoolV a) +{ + return vmvnq_u32(a); +} + +PX_FORCE_INLINE BoolV BAndNot(const BoolV a, const BoolV b) /* a AND (NOT b), spelled as and-with-complement instead of the commented-out vbicq_u32 */ +{ + // return vbicq_u32(a, b); + return vandq_u32(a, vmvnq_u32(b)); +} + +PX_FORCE_INLINE BoolV BOr(const BoolV a, const BoolV b) +{ + return vorrq_u32(a, b); +} + +PX_FORCE_INLINE BoolV BAllTrue4(const BoolV a) /* all lanes of the result are all-ones when every lane of a is true, otherwise zero */ +{ + const uint32x2_t allTrue = vmov_n_u32(0xffffFFFF); + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); /* only used to fill the upper half of combined; lane 0 of the final result never reads it */ + const uint16x4_t dLow = vmovn_u32(a); /* low 16 bits of each of the four lanes */ + uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); /* after the second narrowing, 32-bit lane 0 packs the low byte of x, y, z and w */ + const uint32x2_t result = vceq_u32(finalReduce, allTrue); /* lane 0 is all-ones only when all four packed bytes are 0xff */ + return vdupq_lane_u32(result, 0); +} + +PX_FORCE_INLINE BoolV BAnyTrue4(const BoolV a) /* all lanes of the result are all-ones when at least one lane of a has a bit set in its low byte */ +{ + const uint32x2_t allTrue = vmov_n_u32(0xffffFFFF); + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); + uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); /* same byte packing as in BAllTrue4 */ + const uint32x2_t result = vtst_u32(finalReduce, allTrue); /* bit test: non-zero packed bytes give all-ones */ + return vdupq_lane_u32(result, 0); +} + +PX_FORCE_INLINE BoolV BAllTrue3(const BoolV a) +{ + const uint32x2_t allTrue3 = vmov_n_u32(0x00ffFFFF); /* mask without the top byte; presumably that byte is the w lane on a little-endian layout, TODO confirm */ + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = 
vmovn_u32(a); + uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); /* 32-bit lane 0 packs the low byte of each lane of a */ + const uint32x2_t result = vceq_u32(vand_u32(finalReduce, allTrue3), allTrue3); /* equal only when every byte kept by the mask is 0xff */ + return vdupq_lane_u32(result, 0); +} + +PX_FORCE_INLINE BoolV BAnyTrue3(const BoolV a) +{ + const uint32x2_t allTrue3 = vmov_n_u32(0x00ffFFFF); /* mask without the top byte; presumably that byte is the w lane on a little-endian layout, TODO confirm */ + const uint16x4_t dHigh = vget_high_u16(vreinterpretq_u16_u32(a)); + const uint16x4_t dLow = vmovn_u32(a); /* low 16 bits of each lane */ + uint16x8_t combined = vcombine_u16(dLow, dHigh); + const uint32x2_t finalReduce = vreinterpret_u32_u8(vmovn_u16(combined)); + const uint32x2_t result = vtst_u32(vand_u32(finalReduce, allTrue3), allTrue3); /* bit test: all-ones when any kept byte is non-zero */ + return vdupq_lane_u32(result, 0); +} + +PX_FORCE_INLINE PxU32 BAllEq(const BoolV a, const BoolV b) /* lane-wise equality mask reduced with BAllTrue4_R (helper defined outside this chunk) */ +{ + const BoolV bTest = vceqq_u32(a, b); + return internalUnitNeonSimd::BAllTrue4_R(bTest); +} + +PX_FORCE_INLINE PxU32 BAllEqTTTT(const BoolV a) +{ + return BAllEq(a, BTTTT()); +} + +PX_FORCE_INLINE PxU32 BAllEqFFFF(const BoolV a) +{ + return BAllEq(a, BFFFF()); +} + +PX_FORCE_INLINE PxU32 BGetBitMask(const BoolV a) /* packs a into a 4-bit integer: bit i is set when lane i of a has bit i set, as it does for an all-ones lane */ +{ + static PX_ALIGN(16, const PxU32) bitMaskData[4] = { 1, 2, 4, 8 }; + const uint32x4_t bitMask = *(reinterpret_cast<const uint32x4_t*>(bitMaskData)); + const uint32x4_t t0 = vandq_u32(a, bitMask); + const uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); // Pairwise add (0 + 1), (2 + 3) + return PxU32(vget_lane_u32(vpadd_u32(t1, t1), 0)); +} + +////////////////////////////////// +// MAT33V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V M33MulV3(const Mat33V& a, const Vec3V b) /* matrix times vector as a sum of the columns of a scaled by b.x, b.y and b.z */ +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = V3Scale(a.col0, x); + const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + return V3Add(v0PlusV1, v2); +} + +PX_FORCE_INLINE Vec3V M33TrnspsMulV3(const Mat33V& a, const Vec3V b) /* transpose(a) times b: each output component is the dot product of one column of a with b */ +{ + const FloatV x = 
V3Dot(a.col0, b); + const FloatV y = V3Dot(a.col1, b); + const FloatV z = V3Dot(a.col2, b); + return V3Merge(x, y, z); +} + +PX_FORCE_INLINE Vec3V M33MulV3AddV3(const Mat33V& A, const Vec3V b, const Vec3V c) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + Vec3V result = V3ScaleAdd(A.col0, x, c); + result = V3ScaleAdd(A.col1, y, result); + return V3ScaleAdd(A.col2, z, result); +} + +PX_FORCE_INLINE Mat33V M33MulM33(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(M33MulV3(a, b.col0), M33MulV3(a, b.col1), M33MulV3(a, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Add(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(V3Add(a.col0, b.col0), V3Add(a.col1, b.col1), V3Add(a.col2, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Scale(const Mat33V& a, const FloatV& b) +{ + return Mat33V(V3Scale(a.col0, b), V3Scale(a.col1, b), V3Scale(a.col2, b)); +} + +PX_FORCE_INLINE Mat33V M33Inverse(const Mat33V& a) +{ + const float32x2_t zeros = vreinterpret_f32_u32(vmov_n_u32(0)); + const BoolV btttf = BTTTF(); + + const Vec3V cross01 = V3Cross(a.col0, a.col1); + const Vec3V cross12 = V3Cross(a.col1, a.col2); + const Vec3V cross20 = V3Cross(a.col2, a.col0); + const FloatV dot = V3Dot(cross01, a.col2); + const FloatV invDet = FRecipFast(dot); + + const float32x4x2_t merge = vzipq_f32(cross12, cross01); + const float32x4_t mergeh = merge.val[0]; + const float32x4_t mergel = merge.val[1]; + + // const Vec3V colInv0 = XMVectorPermute(mergeh,cross20,PxPermuteControl(0,4,1,7)); + const float32x4_t colInv0_xxyy = vzipq_f32(mergeh, cross20).val[0]; + const float32x4_t colInv0 = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(colInv0_xxyy), btttf)); + + // const Vec3V colInv1 = XMVectorPermute(mergeh,cross20,PxPermuteControl(2,5,3,7)); + const float32x2_t zw0 = vget_high_f32(mergeh); + const float32x2_t xy1 = vget_low_f32(cross20); + const float32x2_t yzero1 = vext_f32(xy1, zeros, 1); + const float32x2x2_t merge1 = vzip_f32(zw0, yzero1); + const 
float32x4_t colInv1 = vcombine_f32(merge1.val[0], merge1.val[1]); + + // const Vec3V colInv2 = XMVectorPermute(mergel,cross20,PxPermuteControl(0,6,1,7)); + const float32x2_t x0y0 = vget_low_f32(mergel); + const float32x2_t z1w1 = vget_high_f32(cross20); + const float32x2x2_t merge2 = vzip_f32(x0y0, z1w1); + const float32x4_t colInv2 = vcombine_f32(merge2.val[0], merge2.val[1]); + + return Mat33V(vmulq_lane_f32(colInv0, invDet, 0), vmulq_lane_f32(colInv1, invDet, 0), + vmulq_lane_f32(colInv2, invDet, 0)); /* every permuted cross-product column is scaled by invDet, the FRecipFast reciprocal of the determinant computed above */ +} + +PX_FORCE_INLINE Mat33V M33Trnsps(const Mat33V& a) /* transpose: output column i gathers component i of col0, col1 and col2 */ +{ + return Mat33V(V3Merge(V3GetX(a.col0), V3GetX(a.col1), V3GetX(a.col2)), + V3Merge(V3GetY(a.col0), V3GetY(a.col1), V3GetY(a.col2)), + V3Merge(V3GetZ(a.col0), V3GetZ(a.col1), V3GetZ(a.col2))); +} + +PX_FORCE_INLINE Mat33V M33Identity() +{ + return Mat33V(V3UnitX(), V3UnitY(), V3UnitZ()); +} + +PX_FORCE_INLINE Mat33V M33Sub(const Mat33V& a, const Mat33V& b) /* column-wise a minus b */ +{ + return Mat33V(V3Sub(a.col0, b.col0), V3Sub(a.col1, b.col1), V3Sub(a.col2, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Neg(const Mat33V& a) +{ + return Mat33V(V3Neg(a.col0), V3Neg(a.col1), V3Neg(a.col2)); +} + +PX_FORCE_INLINE Mat33V M33Abs(const Mat33V& a) +{ + return Mat33V(V3Abs(a.col0), V3Abs(a.col1), V3Abs(a.col2)); +} + +PX_FORCE_INLINE Mat33V PromoteVec3V(const Vec3V v) /* builds the diagonal matrix diag(v.x, v.y, v.z): column i keeps component i of v and zeroes the others */ +{ + const BoolV bTFFF = BTFFF(); + const BoolV bFTFF = BFTFF(); + const BoolV bFFTF = BFFTF(); /* z-only mask; the previous BTFTF() also copied v.x into column 2, so the result was not diagonal */ + + const Vec3V zero = V3Zero(); + + return Mat33V(V3Sel(bTFFF, v, zero), V3Sel(bFTFF, v, zero), V3Sel(bFFTF, v, zero)); +} + +PX_FORCE_INLINE Mat33V M33Diagonal(const Vec3VArg d) /* diagonal matrix from d, built by multiplying d with each unit axis */ +{ + const Vec3V x = V3Mul(V3UnitX(), d); + const Vec3V y = V3Mul(V3UnitY(), d); + const Vec3V z = V3Mul(V3UnitZ(), d); + return Mat33V(x, y, z); +} + +////////////////////////////////// +// MAT34V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V M34MulV3(const Mat34V& a, const Vec3V b) /* sum of col0, col1, col2 scaled by b.x, b.y, b.z, with col3 added last */ +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = 
V3Scale(a.col0, x); + const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + const Vec3V v0PlusV1Plusv2 = V3Add(v0PlusV1, v2); + return V3Add(v0PlusV1Plusv2, a.col3); +} + +PX_FORCE_INLINE Vec3V M34Mul33V3(const Mat34V& a, const Vec3V b) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = V3Scale(a.col0, x); + const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + return V3Add(v0PlusV1, v2); +} + +PX_FORCE_INLINE Vec3V M34TrnspsMul33V3(const Mat34V& a, const Vec3V b) +{ + const FloatV x = V3Dot(a.col0, b); + const FloatV y = V3Dot(a.col1, b); + const FloatV z = V3Dot(a.col2, b); + return V3Merge(x, y, z); +} + +PX_FORCE_INLINE Mat34V M34MulM34(const Mat34V& a, const Mat34V& b) +{ + return Mat34V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2), M34MulV3(a, b.col3)); +} + +PX_FORCE_INLINE Mat33V M34MulM33(const Mat34V& a, const Mat33V& b) +{ + return Mat33V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2)); +} + +PX_FORCE_INLINE Mat33V M34Mul33MM34(const Mat34V& a, const Mat34V& b) +{ + return Mat33V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2)); +} + +PX_FORCE_INLINE Mat34V M34Add(const Mat34V& a, const Mat34V& b) +{ + return Mat34V(V3Add(a.col0, b.col0), V3Add(a.col1, b.col1), V3Add(a.col2, b.col2), V3Add(a.col3, b.col3)); +} + +PX_FORCE_INLINE Mat33V M34Trnsps33(const Mat34V& a) +{ + return Mat33V(V3Merge(V3GetX(a.col0), V3GetX(a.col1), V3GetX(a.col2)), + V3Merge(V3GetY(a.col0), V3GetY(a.col1), V3GetY(a.col2)), + V3Merge(V3GetZ(a.col0), V3GetZ(a.col1), V3GetZ(a.col2))); +} + +////////////////////////////////// +// MAT44V +////////////////////////////////// + +PX_FORCE_INLINE Vec4V M44MulV4(const Mat44V& a, const Vec4V b) +{ + const FloatV x = V4GetX(b); + const FloatV y = V4GetY(b); + const FloatV z = V4GetZ(b); + const 
FloatV w = V4GetW(b); + + const Vec4V v0 = V4Scale(a.col0, x); + const Vec4V v1 = V4Scale(a.col1, y); + const Vec4V v2 = V4Scale(a.col2, z); + const Vec4V v3 = V4Scale(a.col3, w); + const Vec4V v0PlusV1 = V4Add(v0, v1); + const Vec4V v0PlusV1Plusv2 = V4Add(v0PlusV1, v2); + return V4Add(v0PlusV1Plusv2, v3); +} + +PX_FORCE_INLINE Vec4V M44TrnspsMulV4(const Mat44V& a, const Vec4V b) +{ + return V4Merge(V4Dot(a.col0, b), V4Dot(a.col1, b), V4Dot(a.col2, b), V4Dot(a.col3, b)); +} + +PX_FORCE_INLINE Mat44V M44MulM44(const Mat44V& a, const Mat44V& b) +{ + return Mat44V(M44MulV4(a, b.col0), M44MulV4(a, b.col1), M44MulV4(a, b.col2), M44MulV4(a, b.col3)); +} + +PX_FORCE_INLINE Mat44V M44Add(const Mat44V& a, const Mat44V& b) +{ + return Mat44V(V4Add(a.col0, b.col0), V4Add(a.col1, b.col1), V4Add(a.col2, b.col2), V4Add(a.col3, b.col3)); +} + +PX_FORCE_INLINE Mat44V M44Trnsps(const Mat44V& a) +{ + // asm volatile( + // "vzip.f32 %q0, %q2 \n\t" + // "vzip.f32 %q1, %q3 \n\t" + // "vzip.f32 %q0, %q1 \n\t" + // "vzip.f32 %q2, %q3 \n\t" + // : "+w" (a.col0), "+w" (a.col1), "+w" (a.col2), "+w" a.col3)); + + const float32x4x2_t v0v1 = vzipq_f32(a.col0, a.col2); + const float32x4x2_t v2v3 = vzipq_f32(a.col1, a.col3); + const float32x4x2_t zip0 = vzipq_f32(v0v1.val[0], v2v3.val[0]); + const float32x4x2_t zip1 = vzipq_f32(v0v1.val[1], v2v3.val[1]); + + return Mat44V(zip0.val[0], zip0.val[1], zip1.val[0], zip1.val[1]); +} + +PX_FORCE_INLINE Mat44V M44Inverse(const Mat44V& a) +{ + float32x4_t minor0, minor1, minor2, minor3; + float32x4_t row0, row1, row2, row3; + float32x4_t det, tmp1; + + tmp1 = vmovq_n_f32(0.0f); + row1 = vmovq_n_f32(0.0f); + row3 = vmovq_n_f32(0.0f); + + row0 = a.col0; + row1 = vextq_f32(a.col1, a.col1, 2); + row2 = a.col2; + row3 = vextq_f32(a.col3, a.col3, 2); + + tmp1 = vmulq_f32(row2, row3); + tmp1 = vrev64q_f32(tmp1); + minor0 = vmulq_f32(row1, tmp1); + minor1 = vmulq_f32(row0, tmp1); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor0 = vsubq_f32(vmulq_f32(row1, tmp1), 
minor0); + minor1 = vsubq_f32(vmulq_f32(row0, tmp1), minor1); + minor1 = vextq_f32(minor1, minor1, 2); + + tmp1 = vmulq_f32(row1, row2); + tmp1 = vrev64q_f32(tmp1); + minor0 = vaddq_f32(vmulq_f32(row3, tmp1), minor0); + minor3 = vmulq_f32(row0, tmp1); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor0 = vsubq_f32(minor0, vmulq_f32(row3, tmp1)); + minor3 = vsubq_f32(vmulq_f32(row0, tmp1), minor3); + minor3 = vextq_f32(minor3, minor3, 2); + + tmp1 = vmulq_f32(vextq_f32(row1, row1, 2), row3); + tmp1 = vrev64q_f32(tmp1); + row2 = vextq_f32(row2, row2, 2); + minor0 = vaddq_f32(vmulq_f32(row2, tmp1), minor0); + minor2 = vmulq_f32(row0, tmp1); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor0 = vsubq_f32(minor0, vmulq_f32(row2, tmp1)); + minor2 = vsubq_f32(vmulq_f32(row0, tmp1), minor2); + minor2 = vextq_f32(minor2, minor2, 2); + + tmp1 = vmulq_f32(row0, row1); + tmp1 = vrev64q_f32(tmp1); + minor2 = vaddq_f32(vmulq_f32(row3, tmp1), minor2); + minor3 = vsubq_f32(vmulq_f32(row2, tmp1), minor3); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor2 = vsubq_f32(vmulq_f32(row3, tmp1), minor2); + minor3 = vsubq_f32(minor3, vmulq_f32(row2, tmp1)); + + tmp1 = vmulq_f32(row0, row3); + tmp1 = vrev64q_f32(tmp1); + minor1 = vsubq_f32(minor1, vmulq_f32(row2, tmp1)); + minor2 = vaddq_f32(vmulq_f32(row1, tmp1), minor2); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor1 = vaddq_f32(vmulq_f32(row2, tmp1), minor1); + minor2 = vsubq_f32(minor2, vmulq_f32(row1, tmp1)); + + tmp1 = vmulq_f32(row0, row2); + tmp1 = vrev64q_f32(tmp1); + minor1 = vaddq_f32(vmulq_f32(row3, tmp1), minor1); + minor3 = vsubq_f32(minor3, vmulq_f32(row1, tmp1)); + tmp1 = vextq_f32(tmp1, tmp1, 2); + minor1 = vsubq_f32(minor1, vmulq_f32(row3, tmp1)); + minor3 = vaddq_f32(vmulq_f32(row1, tmp1), minor3); + + det = vmulq_f32(row0, minor0); + det = vaddq_f32(vextq_f32(det, det, 2), det); + det = vaddq_f32(vrev64q_f32(det), det); + det = vdupq_lane_f32(VRECIPE(vget_low_f32(det)), 0); + + minor0 = vmulq_f32(det, minor0); + minor1 = vmulq_f32(det, minor1); 
+ minor2 = vmulq_f32(det, minor2); + minor3 = vmulq_f32(det, minor3); + Mat44V invTrans(minor0, minor1, minor2, minor3); + return M44Trnsps(invTrans); +} + +PX_FORCE_INLINE Vec4V V4LoadXYZW(const PxF32& x, const PxF32& y, const PxF32& z, const PxF32& w) +{ + const float32x4_t ret = { x, y, z, w }; + return ret; +} + +/* +PX_FORCE_INLINE VecU16V V4U32PK(VecU32V a, VecU32V b) +{ + return vcombine_u16(vqmovn_u32(a), vqmovn_u32(b)); +} +*/ + +PX_FORCE_INLINE VecU32V V4U32Sel(const BoolV c, const VecU32V a, const VecU32V b) +{ + return vbslq_u32(c, a, b); +} + +PX_FORCE_INLINE VecU32V V4U32or(VecU32V a, VecU32V b) +{ + return vorrq_u32(a, b); +} + +PX_FORCE_INLINE VecU32V V4U32xor(VecU32V a, VecU32V b) +{ + return veorq_u32(a, b); +} + +PX_FORCE_INLINE VecU32V V4U32and(VecU32V a, VecU32V b) +{ + return vandq_u32(a, b); +} + +PX_FORCE_INLINE VecU32V V4U32Andc(VecU32V a, VecU32V b) +{ + // return vbicq_u32(a, b); // creates gcc compiler bug in RTreeQueries.cpp + return vandq_u32(a, vmvnq_u32(b)); +} + +/* +PX_FORCE_INLINE VecU16V V4U16Or(VecU16V a, VecU16V b) +{ + return vorrq_u16(a, b); +} +*/ + +/* +PX_FORCE_INLINE VecU16V V4U16And(VecU16V a, VecU16V b) +{ + return vandq_u16(a, b); +} +*/ +/* +PX_FORCE_INLINE VecU16V V4U16Andc(VecU16V a, VecU16V b) +{ + return vbicq_u16(a, b); +} +*/ + +PX_FORCE_INLINE VecI32V I4Load(const PxI32 i) +{ + return vdupq_n_s32(i); +} + +PX_FORCE_INLINE VecI32V I4LoadU(const PxI32* i) +{ + return vld1q_s32(i); +} + +PX_FORCE_INLINE VecI32V I4LoadA(const PxI32* i) +{ + return vld1q_s32(i); +} + +PX_FORCE_INLINE VecI32V VecI32V_Add(const VecI32VArg a, const VecI32VArg b) +{ + return vaddq_s32(a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_Sub(const VecI32VArg a, const VecI32VArg b) +{ + return vsubq_s32(a, b); +} + +PX_FORCE_INLINE BoolV VecI32V_IsGrtr(const VecI32VArg a, const VecI32VArg b) +{ + return vcgtq_s32(a, b); +} + +PX_FORCE_INLINE BoolV VecI32V_IsEq(const VecI32VArg a, const VecI32VArg b) +{ + return vceqq_s32(a, b); +} + 
+PX_FORCE_INLINE VecI32V V4I32Sel(const BoolV c, const VecI32V a, const VecI32V b) /* bit select: a where c is set, b elsewhere */ +{ + return vbslq_s32(c, a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_Zero() +{ + return vdupq_n_s32(0); +} + +PX_FORCE_INLINE VecI32V VecI32V_One() +{ + return vdupq_n_s32(1); +} + +PX_FORCE_INLINE VecI32V VecI32V_Two() +{ + return vdupq_n_s32(2); +} + +PX_FORCE_INLINE VecI32V VecI32V_MinusOne() +{ + return vdupq_n_s32(-1); +} + +PX_FORCE_INLINE VecU32V U4Zero() /* U4Load is defined further down in this file; presumably it is declared before this point by an earlier header, TODO confirm */ +{ + return U4Load(0); +} + +PX_FORCE_INLINE VecU32V U4One() +{ + return U4Load(1); +} + +PX_FORCE_INLINE VecU32V U4Two() +{ + return U4Load(2); +} + +PX_FORCE_INLINE VecShiftV VecI32V_PrepareShift(const VecI32VArg shift) /* identity here: the per-lane counts are handed unchanged to the shift helpers below */ +{ + return shift; +} + +PX_FORCE_INLINE VecI32V VecI32V_LeftShift(const VecI32VArg a, const VecShiftVArg count) /* per-lane shift of a by the matching lane of count */ +{ + return vshlq_s32(a, count); +} + +PX_FORCE_INLINE VecI32V VecI32V_RightShift(const VecI32VArg a, const VecShiftVArg count) /* right shift expressed as vshlq_s32 by the negated count */ +{ + return vshlq_s32(a, VecI32V_Sub(I4Load(0), count)); /* 0 minus count per lane; relies on vshlq_s32 treating negative counts as right shifts */ +} + +PX_FORCE_INLINE VecI32V VecI32V_And(const VecI32VArg a, const VecI32VArg b) +{ + return vandq_s32(a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_Or(const VecI32VArg a, const VecI32VArg b) +{ + return vorrq_s32(a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetX(const VecI32VArg f) /* broadcasts lane 0 to all four lanes */ +{ + const int32x2_t fLow = vget_low_s32(f); + return vdupq_lane_s32(fLow, 0); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetY(const VecI32VArg f) /* broadcasts lane 1 */ +{ + const int32x2_t fLow = vget_low_s32(f); + return vdupq_lane_s32(fLow, 1); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetZ(const VecI32VArg f) /* broadcasts lane 2 */ +{ + const int32x2_t fHigh = vget_high_s32(f); + return vdupq_lane_s32(fHigh, 0); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetW(const VecI32VArg f) /* broadcasts lane 3 */ +{ + const int32x2_t fHigh = vget_high_s32(f); + return vdupq_lane_s32(fHigh, 1); +} + +PX_FORCE_INLINE VecI32V VecI32V_Sel(const BoolV c, const VecI32VArg a, const VecI32VArg b) /* same bit select as V4I32Sel */ +{ + return vbslq_s32(c, a, b); +} + +PX_FORCE_INLINE void PxI32_From_VecI32V(const VecI32VArg a, PxI32* i) /* writes a single lane of a to *i */ +{ + *i = vgetq_lane_s32(a, 
0); +} + +PX_FORCE_INLINE VecI32V VecI32V_Merge(const VecI32VArg a, const VecI32VArg b, const VecI32VArg c, const VecI32VArg d) /* packs one lane from each argument into a single vector */ +{ + const int32x2_t aLow = vget_low_s32(a); + const int32x2_t bLow = vget_low_s32(b); + const int32x2_t cLow = vget_low_s32(c); + const int32x2_t dLow = vget_low_s32(d); + + const int32x2_t low = vext_s32(aLow, bLow, 1); /* NOTE(review): extracting at offset 1 yields (a lane 1, b lane 0), not (a lane 0, b lane 0); presumably callers pass splatted values such as VecI32V_GetX results, confirm */ + const int32x2_t high = vext_s32(cLow, dLow, 1); /* likewise (c lane 1, d lane 0) */ + + return vcombine_s32(low, high); +} + +PX_FORCE_INLINE VecI32V VecI32V_From_BoolV(const BoolVArg a) /* bit reinterpretation only, no numeric conversion */ +{ + return vreinterpretq_s32_u32(a); +} + +PX_FORCE_INLINE VecU32V VecU32V_From_BoolV(const BoolVArg a) +{ + return a; +} + +/* +template<int a> PX_FORCE_INLINE VecI32V V4ISplat() +{ + return vdupq_n_s32(a); +} + +template<PxU32 a> PX_FORCE_INLINE VecU32V V4USplat() +{ + return vdupq_n_u32(a); +} +*/ + +/* +PX_FORCE_INLINE void V4U16StoreAligned(VecU16V val, VecU16V* address) +{ + vst1q_u16((uint16_t*)address, val); +} +*/ + +PX_FORCE_INLINE void V4U32StoreAligned(VecU32V val, VecU32V* address) /* stores all four lanes of val to *address with vst1q_u32 */ +{ + vst1q_u32(reinterpret_cast<uint32_t*>(address), val); +} + +PX_FORCE_INLINE Vec4V V4LoadAligned(Vec4V* addr) /* issues the same vld1q_f32 load as V4LoadUnaligned below */ +{ + return vld1q_f32(reinterpret_cast<float32_t*>(addr)); +} + +PX_FORCE_INLINE Vec4V V4LoadUnaligned(Vec4V* addr) +{ + return vld1q_f32(reinterpret_cast<float32_t*>(addr)); +} + +PX_FORCE_INLINE Vec4V V4Andc(const Vec4V a, const VecU32V b) /* a AND (NOT b) on the raw bits, via V4U32Andc */ +{ + return vreinterpretq_f32_u32(V4U32Andc(vreinterpretq_u32_f32(a), b)); +} + +PX_FORCE_INLINE VecU32V V4IsGrtrV32u(const Vec4V a, const Vec4V b) /* forwards to V4IsGrtr and returns its mask as a VecU32V */ +{ + return V4IsGrtr(a, b); +} + +PX_FORCE_INLINE VecU16V V4U16LoadAligned(VecU16V* addr) /* issues the same vld1q_u16 load as V4U16LoadUnaligned below */ +{ + return vld1q_u16(reinterpret_cast<uint16_t*>(addr)); +} + +PX_FORCE_INLINE VecU16V V4U16LoadUnaligned(VecU16V* addr) +{ + return vld1q_u16(reinterpret_cast<uint16_t*>(addr)); +} + +PX_FORCE_INLINE VecU16V V4U16CompareGt(VecU16V a, VecU16V b) /* unsigned 16-bit a > b, one mask per lane */ +{ + return vcgtq_u16(a, b); +} + +PX_FORCE_INLINE VecU16V V4I16CompareGt(VecI16V a, VecI16V b) /* signed 16-bit a > b; the mask is returned as unsigned lanes */ +{ + return vcgtq_s16(a, b); +} + +PX_FORCE_INLINE Vec4V 
Vec4V_From_VecU32V(VecU32V a) /* numeric conversion of each unsigned lane to float */ +{ + return vcvtq_f32_u32(a); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_VecI32V(VecI32V a) /* numeric conversion of each signed lane to float */ +{ + return vcvtq_f32_s32(a); +} + +PX_FORCE_INLINE VecI32V VecI32V_From_Vec4V(Vec4V a) /* numeric conversion of each float lane to a signed integer */ +{ + return vcvtq_s32_f32(a); +} + +PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecU32V(VecU32V a) /* the ReinterpretFrom helpers relabel the 128 bits without any numeric conversion */ +{ + return vreinterpretq_f32_u32(a); +} + +PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecI32V(VecI32V a) +{ + return vreinterpretq_f32_s32(a); +} + +PX_FORCE_INLINE VecU32V VecU32V_ReinterpretFrom_Vec4V(Vec4V a) +{ + return vreinterpretq_u32_f32(a); +} + +PX_FORCE_INLINE VecI32V VecI32V_ReinterpretFrom_Vec4V(Vec4V a) +{ + return vreinterpretq_s32_f32(a); +} + +// Broadcasts lane <index> of a to all four lanes; index must be in 0..3. +// Lanes 0 and 1 live in the low half, lanes 2 and 3 in the high half, and (index & 1) is the lane inside that half. +// Written this way so every instantiation has exactly one return and never passes an out-of-range lane immediate. +template <int index> +PX_FORCE_INLINE BoolV BSplatElement(BoolV a) +{ + const uint32x2_t half = (index < 2) ? vget_low_u32(a) : vget_high_u32(a); + return vdupq_lane_u32(half, index & 1); +} + +// Broadcasts lane <index> of a to all four lanes; index must be in 0..3 (same half and lane selection as BSplatElement). +template <int index> +PX_FORCE_INLINE VecU32V V4U32SplatElement(VecU32V a) +{ + const uint32x2_t half = (index < 2) ? vget_low_u32(a) : vget_high_u32(a); + return vdupq_lane_u32(half, index & 1); +} + +// Broadcasts lane <index> of a to all four lanes; index must be in 0..3 (same half and lane selection as BSplatElement). +template <int index> +PX_FORCE_INLINE Vec4V V4SplatElement(Vec4V a) +{ + const float32x2_t half = (index < 2) ? vget_low_f32(a) : vget_high_f32(a); + return vdupq_lane_f32(half, index & 1); +} + +PX_FORCE_INLINE VecU32V U4LoadXYZW(PxU32 x, PxU32 y, PxU32 z, PxU32 w) /* builds the vector (x, y, z, w) */ +{ + const uint32x4_t ret = { x, y, z, w }; + return ret; +} + +PX_FORCE_INLINE VecU32V U4Load(const PxU32 i) /* broadcasts i to all four lanes */ +{ + return vdupq_n_u32(i); +} + +PX_FORCE_INLINE VecU32V U4LoadU(const PxU32* i) /* loads four consecutive values starting at i */ +{ + return vld1q_u32(i); +} + +PX_FORCE_INLINE VecU32V U4LoadA(const PxU32* i) /* issues the same vld1q_u32 load as U4LoadU */ +{ + return vld1q_u32(i); +} + +PX_FORCE_INLINE Vec4V V4Ceil(const Vec4V 
in) +{ + const float32x4_t ones = vdupq_n_f32(1.0f); + const float32x4_t rdToZero = vcvtq_f32_s32(vcvtq_s32_f32(in)); /* float to s32 to float round trip; NOTE(review): presumably only valid while in fits in a signed 32-bit integer, confirm */ + const float32x4_t rdToZeroPlusOne = vaddq_f32(rdToZero, ones); + const uint32x4_t gt = vcgtq_f32(in, rdToZero); /* lanes where the input lies above its converted value */ + return vbslq_f32(gt, rdToZeroPlusOne, rdToZero); /* those lanes take the converted value plus one */ +} + +PX_FORCE_INLINE Vec4V V4Floor(const Vec4V in) /* mirror image of V4Ceil: lanes below their converted value take that value minus one */ +{ + const float32x4_t ones = vdupq_n_f32(1.0f); + const float32x4_t rdToZero = vcvtq_f32_s32(vcvtq_s32_f32(in)); + const float32x4_t rdToZeroMinusOne = vsubq_f32(rdToZero, ones); + const uint32x4_t lt = vcltq_f32(in, rdToZero); + return vbslq_f32(lt, rdToZeroMinusOne, rdToZero); +} + +PX_FORCE_INLINE VecU32V V4ConvertToU32VSaturate(const Vec4V in, PxU32 power) /* float to unsigned conversion; power must be 0 and is otherwise unused */ +{ + PX_ASSERT(power == 0 && "Non-zero power not supported in convertToU32VSaturate"); + PX_UNUSED(power); // prevent warning in release builds + + return vcvtq_u32_f32(in); +} + +PX_FORCE_INLINE void QuatGetMat33V(const QuatVArg q, Vec3V& column0, Vec3V& column1, Vec3V& column2) /* writes three matrix columns computed from the x, y, z, w components of q */ +{ + const FloatV one = FOne(); + const FloatV x = V4GetX(q); + const FloatV y = V4GetY(q); + const FloatV z = V4GetZ(q); + const FloatV w = V4GetW(q); + + const FloatV x2 = FAdd(x, x); /* doubled components, so every product below already carries the factor 2 */ + const FloatV y2 = FAdd(y, y); + const FloatV z2 = FAdd(z, z); + + const FloatV xx = FMul(x2, x); + const FloatV yy = FMul(y2, y); + const FloatV zz = FMul(z2, z); + + const FloatV xy = FMul(x2, y); + const FloatV xz = FMul(x2, z); + const FloatV xw = FMul(x2, w); + + const FloatV yz = FMul(y2, z); + const FloatV yw = FMul(y2, w); + const FloatV zw = FMul(z2, w); + + const FloatV v = FSub(one, xx); /* shared term (1 minus 2*x*x) used by the y entry of column1 and the z entry of column2 */ + + column0 = V3Merge(FSub(FSub(one, yy), zz), FAdd(xy, zw), FSub(xz, yw)); + column1 = V3Merge(FSub(xy, zw), FSub(v, zz), FAdd(yz, xw)); + column2 = V3Merge(FAdd(xz, yw), FSub(yz, xw), FSub(v, yy)); +} + +#endif // PSFOUNDATION_PSUNIXNEONINLINEAOS_H diff --git a/PxShared/src/foundation/include/unix/sse2/PsUnixSse2AoS.h b/PxShared/src/foundation/include/unix/sse2/PsUnixSse2AoS.h new file mode 100644 index 0000000..9c76438 --- 
/dev/null +++ b/PxShared/src/foundation/include/unix/sse2/PsUnixSse2AoS.h @@ -0,0 +1,179 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSUNIXSSE2AOS_H +#define PSFOUNDATION_PSUNIXSSE2AOS_H + +// no includes here! 
this file should be included from PxcVecMath.h only!!! + +#if !COMPILE_VECTOR_INTRINSICS +#error Vector intrinsics should not be included when using scalar implementation. +#endif +#if PX_EMSCRIPTEN +typedef int8_t __int8_t; +typedef int16_t __int16_t; +typedef int32_t __int32_t; +typedef int64_t __int64_t; +typedef uint16_t __uint16_t; +typedef uint32_t __uint32_t; +typedef uint64_t __uint64_t; +#endif + +typedef union UnionM128 +{ + UnionM128() + { + } + UnionM128(__m128 in) + { + m128 = in; + } + + UnionM128(__m128i in) + { + m128i = in; + } + + operator __m128() + { + return m128; + } + + operator const __m128() const + { + return m128; + } + + float m128_f32[4]; + __int8_t m128_i8[16]; + __int16_t m128_i16[8]; + __int32_t m128_i32[4]; + __int64_t m128_i64[2]; + __uint16_t m128_u16[8]; + __uint32_t m128_u32[4]; + __uint64_t m128_u64[2]; + __m128 m128; + __m128i m128i; +} UnionM128; + +typedef __m128 FloatV; +typedef __m128 Vec3V; +typedef __m128 Vec4V; +typedef __m128 BoolV; +typedef __m128 QuatV; +typedef __m128i VecI32V; +typedef UnionM128 VecU32V; +typedef UnionM128 VecU16V; +typedef UnionM128 VecI16V; +typedef UnionM128 VecU8V; + +#define FloatVArg FloatV & +#define Vec3VArg Vec3V & +#define Vec4VArg Vec4V & +#define BoolVArg BoolV & +#define VecU32VArg VecU32V & +#define VecI32VArg VecI32V & +#define VecU16VArg VecU16V & +#define VecI16VArg VecI16V & +#define VecU8VArg VecU8V & +#define QuatVArg QuatV & + +// Optimization for situations in which you cross product multiple vectors with the same vector. 
+// Avoids 2X shuffles per product +struct VecCrossV +{ + Vec3V mL1; + Vec3V mR1; +}; + +struct VecShiftV +{ + VecI32V shift; +}; +#define VecShiftVArg VecShiftV & + +PX_ALIGN_PREFIX(16) +struct Mat33V +{ + Mat33V() + { + } + Mat33V(const Vec3V& c0, const Vec3V& c1, const Vec3V& c2) : col0(c0), col1(c1), col2(c2) + { + } + Vec3V PX_ALIGN(16, col0); + Vec3V PX_ALIGN(16, col1); + Vec3V PX_ALIGN(16, col2); +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct Mat34V +{ + Mat34V() + { + } + Mat34V(const Vec3V& c0, const Vec3V& c1, const Vec3V& c2, const Vec3V& c3) : col0(c0), col1(c1), col2(c2), col3(c3) + { + } + Vec3V PX_ALIGN(16, col0); + Vec3V PX_ALIGN(16, col1); + Vec3V PX_ALIGN(16, col2); + Vec3V PX_ALIGN(16, col3); +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct Mat43V +{ + Mat43V() + { + } + Mat43V(const Vec4V& c0, const Vec4V& c1, const Vec4V& c2) : col0(c0), col1(c1), col2(c2) + { + } + Vec4V PX_ALIGN(16, col0); + Vec4V PX_ALIGN(16, col1); + Vec4V PX_ALIGN(16, col2); +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct Mat44V +{ + Mat44V() + { + } + Mat44V(const Vec4V& c0, const Vec4V& c1, const Vec4V& c2, const Vec4V& c3) : col0(c0), col1(c1), col2(c2), col3(c3) + { + } + Vec4V PX_ALIGN(16, col0); + Vec4V PX_ALIGN(16, col1); + Vec4V PX_ALIGN(16, col2); + Vec4V PX_ALIGN(16, col3); +} PX_ALIGN_SUFFIX(16); + +#endif // PSFOUNDATION_PSUNIXSSE2AOS_H diff --git a/PxShared/src/foundation/include/unix/sse2/PsUnixSse2InlineAoS.h b/PxShared/src/foundation/include/unix/sse2/PsUnixSse2InlineAoS.h new file mode 100644 index 0000000..0355538 --- /dev/null +++ b/PxShared/src/foundation/include/unix/sse2/PsUnixSse2InlineAoS.h @@ -0,0 +1,3226 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSUNIXSSE2INLINEAOS_H +#define PSFOUNDATION_PSUNIXSSE2INLINEAOS_H + +#if !COMPILE_VECTOR_INTRINSICS +#error Vector intrinsics should not be included when using scalar implementation. +#endif + +// Remove this define when all platforms use simd solver. 
+#define PX_SUPPORT_SIMD + +#ifdef __SSE4_2__ +#include "smmintrin.h" +#endif + +#include "../../PsVecMathSSE.h" + +#define PX_FPCLASS_SNAN 0x0001 /* signaling NaN */ +#define PX_FPCLASS_QNAN 0x0002 /* quiet NaN */ +#define PX_FPCLASS_NINF 0x0004 /* negative infinity */ +#define PX_FPCLASS_PINF 0x0200 /* positive infinity */ + +PX_FORCE_INLINE __m128 m128_I2F(__m128i n) +{ + return _mm_castsi128_ps(n); +} +PX_FORCE_INLINE __m128i m128_F2I(__m128 n) +{ + return _mm_castps_si128(n); +} + +////////////////////////////////////////////////////////////////////// +//Test that Vec3V and FloatV are legal +////////////////////////////////////////////////////////////////////// + +#define FLOAT_COMPONENTS_EQUAL_THRESHOLD 0.01f +PX_FORCE_INLINE static bool isValidFloatV(const FloatV a) +{ + const PxF32 x = V4ReadX(a); + const PxF32 y = V4ReadY(a); + const PxF32 z = V4ReadZ(a); + const PxF32 w = V4ReadW(a); + + if ( + (PxAbs(x - y) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) && + (PxAbs(x - z) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) && + (PxAbs(x - w) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) + ) + { + return true; + } + + if ( + (PxAbs((x - y) / x) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) && + (PxAbs((x - z) / x) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) && + (PxAbs((x - w) / x) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) + ) + { + return true; + } + + return false; +} + +PX_FORCE_INLINE bool isValidVec3V(const Vec3V a) +{ + PX_ALIGN(16, PxF32 f[4]); + V4StoreA(a, f); + return (f[3] == 0.0f); +} + +PX_FORCE_INLINE bool isFiniteLength(const Vec3V a) +{ + return !FAllEq(V4LengthSq(a), FZero()); +} + +PX_FORCE_INLINE bool isAligned16(void* a) +{ + return(0 == (size_t(a) & 0x0f)); +} + +//ASSERT_FINITELENGTH is deactivated because there is a lot of code that calls a simd normalisation function with zero length but then ignores the result. 
+ +#if PX_DEBUG +#define ASSERT_ISVALIDVEC3V(a) PX_ASSERT(isValidVec3V(a)) +#define ASSERT_ISVALIDFLOATV(a) PX_ASSERT(isValidFloatV(a)) +#define ASSERT_ISALIGNED16(a) PX_ASSERT(isAligned16(reinterpret_cast<void*>(a))) +#define ASSERT_ISFINITELENGTH(a) //PX_ASSERT(isFiniteLength(a)) +#else +#define ASSERT_ISVALIDVEC3V(a) +#define ASSERT_ISVALIDFLOATV(a) +#define ASSERT_ISALIGNED16(a) +#define ASSERT_ISFINITELENGTH(a) +#endif + + +namespace internalUnitSSE2Simd +{ +PX_FORCE_INLINE PxU32 BAllTrue4_R(const BoolV a) +{ + const PxI32 moveMask = _mm_movemask_ps(a); + return PxU32(moveMask == 0xf); +} + +PX_FORCE_INLINE PxU32 BAllTrue3_R(const BoolV a) +{ + const PxI32 moveMask = _mm_movemask_ps(a); + return PxU32((moveMask & 0x7) == 0x7); +} + +PX_FORCE_INLINE PxU32 BAnyTrue4_R(const BoolV a) +{ + const PxI32 moveMask = _mm_movemask_ps(a); + return PxU32(moveMask != 0x0); +} + +PX_FORCE_INLINE PxU32 BAnyTrue3_R(const BoolV a) +{ + const PxI32 moveMask = _mm_movemask_ps(a); + return PxU32((moveMask & 0x7) != 0x0); +} + +PX_FORCE_INLINE PxU32 FiniteTestEq(const Vec4V a, const Vec4V b) +{ + // This is a bit of a bodge. + //_mm_comieq_ss returns 1 if either value is nan so we need to re-cast a and b with true encoded as a non-nan + // number. + // There must be a better way of doing this in sse. 
+ const BoolV one = FOne(); + const BoolV zero = FZero(); + const BoolV a1 = V4Sel(a, one, zero); + const BoolV b1 = V4Sel(b, one, zero); + return ( + _mm_comieq_ss(a1, b1) && + _mm_comieq_ss(_mm_shuffle_ps(a1, a1, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(b1, b1, _MM_SHUFFLE(1, 1, 1, 1))) && + _mm_comieq_ss(_mm_shuffle_ps(a1, a1, _MM_SHUFFLE(2, 2, 2, 2)), _mm_shuffle_ps(b1, b1, _MM_SHUFFLE(2, 2, 2, 2))) && + _mm_comieq_ss(_mm_shuffle_ps(a1, a1, _MM_SHUFFLE(3, 3, 3, 3)), _mm_shuffle_ps(b1, b1, _MM_SHUFFLE(3, 3, 3, 3)))); +} + +#if !PX_EMSCRIPTEN +const PX_ALIGN(16, PxF32 gMaskXYZ[4]) = { physx::PxUnionCast<PxF32>(0xffffffff), physx::PxUnionCast<PxF32>(0xffffffff), + physx::PxUnionCast<PxF32>(0xffffffff), 0 }; +} +#else +// emscripten doesn't like the PxUnionCast data structure +// the following is what windows and xbox does -- using these for emscripten +const PX_ALIGN(16, PxU32 gMaskXYZ[4]) = { 0xffffffff, 0xffffffff, 0xffffffff, 0 }; } +#endif + +namespace _VecMathTests +{ +// PT: this function returns an invalid Vec3V (W!=0.0f) just for unit-testing 'isValidVec3V' +PX_FORCE_INLINE Vec3V getInvalidVec3V() +{ + const float f = 1.0f; + return _mm_load1_ps(&f); +} + +PX_FORCE_INLINE bool allElementsEqualFloatV(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_comieq_ss(a, b) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVec3V(const Vec3V a, const Vec3V b) +{ + return V3AllEq(a, b) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVec4V(const Vec4V a, const Vec4V b) +{ + return V4AllEq(a, b) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualBoolV(const BoolV a, const BoolV b) +{ + return internalUnitSSE2Simd::BAllTrue4_R(VecI32V_IsEq(m128_F2I(a), m128_F2I(b))) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVecU32V(const VecU32V a, const VecU32V b) +{ + return internalUnitSSE2Simd::BAllTrue4_R(V4IsEqU32(a, b)) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVecI32V(const VecI32V a, const VecI32V b) +{ + BoolV c = 
m128_I2F(_mm_cmpeq_epi32(a, b)); + return internalUnitSSE2Simd::BAllTrue4_R(c) != 0; +} + +#define VECMATH_AOS_EPSILON (1e-3f) + +PX_FORCE_INLINE bool allElementsNearEqualFloatV(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + const FloatV c = FSub(a, b); + const FloatV minError = FLoad(-VECMATH_AOS_EPSILON); + const FloatV maxError = FLoad(VECMATH_AOS_EPSILON); + return _mm_comigt_ss(c, minError) && _mm_comilt_ss(c, maxError); +} + +PX_FORCE_INLINE bool allElementsNearEqualVec3V(const Vec3V a, const Vec3V b) +{ + const Vec3V c = V3Sub(a, b); + const Vec3V minError = V3Load(-VECMATH_AOS_EPSILON); + const Vec3V maxError = V3Load(VECMATH_AOS_EPSILON); + return (_mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)), minError) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)), maxError) && + _mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)), minError) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)), maxError) && + _mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)), minError) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)), maxError)); +} + +PX_FORCE_INLINE bool allElementsNearEqualVec4V(const Vec4V a, const Vec4V b) +{ + const Vec4V c = V4Sub(a, b); + const Vec4V minError = V4Load(-VECMATH_AOS_EPSILON); + const Vec4V maxError = V4Load(VECMATH_AOS_EPSILON); + return (_mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)), minError) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)), maxError) && + _mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)), minError) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)), maxError) && + _mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)), minError) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)), maxError) && + _mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(3, 3, 3, 3)), minError) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(3, 3, 3, 3)), 
maxError)); +} +} + +///////////////////////////////////////////////////////////////////// +////FUNCTIONS USED ONLY FOR ASSERTS IN VECTORISED IMPLEMENTATIONS +///////////////////////////////////////////////////////////////////// + +PX_FORCE_INLINE bool isFiniteFloatV(const FloatV a) +{ + PxF32 badNumber = + physx::PxUnionCast<PxF32, PxU32>(PX_FPCLASS_SNAN | PX_FPCLASS_QNAN | PX_FPCLASS_NINF | PX_FPCLASS_PINF); + const FloatV vBadNum = FLoad(badNumber); + const BoolV vMask = BAnd(vBadNum, a); + return internalUnitSSE2Simd::FiniteTestEq(vMask, BFFFF()) == 1; +} + +PX_FORCE_INLINE bool isFiniteVec3V(const Vec3V a) +{ + PxF32 badNumber = + physx::PxUnionCast<PxF32, PxU32>(PX_FPCLASS_SNAN | PX_FPCLASS_QNAN | PX_FPCLASS_NINF | PX_FPCLASS_PINF); + const Vec3V vBadNum = V3Load(badNumber); + const BoolV vMask = BAnd(BAnd(vBadNum, a), BTTTF()); + return internalUnitSSE2Simd::FiniteTestEq(vMask, BFFFF()) == 1; +} + +PX_FORCE_INLINE bool isFiniteVec4V(const Vec4V a) +{ + /*Vec4V a; + PX_ALIGN(16, PxF32 f[4]); + F32Array_Aligned_From_Vec4V(a, f); + return PxIsFinite(f[0]) + && PxIsFinite(f[1]) + && PxIsFinite(f[2]) + && PxIsFinite(f[3]);*/ + + PxF32 badNumber = + physx::PxUnionCast<PxF32, PxU32>(PX_FPCLASS_SNAN | PX_FPCLASS_QNAN | PX_FPCLASS_NINF | PX_FPCLASS_PINF); + const Vec4V vBadNum = V4Load(badNumber); + const BoolV vMask = BAnd(vBadNum, a); + + return internalUnitSSE2Simd::FiniteTestEq(vMask, BFFFF()) == 1; +} + +PX_FORCE_INLINE bool hasZeroElementinFloatV(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)), FZero()) ? 
true : false; +} + +PX_FORCE_INLINE bool hasZeroElementInVec3V(const Vec3V a) +{ + return (_mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)), FZero()) || + _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)), FZero()) || + _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)), FZero())); +} + +PX_FORCE_INLINE bool hasZeroElementInVec4V(const Vec4V a) +{ + return (_mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)), FZero()) || + _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)), FZero()) || + _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)), FZero()) || + _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)), FZero())); +} + +///////////////////////////////////////////////////////////////////// +////VECTORISED FUNCTION IMPLEMENTATIONS +///////////////////////////////////////////////////////////////////// + +PX_FORCE_INLINE FloatV FLoad(const PxF32 f) +{ + return _mm_load1_ps(&f); +} + +PX_FORCE_INLINE Vec3V V3Load(const PxF32 f) +{ + return _mm_set_ps(0.0f, f, f, f); +} + +PX_FORCE_INLINE Vec4V V4Load(const PxF32 f) +{ + return _mm_load1_ps(&f); +} + +PX_FORCE_INLINE BoolV BLoad(const bool f) +{ + const PxU32 i = -PxI32(f); + return _mm_load1_ps(reinterpret_cast<const float*>(&i)); +} + +PX_FORCE_INLINE Vec3V V3LoadA(const PxVec3& f) +{ + ASSERT_ISALIGNED16(const_cast<PxVec3*>(&f)); +#if !PX_EMSCRIPTEN + return _mm_and_ps(reinterpret_cast<const Vec3V&>(f), V4LoadA(internalUnitSSE2Simd::gMaskXYZ)); +#else + return _mm_and_ps((Vec3V&)f, (VecI32V&)internalUnitSSE2Simd::gMaskXYZ); +#endif +} + +PX_FORCE_INLINE Vec3V V3LoadU(const PxVec3& f) +{ + return _mm_set_ps(0.0f, f.z, f.y, f.x); +} + +PX_FORCE_INLINE Vec3V V3LoadUnsafeA(const PxVec3& f) +{ + ASSERT_ISALIGNED16(const_cast<PxVec3*>(&f)); + return _mm_set_ps(0.0f, f.z, f.y, f.x); +} + +PX_FORCE_INLINE Vec3V V3LoadA(const PxF32* const f) +{ + ASSERT_ISALIGNED16(const_cast<PxF32*>(f)); +#if !PX_EMSCRIPTEN + return _mm_and_ps(V4LoadA(f), 
V4LoadA(internalUnitSSE2Simd::gMaskXYZ)); +#else + return _mm_and_ps((Vec3V&)*f, (VecI32V&)internalUnitSSE2Simd::gMaskXYZ); +#endif +} + +PX_FORCE_INLINE Vec3V V3LoadU(const PxF32* const i) +{ + return _mm_set_ps(0.0f, i[2], i[1], i[0]); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V(Vec4V v) +{ + return V4ClearW(v); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V_WUndefined(const Vec4V v) +{ + return v; +} + +PX_FORCE_INLINE Vec4V Vec4V_From_Vec3V(Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + return f; // ok if it is implemented as the same type. +} + +PX_FORCE_INLINE Vec4V Vec4V_From_PxVec3_WUndefined(const PxVec3& f) +{ + return _mm_set_ps(0.0f, f.z, f.y, f.x); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_FloatV(FloatV f) +{ + return f; +} + +PX_FORCE_INLINE Vec3V Vec3V_From_FloatV(FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return Vec3V_From_Vec4V(Vec4V_From_FloatV(f)); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_FloatV_WUndefined(FloatV f) +{ + ASSERT_ISVALIDVEC3V(f); + return Vec3V_From_Vec4V_WUndefined(Vec4V_From_FloatV(f)); +} + +PX_FORCE_INLINE Mat33V Mat33V_From_PxMat33(const PxMat33& m) +{ + return Mat33V(V3LoadU(m.column0), V3LoadU(m.column1), V3LoadU(m.column2)); +} + +PX_FORCE_INLINE void PxMat33_From_Mat33V(const Mat33V& m, PxMat33& out) +{ + V3StoreU(m.col0, out.column0); + V3StoreU(m.col1, out.column1); + V3StoreU(m.col2, out.column2); +} + +PX_FORCE_INLINE Vec4V V4LoadA(const PxF32* const f) +{ + ASSERT_ISALIGNED16(const_cast<PxF32*>(f)); + return _mm_load_ps(f); +} + +PX_FORCE_INLINE void V4StoreA(Vec4V a, PxF32* f) +{ + ASSERT_ISALIGNED16(f); + _mm_store_ps(f, a); +} + +PX_FORCE_INLINE void V4StoreU(const Vec4V a, PxF32* f) +{ + _mm_storeu_ps(f, a); +} + +PX_FORCE_INLINE void BStoreA(const BoolV a, PxU32* f) +{ + ASSERT_ISALIGNED16(f); + _mm_store_ps(reinterpret_cast<PxF32*>(f), a); +} + +PX_FORCE_INLINE void U4StoreA(const VecU32V uv, PxU32* u) +{ + ASSERT_ISALIGNED16(u); + _mm_store_ps(reinterpret_cast<float*>(u), uv); +} + +PX_FORCE_INLINE void I4StoreA(const 
VecI32V iv, PxI32* i) +{ + ASSERT_ISALIGNED16(i); + _mm_store_ps(reinterpret_cast<float*>(i), m128_I2F(iv)); +} + +PX_FORCE_INLINE Vec4V V4LoadU(const PxF32* const f) +{ + return _mm_loadu_ps(f); +} + +PX_FORCE_INLINE BoolV BLoad(const bool* const f) +{ + const PX_ALIGN(16, PxI32) b[4] = { -PxI32(f[0]), -PxI32(f[1]), -PxI32(f[2]), -PxI32(f[3]) }; + return _mm_load_ps(reinterpret_cast<const float*>(&b)); +} + +PX_FORCE_INLINE void FStore(const FloatV a, PxF32* PX_RESTRICT f) +{ + ASSERT_ISVALIDFLOATV(a); + _mm_store_ss(f, a); +} + +PX_FORCE_INLINE void V3StoreA(const Vec3V a, PxVec3& f) +{ + ASSERT_ISALIGNED16(&f); + PX_ALIGN(16, PxF32) f2[4]; + _mm_store_ps(f2, a); + f = PxVec3(f2[0], f2[1], f2[2]); +} + +PX_FORCE_INLINE void V3StoreU(const Vec3V a, PxVec3& f) +{ + PX_ALIGN(16, PxF32) f2[4]; + _mm_store_ps(f2, a); + f = PxVec3(f2[0], f2[1], f2[2]); +} + +PX_FORCE_INLINE void Store_From_BoolV(const BoolV b, PxU32* b2) +{ + _mm_store_ss(reinterpret_cast<PxF32*>(b2), b); +} + +PX_FORCE_INLINE VecU32V U4Load(const PxU32 i) +{ + return _mm_load1_ps(reinterpret_cast<const PxF32*>(&i)); +} + +PX_FORCE_INLINE VecU32V U4LoadU(const PxU32* i) +{ + return _mm_loadu_ps(reinterpret_cast<const PxF32*>(i)); +} + +PX_FORCE_INLINE VecU32V U4LoadA(const PxU32* i) +{ + ASSERT_ISALIGNED16(const_cast<PxU32*>(i)); + return _mm_load_ps(reinterpret_cast<const PxF32*>(i)); +} + +////////////////////////////////// +// FLOATV +////////////////////////////////// + +PX_FORCE_INLINE FloatV FZero() +{ + return FLoad(0.0f); +} + +PX_FORCE_INLINE FloatV FOne() +{ + return FLoad(1.0f); +} + +PX_FORCE_INLINE FloatV FHalf() +{ + return FLoad(0.5f); +} + +PX_FORCE_INLINE FloatV FEps() +{ + return FLoad(PX_EPS_REAL); +} + +PX_FORCE_INLINE FloatV FEps6() +{ + return FLoad(1e-6f); +} + +PX_FORCE_INLINE FloatV FMax() +{ + return FLoad(PX_MAX_REAL); +} + +PX_FORCE_INLINE FloatV FNegMax() +{ + return FLoad(-PX_MAX_REAL); +} + +PX_FORCE_INLINE FloatV IZero() +{ + const PxU32 zero = 0; + return 
_mm_load1_ps(reinterpret_cast<const PxF32*>(&zero)); +} + +PX_FORCE_INLINE FloatV IOne() +{ + const PxU32 one = 1; + return _mm_load1_ps(reinterpret_cast<const PxF32*>(&one)); +} + +PX_FORCE_INLINE FloatV ITwo() +{ + const PxU32 two = 2; + return _mm_load1_ps(reinterpret_cast<const PxF32*>(&two)); +} + +PX_FORCE_INLINE FloatV IThree() +{ + const PxU32 three = 3; + return _mm_load1_ps(reinterpret_cast<const PxF32*>(&three)); +} + +PX_FORCE_INLINE FloatV IFour() +{ + PxU32 four = 4; + return _mm_load1_ps(reinterpret_cast<const PxF32*>(&four)); +} + +PX_FORCE_INLINE FloatV FNeg(const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return _mm_sub_ps(_mm_setzero_ps(), f); +} + +PX_FORCE_INLINE FloatV FAdd(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); +/* + if(!isValidFloatV(a)) + { +assert(false); + } + if(!isValidFloatV(b)) + { +assert(false); + } +*/ + return _mm_add_ps(a, b); +} + +PX_FORCE_INLINE FloatV FSub(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_sub_ps(a, b); +} + +PX_FORCE_INLINE FloatV FMul(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_mul_ps(a, b); +} + +PX_FORCE_INLINE FloatV FDiv(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_div_ps(a, b); +} + +PX_FORCE_INLINE FloatV FDivFast(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_mul_ps(a, _mm_rcp_ps(b)); +} + +PX_FORCE_INLINE FloatV FRecip(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return _mm_div_ps(FOne(), a); +} + +PX_FORCE_INLINE FloatV FRecipFast(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return _mm_rcp_ps(a); +} + +PX_FORCE_INLINE FloatV FRsqrt(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return _mm_div_ps(FOne(), _mm_sqrt_ps(a)); +} + +PX_FORCE_INLINE FloatV FSqrt(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return _mm_sqrt_ps(a); +} 
+ +PX_FORCE_INLINE FloatV FRsqrtFast(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return _mm_rsqrt_ps(a); +} + +PX_FORCE_INLINE FloatV FScaleAdd(const FloatV a, const FloatV b, const FloatV c) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDFLOATV(c); + return FAdd(FMul(a, b), c); +} + +PX_FORCE_INLINE FloatV FNegScaleSub(const FloatV a, const FloatV b, const FloatV c) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDFLOATV(c); + return FSub(c, FMul(a, b)); +} + +PX_FORCE_INLINE FloatV FAbs(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + PX_ALIGN(16, const PxU32) absMask[4] = { 0x7fFFffFF, 0x7fFFffFF, 0x7fFFffFF, 0x7fFFffFF }; + return _mm_and_ps(a, _mm_load_ps(reinterpret_cast<const PxF32*>(absMask))); +} + +PX_FORCE_INLINE FloatV FSel(const BoolV c, const FloatV a, const FloatV b) +{ + PX_ASSERT(_VecMathTests::allElementsEqualBoolV(c,BTTTT()) || + _VecMathTests::allElementsEqualBoolV(c,BFFFF())); + ASSERT_ISVALIDFLOATV(_mm_or_ps(_mm_andnot_ps(c, b), _mm_and_ps(c, a))); + return _mm_or_ps(_mm_andnot_ps(c, b), _mm_and_ps(c, a)); +} + +PX_FORCE_INLINE BoolV FIsGrtr(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_cmpgt_ps(a, b); +} + +PX_FORCE_INLINE BoolV FIsGrtrOrEq(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_cmpge_ps(a, b); +} + +PX_FORCE_INLINE BoolV FIsEq(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_cmpeq_ps(a, b); +} + +PX_FORCE_INLINE FloatV FMax(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_max_ps(a, b); +} + +PX_FORCE_INLINE FloatV FMin(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_min_ps(a, b); +} + +PX_FORCE_INLINE FloatV FClamp(const FloatV a, const FloatV minV, const FloatV maxV) +{ + ASSERT_ISVALIDFLOATV(minV); + 
ASSERT_ISVALIDFLOATV(maxV); + return _mm_max_ps(_mm_min_ps(a, maxV), minV); +} + +PX_FORCE_INLINE PxU32 FAllGrtr(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_comigt_ss(a, b); +} + +PX_FORCE_INLINE PxU32 FAllGrtrOrEq(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_comige_ss(a, b); +} + +PX_FORCE_INLINE PxU32 FAllEq(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_comieq_ss(a, b); +} + +PX_FORCE_INLINE FloatV FRound(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); +#ifdef __SSE4_2__ + return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +#else + // return _mm_round_ps(a, 0x0); + const FloatV half = FLoad(0.5f); + const __m128 signBit = _mm_cvtepi32_ps(_mm_srli_epi32(_mm_cvtps_epi32(a), 31)); + const FloatV aRound = FSub(FAdd(a, half), signBit); + __m128i tmp = _mm_cvttps_epi32(aRound); + return _mm_cvtepi32_ps(tmp); +#endif +} + +PX_FORCE_INLINE FloatV FSin(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const FloatV recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const FloatV twoPi = V4LoadA(g_PXTwoPi.f); + const FloatV tmp = FMul(a, recipTwoPi); + const FloatV b = FRound(tmp); + const FloatV V1 = FNegScaleSub(twoPi, b, a); + + // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - + // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! 
(for -PI <= V < PI) + const FloatV V2 = FMul(V1, V1); + const FloatV V3 = FMul(V2, V1); + const FloatV V5 = FMul(V3, V2); + const FloatV V7 = FMul(V5, V2); + const FloatV V9 = FMul(V7, V2); + const FloatV V11 = FMul(V9, V2); + const FloatV V13 = FMul(V11, V2); + const FloatV V15 = FMul(V13, V2); + const FloatV V17 = FMul(V15, V2); + const FloatV V19 = FMul(V17, V2); + const FloatV V21 = FMul(V19, V2); + const FloatV V23 = FMul(V21, V2); + + const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f); + const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f); + const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f); + + const FloatV S1 = V4GetY(sinCoefficients0); + const FloatV S2 = V4GetZ(sinCoefficients0); + const FloatV S3 = V4GetW(sinCoefficients0); + const FloatV S4 = V4GetX(sinCoefficients1); + const FloatV S5 = V4GetY(sinCoefficients1); + const FloatV S6 = V4GetZ(sinCoefficients1); + const FloatV S7 = V4GetW(sinCoefficients1); + const FloatV S8 = V4GetX(sinCoefficients2); + const FloatV S9 = V4GetY(sinCoefficients2); + const FloatV S10 = V4GetZ(sinCoefficients2); + const FloatV S11 = V4GetW(sinCoefficients2); + + FloatV Result; + Result = FScaleAdd(S1, V3, V1); + Result = FScaleAdd(S2, V5, Result); + Result = FScaleAdd(S3, V7, Result); + Result = FScaleAdd(S4, V9, Result); + Result = FScaleAdd(S5, V11, Result); + Result = FScaleAdd(S6, V13, Result); + Result = FScaleAdd(S7, V15, Result); + Result = FScaleAdd(S8, V17, Result); + Result = FScaleAdd(S9, V19, Result); + Result = FScaleAdd(S10, V21, Result); + Result = FScaleAdd(S11, V23, Result); + + return Result; +} + +PX_FORCE_INLINE FloatV FCos(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const FloatV recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const FloatV twoPi = V4LoadA(g_PXTwoPi.f); + const FloatV tmp = FMul(a, recipTwoPi); + const FloatV b = FRound(tmp); + const FloatV V1 = FNegScaleSub(twoPi, b, a); 
+ + // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! - + // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) + const FloatV V2 = FMul(V1, V1); + const FloatV V4 = FMul(V2, V2); + const FloatV V6 = FMul(V4, V2); + const FloatV V8 = FMul(V4, V4); + const FloatV V10 = FMul(V6, V4); + const FloatV V12 = FMul(V6, V6); + const FloatV V14 = FMul(V8, V6); + const FloatV V16 = FMul(V8, V8); + const FloatV V18 = FMul(V10, V8); + const FloatV V20 = FMul(V10, V10); + const FloatV V22 = FMul(V12, V10); + + const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f); + const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f); + const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f); + + const FloatV C1 = V4GetY(cosCoefficients0); + const FloatV C2 = V4GetZ(cosCoefficients0); + const FloatV C3 = V4GetW(cosCoefficients0); + const FloatV C4 = V4GetX(cosCoefficients1); + const FloatV C5 = V4GetY(cosCoefficients1); + const FloatV C6 = V4GetZ(cosCoefficients1); + const FloatV C7 = V4GetW(cosCoefficients1); + const FloatV C8 = V4GetX(cosCoefficients2); + const FloatV C9 = V4GetY(cosCoefficients2); + const FloatV C10 = V4GetZ(cosCoefficients2); + const FloatV C11 = V4GetW(cosCoefficients2); + + FloatV Result; + Result = FScaleAdd(C1, V2, V4One()); + Result = FScaleAdd(C2, V4, Result); + Result = FScaleAdd(C3, V6, Result); + Result = FScaleAdd(C4, V8, Result); + Result = FScaleAdd(C5, V10, Result); + Result = FScaleAdd(C6, V12, Result); + Result = FScaleAdd(C7, V14, Result); + Result = FScaleAdd(C8, V16, Result); + Result = FScaleAdd(C9, V18, Result); + Result = FScaleAdd(C10, V20, Result); + Result = FScaleAdd(C11, V22, Result); + + return Result; +} + +PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV min, const FloatV max) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(min); + ASSERT_ISVALIDFLOATV(max); + const BoolV c = BOr(FIsGrtr(a, max), FIsGrtr(min, a)); + return 
!BAllEqFFFF(c); +} + +PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV min, const FloatV max) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(min); + ASSERT_ISVALIDFLOATV(max) + const BoolV c = BAnd(FIsGrtrOrEq(a, min), FIsGrtrOrEq(max, a)); + return BAllEqTTTT(c); +} + +PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV bounds) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(bounds); + return FOutOfBounds(a, FNeg(bounds), bounds); +} + +PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV bounds) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(bounds); + return FInBounds(a, FNeg(bounds), bounds); +} + +////////////////////////////////// +// VEC3V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V V3Splat(const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + const __m128 zero = FZero(); + const __m128 fff0 = _mm_move_ss(f, zero); + return _mm_shuffle_ps(fff0, fff0, _MM_SHUFFLE(0, 1, 2, 3)); +} + +PX_FORCE_INLINE Vec3V V3Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z) +{ + ASSERT_ISVALIDFLOATV(x); + ASSERT_ISVALIDFLOATV(y); + ASSERT_ISVALIDFLOATV(z); + // static on zero causes compiler crash on x64 debug_opt + const __m128 zero = FZero(); + const __m128 xy = _mm_move_ss(x, y); + const __m128 z0 = _mm_move_ss(zero, z); + + return _mm_shuffle_ps(xy, z0, _MM_SHUFFLE(1, 0, 0, 1)); +} + +PX_FORCE_INLINE Vec3V V3UnitX() +{ + const PX_ALIGN(16, PxF32) x[4] = { 1.0f, 0.0f, 0.0f, 0.0f }; + const __m128 x128 = _mm_load_ps(x); + return x128; +} + +PX_FORCE_INLINE Vec3V V3UnitY() +{ + const PX_ALIGN(16, PxF32) y[4] = { 0.0f, 1.0f, 0.0f, 0.0f }; + const __m128 y128 = _mm_load_ps(y); + return y128; +} + +PX_FORCE_INLINE Vec3V V3UnitZ() +{ + const PX_ALIGN(16, PxF32) z[4] = { 0.0f, 0.0f, 1.0f, 0.0f }; + const __m128 z128 = _mm_load_ps(z); + return z128; +} + +PX_FORCE_INLINE FloatV V3GetX(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(0, 0, 0, 0)); +} + +PX_FORCE_INLINE FloatV 
V3GetY(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f) + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(1, 1, 1, 1)); +} + +PX_FORCE_INLINE FloatV V3GetZ(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(2, 2, 2, 2)); +} + +PX_FORCE_INLINE Vec3V V3SetX(const Vec3V v, const FloatV f) +{ + ASSERT_ISVALIDVEC3V(v); + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BFTTT(), v, f); +} + +PX_FORCE_INLINE Vec3V V3SetY(const Vec3V v, const FloatV f) +{ + ASSERT_ISVALIDVEC3V(v); + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTFTT(), v, f); +} + +PX_FORCE_INLINE Vec3V V3SetZ(const Vec3V v, const FloatV f) +{ + ASSERT_ISVALIDVEC3V(v); + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTTFT(), v, f); +} + +PX_FORCE_INLINE Vec3V V3ColX(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + Vec3V r = _mm_shuffle_ps(a, c, _MM_SHUFFLE(3, 0, 3, 0)); + return V3SetY(r, V3GetX(b)); +} + +PX_FORCE_INLINE Vec3V V3ColY(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c) + Vec3V r = _mm_shuffle_ps(a, c, _MM_SHUFFLE(3, 1, 3, 1)); + return V3SetY(r, V3GetY(b)); +} + +PX_FORCE_INLINE Vec3V V3ColZ(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + Vec3V r = _mm_shuffle_ps(a, c, _MM_SHUFFLE(3, 2, 3, 2)); + return V3SetY(r, V3GetZ(b)); +} + +PX_FORCE_INLINE Vec3V V3Zero() +{ + return V3Load(0.0f); +} + +PX_FORCE_INLINE Vec3V V3Eps() +{ + return V3Load(PX_EPS_REAL); +} +PX_FORCE_INLINE Vec3V V3One() +{ + return V3Load(1.0f); +} + +PX_FORCE_INLINE Vec3V V3Neg(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + return _mm_sub_ps(_mm_setzero_ps(), f); +} + +PX_FORCE_INLINE Vec3V V3Add(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_add_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Sub(const Vec3V a, const Vec3V b) +{ + 
ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_sub_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Scale(const Vec3V a, const FloatV b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_mul_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Mul(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_mul_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3ScaleInv(const Vec3V a, const FloatV b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_div_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Div(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return V4ClearW(_mm_div_ps(a, b)); +} + +PX_FORCE_INLINE Vec3V V3ScaleInvFast(const Vec3V a, const FloatV b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_mul_ps(a, _mm_rcp_ps(b)); +} + +PX_FORCE_INLINE Vec3V V3DivFast(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return V4ClearW(_mm_mul_ps(a, _mm_rcp_ps(b))); +} + +PX_FORCE_INLINE Vec3V V3Recip(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 zero = V3Zero(); + const __m128 tttf = BTTTF(); + const __m128 recipA = _mm_div_ps(V3One(), a); + return V4Sel(tttf, recipA, zero); +} + +PX_FORCE_INLINE Vec3V V3RecipFast(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 zero = V3Zero(); + const __m128 tttf = BTTTF(); + const __m128 recipA = _mm_rcp_ps(a); + return V4Sel(tttf, recipA, zero); +} + +PX_FORCE_INLINE Vec3V V3Rsqrt(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 zero = V3Zero(); + const __m128 tttf = BTTTF(); + const __m128 recipA = _mm_div_ps(V3One(), _mm_sqrt_ps(a)); + return V4Sel(tttf, recipA, zero); +} + +PX_FORCE_INLINE Vec3V V3RsqrtFast(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 zero = V3Zero(); + const __m128 tttf = BTTTF(); + const __m128 recipA = _mm_rsqrt_ps(a); + return V4Sel(tttf, recipA, zero); +} + +PX_FORCE_INLINE Vec3V V3ScaleAdd(const Vec3V a, const 
FloatV b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDVEC3V(c); + return V3Add(V3Scale(a, b), c); +} + +PX_FORCE_INLINE Vec3V V3NegScaleSub(const Vec3V a, const FloatV b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDVEC3V(c); + return V3Sub(c, V3Scale(a, b)); +} + +PX_FORCE_INLINE Vec3V V3MulAdd(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + return V3Add(V3Mul(a, b), c); +} + +PX_FORCE_INLINE Vec3V V3NegMulSub(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + return V3Sub(c, V3Mul(a, b)); +} + +PX_FORCE_INLINE Vec3V V3Abs(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return V3Max(a, V3Neg(a)); +} + +PX_FORCE_INLINE FloatV V3Dot(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); +#ifdef __SSE4_2__ + return _mm_dp_ps(a, b, 0x7f); +#else + const __m128 t0 = _mm_mul_ps(a, b); // aw*bw | az*bz | ay*by | ax*bx + const __m128 t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(1,0,3,2)); // ay*by | ax*bx | aw*bw | az*bz + const __m128 t2 = _mm_add_ps(t0, t1); // ay*by + aw*bw | ax*bx + az*bz | aw*bw + ay*by | az*bz + ax*bx + const __m128 t3 = _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,3,0,1)); // ax*bx + az*bz | ay*by + aw*bw | az*bz + ax*bx | aw*bw + ay*by + return _mm_add_ps(t3, t2); // ax*bx + az*bz + ay*by + aw*bw + // ay*by + aw*bw + ax*bx + az*bz + // az*bz + ax*bx + aw*bw + ay*by + // aw*bw + ay*by + az*bz + ax*bx +#endif +} + +PX_FORCE_INLINE Vec3V V3Cross(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + const __m128 r1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + const __m128 r2 = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l2 = _mm_shuffle_ps(b, b, 
_MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + return _mm_sub_ps(_mm_mul_ps(l1, l2), _mm_mul_ps(r1, r2)); +} + +PX_FORCE_INLINE VecCrossV V3PrepareCross(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + VecCrossV v; + v.mR1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + v.mL1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + return v; +} + +PX_FORCE_INLINE Vec3V V3Cross(const VecCrossV& a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(b); + const __m128 r2 = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l2 = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + return _mm_sub_ps(_mm_mul_ps(a.mL1, l2), _mm_mul_ps(a.mR1, r2)); +} + +PX_FORCE_INLINE Vec3V V3Cross(const Vec3V a, const VecCrossV& b) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 r2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + return _mm_sub_ps(_mm_mul_ps(b.mR1, r2), _mm_mul_ps(b.mL1, l2)); +} + +PX_FORCE_INLINE Vec3V V3Cross(const VecCrossV& a, const VecCrossV& b) +{ + return _mm_sub_ps(_mm_mul_ps(a.mL1, b.mR1), _mm_mul_ps(a.mR1, b.mL1)); +} + +PX_FORCE_INLINE FloatV V3Length(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_sqrt_ps(V3Dot(a, a)); +} + +PX_FORCE_INLINE FloatV V3LengthSq(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return V3Dot(a, a); +} + +PX_FORCE_INLINE Vec3V V3Normalize(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISFINITELENGTH(a); + return V3ScaleInv(a, _mm_sqrt_ps(V3Dot(a, a))); +} + +PX_FORCE_INLINE Vec3V V3NormalizeFast(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISFINITELENGTH(a); + return V3Scale(a, _mm_rsqrt_ps(V3Dot(a, a))); +} + +PX_FORCE_INLINE Vec3V V3NormalizeSafe(const Vec3V a, const Vec3V unsafeReturnValue) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 eps = V3Eps(); + const __m128 length = V3Length(a); + const __m128 isGreaterThanZero = FIsGrtr(length, eps); + return V3Sel(isGreaterThanZero, V3ScaleInv(a, length), 
unsafeReturnValue); +} + +PX_FORCE_INLINE Vec3V V3Sel(const BoolV c, const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(_mm_or_ps(_mm_andnot_ps(c, b), _mm_and_ps(c, a))); + return _mm_or_ps(_mm_andnot_ps(c, b), _mm_and_ps(c, a)); +} + +PX_FORCE_INLINE BoolV V3IsGrtr(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_cmpgt_ps(a, b); +} + +PX_FORCE_INLINE BoolV V3IsGrtrOrEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_cmpge_ps(a, b); +} + +PX_FORCE_INLINE BoolV V3IsEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_cmpeq_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Max(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_max_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Min(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_min_ps(a, b); +} + +PX_FORCE_INLINE FloatV V3ExtractMax(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 shuf1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)); + const __m128 shuf2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)); + const __m128 shuf3 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)); + + return _mm_max_ps(_mm_max_ps(shuf1, shuf2), shuf3); +} + +PX_FORCE_INLINE FloatV V3ExtractMin(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + const __m128 shuf1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)); + const __m128 shuf2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)); + const __m128 shuf3 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)); + + return _mm_min_ps(_mm_min_ps(shuf1, shuf2), shuf3); +} + +// return (a >= 0.0f) ? 
1.0f : -1.0f; +PX_FORCE_INLINE Vec3V V3Sign(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 zero = V3Zero(); + const __m128 one = V3One(); + const __m128 none = V3Neg(one); + return V3Sel(V3IsGrtrOrEq(a, zero), one, none); +} + +PX_FORCE_INLINE Vec3V V3Clamp(const Vec3V a, const Vec3V minV, const Vec3V maxV) +{ + ASSERT_ISVALIDVEC3V(maxV); + ASSERT_ISVALIDVEC3V(minV); + return V3Max(V3Min(a, maxV), minV); +} + +PX_FORCE_INLINE PxU32 V3AllGrtr(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return internalUnitSSE2Simd::BAllTrue3_R(V4IsGrtr(a, b)); +} + +PX_FORCE_INLINE PxU32 V3AllGrtrOrEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return internalUnitSSE2Simd::BAllTrue3_R(V4IsGrtrOrEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V3AllEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return internalUnitSSE2Simd::BAllTrue3_R(V4IsEq(a, b)); +} + +PX_FORCE_INLINE Vec3V V3Round(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); +#ifdef __SSE4_2__ + return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +#else + // return _mm_round_ps(a, 0x0); + const Vec3V half = V3Load(0.5f); + const __m128 signBit = _mm_cvtepi32_ps(_mm_srli_epi32(_mm_cvtps_epi32(a), 31)); + const Vec3V aRound = V3Sub(V3Add(a, half), signBit); + __m128i tmp = _mm_cvttps_epi32(aRound); + return _mm_cvtepi32_ps(tmp); +#endif +} + +PX_FORCE_INLINE Vec3V V3Sin(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec3V tmp = V3Scale(a, recipTwoPi); + const Vec3V b = V3Round(tmp); + const Vec3V V1 = V3NegScaleSub(b, twoPi, a); + + // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - + // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! 
(for -PI <= V < PI) + const Vec3V V2 = V3Mul(V1, V1); + const Vec3V V3 = V3Mul(V2, V1); + const Vec3V V5 = V3Mul(V3, V2); + const Vec3V V7 = V3Mul(V5, V2); + const Vec3V V9 = V3Mul(V7, V2); + const Vec3V V11 = V3Mul(V9, V2); + const Vec3V V13 = V3Mul(V11, V2); + const Vec3V V15 = V3Mul(V13, V2); + const Vec3V V17 = V3Mul(V15, V2); + const Vec3V V19 = V3Mul(V17, V2); + const Vec3V V21 = V3Mul(V19, V2); + const Vec3V V23 = V3Mul(V21, V2); + + const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f); + const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f); + const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f); + + const FloatV S1 = V4GetY(sinCoefficients0); + const FloatV S2 = V4GetZ(sinCoefficients0); + const FloatV S3 = V4GetW(sinCoefficients0); + const FloatV S4 = V4GetX(sinCoefficients1); + const FloatV S5 = V4GetY(sinCoefficients1); + const FloatV S6 = V4GetZ(sinCoefficients1); + const FloatV S7 = V4GetW(sinCoefficients1); + const FloatV S8 = V4GetX(sinCoefficients2); + const FloatV S9 = V4GetY(sinCoefficients2); + const FloatV S10 = V4GetZ(sinCoefficients2); + const FloatV S11 = V4GetW(sinCoefficients2); + + Vec3V Result; + Result = V3ScaleAdd(V3, S1, V1); + Result = V3ScaleAdd(V5, S2, Result); + Result = V3ScaleAdd(V7, S3, Result); + Result = V3ScaleAdd(V9, S4, Result); + Result = V3ScaleAdd(V11, S5, Result); + Result = V3ScaleAdd(V13, S6, Result); + Result = V3ScaleAdd(V15, S7, Result); + Result = V3ScaleAdd(V17, S8, Result); + Result = V3ScaleAdd(V19, S9, Result); + Result = V3ScaleAdd(V21, S10, Result); + Result = V3ScaleAdd(V23, S11, Result); + + ASSERT_ISVALIDVEC3V(Result); + return Result; +} + +PX_FORCE_INLINE Vec3V V3Cos(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec3V tmp = V3Scale(a, recipTwoPi); + const Vec3V b = V3Round(tmp); + const 
Vec3V V1 = V3NegScaleSub(b, twoPi, a); + + // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! - + // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) + const Vec3V V2 = V3Mul(V1, V1); + const Vec3V V4 = V3Mul(V2, V2); + const Vec3V V6 = V3Mul(V4, V2); + const Vec3V V8 = V3Mul(V4, V4); + const Vec3V V10 = V3Mul(V6, V4); + const Vec3V V12 = V3Mul(V6, V6); + const Vec3V V14 = V3Mul(V8, V6); + const Vec3V V16 = V3Mul(V8, V8); + const Vec3V V18 = V3Mul(V10, V8); + const Vec3V V20 = V3Mul(V10, V10); + const Vec3V V22 = V3Mul(V12, V10); + + const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f); + const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f); + const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f); + + const FloatV C1 = V4GetY(cosCoefficients0); + const FloatV C2 = V4GetZ(cosCoefficients0); + const FloatV C3 = V4GetW(cosCoefficients0); + const FloatV C4 = V4GetX(cosCoefficients1); + const FloatV C5 = V4GetY(cosCoefficients1); + const FloatV C6 = V4GetZ(cosCoefficients1); + const FloatV C7 = V4GetW(cosCoefficients1); + const FloatV C8 = V4GetX(cosCoefficients2); + const FloatV C9 = V4GetY(cosCoefficients2); + const FloatV C10 = V4GetZ(cosCoefficients2); + const FloatV C11 = V4GetW(cosCoefficients2); + + Vec3V Result; + Result = V3ScaleAdd(V2, C1, V3One()); + Result = V3ScaleAdd(V4, C2, Result); + Result = V3ScaleAdd(V6, C3, Result); + Result = V3ScaleAdd(V8, C4, Result); + Result = V3ScaleAdd(V10, C5, Result); + Result = V3ScaleAdd(V12, C6, Result); + Result = V3ScaleAdd(V14, C7, Result); + Result = V3ScaleAdd(V16, C8, Result); + Result = V3ScaleAdd(V18, C9, Result); + Result = V3ScaleAdd(V20, C10, Result); + Result = V3ScaleAdd(V22, C11, Result); + + ASSERT_ISVALIDVEC3V(Result); + return Result; +} + +PX_FORCE_INLINE Vec3V V3PermYZZ(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 2, 1)); +} + +PX_FORCE_INLINE Vec3V 
V3PermXYX(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 1, 0)); +} + +PX_FORCE_INLINE Vec3V V3PermYZX(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); +} + +PX_FORCE_INLINE Vec3V V3PermZXY(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2)); +} + +PX_FORCE_INLINE Vec3V V3PermZZY(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 2, 2)); +} + +PX_FORCE_INLINE Vec3V V3PermYXX(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 0, 1)); +} + +PX_FORCE_INLINE Vec3V V3Perm_Zero_1Z_0Y(const Vec3V v0, const Vec3V v1) +{ + ASSERT_ISVALIDVEC3V(v0); + ASSERT_ISVALIDVEC3V(v1); + return _mm_shuffle_ps(v1, v0, _MM_SHUFFLE(3, 1, 2, 3)); +} + +PX_FORCE_INLINE Vec3V V3Perm_0Z_Zero_1X(const Vec3V v0, const Vec3V v1) +{ + ASSERT_ISVALIDVEC3V(v0); + ASSERT_ISVALIDVEC3V(v1); + return _mm_shuffle_ps(v0, v1, _MM_SHUFFLE(3, 0, 3, 2)); +} + +PX_FORCE_INLINE Vec3V V3Perm_1Y_0X_Zero(const Vec3V v0, const Vec3V v1) +{ + ASSERT_ISVALIDVEC3V(v0); + ASSERT_ISVALIDVEC3V(v1); + // There must be a better way to do this. 
+ Vec3V v2 = V3Zero(); + FloatV y1 = V3GetY(v1); + FloatV x0 = V3GetX(v0); + v2 = V3SetX(v2, y1); + return V3SetY(v2, x0); +} + +PX_FORCE_INLINE FloatV V3SumElems(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); +#ifdef __SSE4_2__ + Vec3V r = _mm_hadd_ps(a, a); + r = _mm_hadd_ps(r, r); + return r; +#else + __m128 shuf1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)); // z,y,x,w + __m128 shuf2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)); // y,x,w,z + __m128 shuf3 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)); // x,w,z,y + return _mm_add_ps(_mm_add_ps(shuf1, shuf2), shuf3); +#endif +} + +PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V min, const Vec3V max) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(min); + ASSERT_ISVALIDVEC3V(max); + const BoolV c = BOr(V3IsGrtr(a, max), V3IsGrtr(min, a)); + return !BAllEqFFFF(c); +} + +PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V min, const Vec3V max) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(min); + ASSERT_ISVALIDVEC3V(max); + const BoolV c = BAnd(V3IsGrtrOrEq(a, min), V3IsGrtrOrEq(max, a)); + return BAllEqTTTT(c); +} + +PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V bounds) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(bounds); + return V3OutOfBounds(a, V3Neg(bounds), bounds); +} + +PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V bounds) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(bounds) + return V3InBounds(a, V3Neg(bounds), bounds); +} + +PX_FORCE_INLINE void V3Transpose(Vec3V& col0, Vec3V& col1, Vec3V& col2) +{ + ASSERT_ISVALIDVEC3V(col0); + ASSERT_ISVALIDVEC3V(col1); + ASSERT_ISVALIDVEC3V(col2); + + const Vec3V col3 = _mm_setzero_ps(); + Vec3V tmp0 = _mm_unpacklo_ps(col0, col1); + Vec3V tmp2 = _mm_unpacklo_ps(col2, col3); + Vec3V tmp1 = _mm_unpackhi_ps(col0, col1); + Vec3V tmp3 = _mm_unpackhi_ps(col2, col3); + col0 = _mm_movelh_ps(tmp0, tmp2); + col1 = _mm_movehl_ps(tmp2, tmp0); + col2 = _mm_movelh_ps(tmp1, tmp3); +} + 
+////////////////////////////////// +// VEC4V +////////////////////////////////// + +PX_FORCE_INLINE Vec4V V4Splat(const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + // return _mm_shuffle_ps(f, f, _MM_SHUFFLE(0,0,0,0)); + return f; +} + +PX_FORCE_INLINE Vec4V V4Merge(const FloatV* const floatVArray) +{ + ASSERT_ISVALIDFLOATV(floatVArray[0]); + ASSERT_ISVALIDFLOATV(floatVArray[1]); + ASSERT_ISVALIDFLOATV(floatVArray[2]); + ASSERT_ISVALIDFLOATV(floatVArray[3]); + const __m128 xw = _mm_move_ss(floatVArray[1], floatVArray[0]); // y, y, y, x + const __m128 yz = _mm_move_ss(floatVArray[2], floatVArray[3]); // z, z, z, w + return _mm_shuffle_ps(xw, yz, _MM_SHUFFLE(0, 2, 1, 0)); +} + +PX_FORCE_INLINE Vec4V V4Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z, const FloatVArg w) +{ + ASSERT_ISVALIDFLOATV(x); + ASSERT_ISVALIDFLOATV(y); + ASSERT_ISVALIDFLOATV(z); + ASSERT_ISVALIDFLOATV(w); + const __m128 xw = _mm_move_ss(y, x); // y, y, y, x + const __m128 yz = _mm_move_ss(z, w); // z, z, z, w + return _mm_shuffle_ps(xw, yz, _MM_SHUFFLE(0, 2, 1, 0)); +} + +PX_FORCE_INLINE Vec4V V4MergeW(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const Vec4V xz = _mm_unpackhi_ps(x, z); + const Vec4V yw = _mm_unpackhi_ps(y, w); + return _mm_unpackhi_ps(xz, yw); +} + +PX_FORCE_INLINE Vec4V V4MergeZ(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const Vec4V xz = _mm_unpackhi_ps(x, z); + const Vec4V yw = _mm_unpackhi_ps(y, w); + return _mm_unpacklo_ps(xz, yw); +} + +PX_FORCE_INLINE Vec4V V4MergeY(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const Vec4V xz = _mm_unpacklo_ps(x, z); + const Vec4V yw = _mm_unpacklo_ps(y, w); + return _mm_unpackhi_ps(xz, yw); +} + +PX_FORCE_INLINE Vec4V V4MergeX(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const Vec4V xz = _mm_unpacklo_ps(x, z); + const Vec4V yw = _mm_unpacklo_ps(y, w); + return _mm_unpacklo_ps(xz, yw); +} + 
+PX_FORCE_INLINE Vec4V V4UnpackXY(const Vec4VArg a, const Vec4VArg b) +{ + return _mm_unpacklo_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4UnpackZW(const Vec4VArg a, const Vec4VArg b) +{ + return _mm_unpackhi_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4UnitW() +{ + const PX_ALIGN(16, PxF32) w[4] = { 0.0f, 0.0f, 0.0f, 1.0f }; + const __m128 w128 = _mm_load_ps(w); + return w128; +} + +PX_FORCE_INLINE Vec4V V4UnitX() +{ + const PX_ALIGN(16, PxF32) x[4] = { 1.0f, 0.0f, 0.0f, 0.0f }; + const __m128 x128 = _mm_load_ps(x); + return x128; +} + +PX_FORCE_INLINE Vec4V V4UnitY() +{ + const PX_ALIGN(16, PxF32) y[4] = { 0.0f, 1.0f, 0.0f, 0.0f }; + const __m128 y128 = _mm_load_ps(y); + return y128; +} + +PX_FORCE_INLINE Vec4V V4UnitZ() +{ + const PX_ALIGN(16, PxF32) z[4] = { 0.0f, 0.0f, 1.0f, 0.0f }; + const __m128 z128 = _mm_load_ps(z); + return z128; +} + +PX_FORCE_INLINE FloatV V4GetW(const Vec4V f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 3, 3, 3)); +} + +PX_FORCE_INLINE FloatV V4GetX(const Vec4V f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(0, 0, 0, 0)); +} + +PX_FORCE_INLINE FloatV V4GetY(const Vec4V f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(1, 1, 1, 1)); +} + +PX_FORCE_INLINE FloatV V4GetZ(const Vec4V f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(2, 2, 2, 2)); +} + +PX_FORCE_INLINE Vec4V V4SetW(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTTTF(), v, f); +} + +PX_FORCE_INLINE Vec4V V4SetX(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BFTTT(), v, f); +} + +PX_FORCE_INLINE Vec4V V4SetY(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTFTT(), v, f); +} + +PX_FORCE_INLINE Vec4V V4SetZ(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTTFT(), v, f); +} + +PX_FORCE_INLINE Vec4V V4ClearW(const Vec4V v) +{ +#if !PX_EMSCRIPTEN + return _mm_and_ps(v, V4LoadA(internalUnitSSE2Simd::gMaskXYZ)); +#else + return _mm_and_ps(v, 
(VecI32V&)internalUnitSSE2Simd::gMaskXYZ); +#endif +} + +PX_FORCE_INLINE Vec4V V4PermYXWZ(const Vec4V a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 3, 0, 1)); +} + +PX_FORCE_INLINE Vec4V V4PermXZXZ(const Vec4V a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 0, 2, 0)); +} + +PX_FORCE_INLINE Vec4V V4PermYWYW(const Vec4V a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 3, 1)); +} + +PX_FORCE_INLINE Vec4V V4PermYZXW(const Vec4V a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); +} + +template <PxU8 x, PxU8 y, PxU8 z, PxU8 w> +PX_FORCE_INLINE Vec4V V4Perm(const Vec4V a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(w, z, y, x)); +} + +PX_FORCE_INLINE Vec4V V4Zero() +{ + return V4Load(0.0f); +} + +PX_FORCE_INLINE Vec4V V4One() +{ + return V4Load(1.0f); +} + +PX_FORCE_INLINE Vec4V V4Eps() +{ + return V4Load(PX_EPS_REAL); +} + +PX_FORCE_INLINE Vec4V V4Neg(const Vec4V f) +{ + return _mm_sub_ps(_mm_setzero_ps(), f); +} + +PX_FORCE_INLINE Vec4V V4Add(const Vec4V a, const Vec4V b) +{ + return _mm_add_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4Sub(const Vec4V a, const Vec4V b) +{ + return _mm_sub_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4Scale(const Vec4V a, const FloatV b) +{ + return _mm_mul_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4Mul(const Vec4V a, const Vec4V b) +{ + return _mm_mul_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4ScaleInv(const Vec4V a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(b); + return _mm_div_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4Div(const Vec4V a, const Vec4V b) +{ + return _mm_div_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4ScaleInvFast(const Vec4V a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(b); + return _mm_mul_ps(a, _mm_rcp_ps(b)); +} + +PX_FORCE_INLINE Vec4V V4DivFast(const Vec4V a, const Vec4V b) +{ + return _mm_mul_ps(a, _mm_rcp_ps(b)); +} + +PX_FORCE_INLINE Vec4V V4Recip(const Vec4V a) +{ + return _mm_div_ps(V4One(), a); +} + +PX_FORCE_INLINE Vec4V V4RecipFast(const Vec4V a) +{ + return _mm_rcp_ps(a); +} + +PX_FORCE_INLINE Vec4V 
V4Rsqrt(const Vec4V a) +{ + return _mm_div_ps(V4One(), _mm_sqrt_ps(a)); +} + +PX_FORCE_INLINE Vec4V V4RsqrtFast(const Vec4V a) +{ + return _mm_rsqrt_ps(a); +} + +PX_FORCE_INLINE Vec4V V4Sqrt(const Vec4V a) +{ + return _mm_sqrt_ps(a); +} + +PX_FORCE_INLINE Vec4V V4ScaleAdd(const Vec4V a, const FloatV b, const Vec4V c) +{ + ASSERT_ISVALIDFLOATV(b); + return V4Add(V4Scale(a, b), c); +} + +PX_FORCE_INLINE Vec4V V4NegScaleSub(const Vec4V a, const FloatV b, const Vec4V c) +{ + ASSERT_ISVALIDFLOATV(b); + return V4Sub(c, V4Scale(a, b)); +} + +PX_FORCE_INLINE Vec4V V4MulAdd(const Vec4V a, const Vec4V b, const Vec4V c) +{ + return V4Add(V4Mul(a, b), c); +} + +PX_FORCE_INLINE Vec4V V4NegMulSub(const Vec4V a, const Vec4V b, const Vec4V c) +{ + return V4Sub(c, V4Mul(a, b)); +} + +PX_FORCE_INLINE Vec4V V4Abs(const Vec4V a) +{ + return V4Max(a, V4Neg(a)); +} + +PX_FORCE_INLINE FloatV V4SumElements(const Vec4V a) +{ +#ifdef __SSE4_2__ + Vec4V r = _mm_hadd_ps(a, a); + r = _mm_hadd_ps(r, r); + return r; +#else + const Vec4V xy = V4UnpackXY(a, a); // x,x,y,y + const Vec4V zw = V4UnpackZW(a, a); // z,z,w,w + const Vec4V xz_yw = V4Add(xy, zw); // x+z,x+z,y+w,y+w + const FloatV xz = V4GetX(xz_yw); // x+z + const FloatV yw = V4GetZ(xz_yw); // y+w + return FAdd(xz, yw); // sum +#endif +} + +PX_FORCE_INLINE FloatV V4Dot(const Vec4V a, const Vec4V b) +{ +#ifdef __SSE4_2__ + return _mm_dp_ps(a, b, 0xff); +#else + const __m128 dot1 = _mm_mul_ps(a, b); // x,y,z,w + const __m128 shuf1 = _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(2, 1, 0, 3)); // w,x,y,z + const __m128 shuf2 = _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(1, 0, 3, 2)); // z,w,x,y + const __m128 shuf3 = _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(0, 3, 2, 1)); // y,z,w,x + return _mm_add_ps(_mm_add_ps(shuf2, shuf3), _mm_add_ps(dot1, shuf1)); +#endif +} + +PX_FORCE_INLINE FloatV V4Dot3(const Vec4V a, const Vec4V b) +{ +#ifdef __SSE4_2__ + return _mm_dp_ps(a, b, 0x7f); +#else + const __m128 dot1 = _mm_mul_ps(a, b); // w,z,y,x + const __m128 shuf1 
= _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(0, 0, 0, 0)); // z,y,x,w + const __m128 shuf2 = _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(1, 1, 1, 1)); // y,x,w,z + const __m128 shuf3 = _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(2, 2, 2, 2)); // x,w,z,y + return _mm_add_ps(_mm_add_ps(shuf1, shuf2), shuf3); +#endif +} + +PX_FORCE_INLINE Vec4V V4Cross(const Vec4V a, const Vec4V b) +{ + const __m128 r1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + const __m128 r2 = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l2 = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + return _mm_sub_ps(_mm_mul_ps(l1, l2), _mm_mul_ps(r1, r2)); +} + +PX_FORCE_INLINE FloatV V4Length(const Vec4V a) +{ + return _mm_sqrt_ps(V4Dot(a, a)); +} + +PX_FORCE_INLINE FloatV V4LengthSq(const Vec4V a) +{ + return V4Dot(a, a); +} + +PX_FORCE_INLINE Vec4V V4Normalize(const Vec4V a) +{ + ASSERT_ISFINITELENGTH(a); + return V4ScaleInv(a, _mm_sqrt_ps(V4Dot(a, a))); +} + +PX_FORCE_INLINE Vec4V V4NormalizeFast(const Vec4V a) +{ + ASSERT_ISFINITELENGTH(a); + return V4ScaleInvFast(a, _mm_sqrt_ps(V4Dot(a, a))); +} + +PX_FORCE_INLINE Vec4V V4NormalizeSafe(const Vec4V a, const Vec3V unsafeReturnValue) +{ + const __m128 eps = V3Eps(); + const __m128 length = V4Length(a); + const __m128 isGreaterThanZero = V4IsGrtr(length, eps); + return V4Sel(isGreaterThanZero, V4ScaleInv(a, length), unsafeReturnValue); +} + +PX_FORCE_INLINE BoolV V4IsEqU32(const VecU32V a, const VecU32V b) +{ + return m128_I2F(_mm_cmpeq_epi32(m128_F2I(a), m128_F2I(b))); +} + +PX_FORCE_INLINE Vec4V V4Sel(const BoolV c, const Vec4V a, const Vec4V b) +{ + return _mm_or_ps(_mm_andnot_ps(c, b), _mm_and_ps(c, a)); +} + +PX_FORCE_INLINE BoolV V4IsGrtr(const Vec4V a, const Vec4V b) +{ + return _mm_cmpgt_ps(a, b); +} + +PX_FORCE_INLINE BoolV V4IsGrtrOrEq(const Vec4V a, const Vec4V b) +{ + return _mm_cmpge_ps(a, b); +} + +PX_FORCE_INLINE BoolV 
V4IsEq(const Vec4V a, const Vec4V b) +{ + return _mm_cmpeq_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4Max(const Vec4V a, const Vec4V b) +{ + return _mm_max_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4Min(const Vec4V a, const Vec4V b) +{ + return _mm_min_ps(a, b); +} + +PX_FORCE_INLINE FloatV V4ExtractMax(const Vec4V a) +{ + const __m128 shuf1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 1, 0, 3)); + const __m128 shuf2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)); + const __m128 shuf3 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 3, 2, 1)); + + return _mm_max_ps(_mm_max_ps(a, shuf1), _mm_max_ps(shuf2, shuf3)); +} + +PX_FORCE_INLINE FloatV V4ExtractMin(const Vec4V a) +{ + const __m128 shuf1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 1, 0, 3)); + const __m128 shuf2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)); + const __m128 shuf3 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 3, 2, 1)); + + return _mm_min_ps(_mm_min_ps(a, shuf1), _mm_min_ps(shuf2, shuf3)); +} + +PX_FORCE_INLINE Vec4V V4Clamp(const Vec4V a, const Vec4V minV, const Vec4V maxV) +{ + return V4Max(V4Min(a, maxV), minV); +} + +PX_FORCE_INLINE PxU32 V4AllGrtr(const Vec4V a, const Vec4V b) +{ + return internalUnitSSE2Simd::BAllTrue4_R(V4IsGrtr(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq(const Vec4V a, const Vec4V b) +{ + return internalUnitSSE2Simd::BAllTrue4_R(V4IsGrtrOrEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq3(const Vec4V a, const Vec4V b) +{ + return internalUnitSSE2Simd::BAllTrue3_R(V4IsGrtrOrEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllEq(const Vec4V a, const Vec4V b) +{ + return internalUnitSSE2Simd::BAllTrue4_R(V4IsEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AnyGrtr3(const Vec4V a, const Vec4V b) +{ + return internalUnitSSE2Simd::BAnyTrue3_R(V4IsGrtr(a, b)); +} + +PX_FORCE_INLINE Vec4V V4Round(const Vec4V a) +{ +#ifdef __SSE4_2__ + return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); +#else + // return _mm_round_ps(a, 0x0); + const Vec4V half = V4Load(0.5f); + const __m128 signBit = 
_mm_cvtepi32_ps(_mm_srli_epi32(_mm_cvtps_epi32(a), 31)); + const Vec4V aRound = V4Sub(V4Add(a, half), signBit); + __m128i tmp = _mm_cvttps_epi32(aRound); + return _mm_cvtepi32_ps(tmp); +#endif +} + +PX_FORCE_INLINE Vec4V V4Sin(const Vec4V a) +{ + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec4V tmp = V4Mul(a, recipTwoPi); + const Vec4V b = V4Round(tmp); + const Vec4V V1 = V4NegMulSub(twoPi, b, a); + + // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - + // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI) + const Vec4V V2 = V4Mul(V1, V1); + const Vec4V V3 = V4Mul(V2, V1); + const Vec4V V5 = V4Mul(V3, V2); + const Vec4V V7 = V4Mul(V5, V2); + const Vec4V V9 = V4Mul(V7, V2); + const Vec4V V11 = V4Mul(V9, V2); + const Vec4V V13 = V4Mul(V11, V2); + const Vec4V V15 = V4Mul(V13, V2); + const Vec4V V17 = V4Mul(V15, V2); + const Vec4V V19 = V4Mul(V17, V2); + const Vec4V V21 = V4Mul(V19, V2); + const Vec4V V23 = V4Mul(V21, V2); + + const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f); + const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f); + const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f); + + const FloatV S1 = V4GetY(sinCoefficients0); + const FloatV S2 = V4GetZ(sinCoefficients0); + const FloatV S3 = V4GetW(sinCoefficients0); + const FloatV S4 = V4GetX(sinCoefficients1); + const FloatV S5 = V4GetY(sinCoefficients1); + const FloatV S6 = V4GetZ(sinCoefficients1); + const FloatV S7 = V4GetW(sinCoefficients1); + const FloatV S8 = V4GetX(sinCoefficients2); + const FloatV S9 = V4GetY(sinCoefficients2); + const FloatV S10 = V4GetZ(sinCoefficients2); + const FloatV S11 = V4GetW(sinCoefficients2); + + Vec4V Result; + Result = V4MulAdd(S1, V3, V1); + Result = V4MulAdd(S2, V5, Result); + Result = V4MulAdd(S3, V7, Result); + Result = V4MulAdd(S4, V9, Result); + Result = V4MulAdd(S5, V11, Result); + Result = 
V4MulAdd(S6, V13, Result); + Result = V4MulAdd(S7, V15, Result); + Result = V4MulAdd(S8, V17, Result); + Result = V4MulAdd(S9, V19, Result); + Result = V4MulAdd(S10, V21, Result); + Result = V4MulAdd(S11, V23, Result); + + return Result; +} + +PX_FORCE_INLINE Vec4V V4Cos(const Vec4V a) +{ + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec4V tmp = V4Mul(a, recipTwoPi); + const Vec4V b = V4Round(tmp); + const Vec4V V1 = V4NegMulSub(twoPi, b, a); + + // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! - + // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) + const Vec4V V2 = V4Mul(V1, V1); + const Vec4V V4 = V4Mul(V2, V2); + const Vec4V V6 = V4Mul(V4, V2); + const Vec4V V8 = V4Mul(V4, V4); + const Vec4V V10 = V4Mul(V6, V4); + const Vec4V V12 = V4Mul(V6, V6); + const Vec4V V14 = V4Mul(V8, V6); + const Vec4V V16 = V4Mul(V8, V8); + const Vec4V V18 = V4Mul(V10, V8); + const Vec4V V20 = V4Mul(V10, V10); + const Vec4V V22 = V4Mul(V12, V10); + + const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f); + const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f); + const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f); + + const FloatV C1 = V4GetY(cosCoefficients0); + const FloatV C2 = V4GetZ(cosCoefficients0); + const FloatV C3 = V4GetW(cosCoefficients0); + const FloatV C4 = V4GetX(cosCoefficients1); + const FloatV C5 = V4GetY(cosCoefficients1); + const FloatV C6 = V4GetZ(cosCoefficients1); + const FloatV C7 = V4GetW(cosCoefficients1); + const FloatV C8 = V4GetX(cosCoefficients2); + const FloatV C9 = V4GetY(cosCoefficients2); + const FloatV C10 = V4GetZ(cosCoefficients2); + const FloatV C11 = V4GetW(cosCoefficients2); + + Vec4V Result; + Result = V4MulAdd(C1, V2, V4One()); + Result = V4MulAdd(C2, V4, Result); + Result = V4MulAdd(C3, V6, Result); + Result = V4MulAdd(C4, V8, Result); + Result = V4MulAdd(C5, V10, Result); + 
Result = V4MulAdd(C6, V12, Result); + Result = V4MulAdd(C7, V14, Result); + Result = V4MulAdd(C8, V16, Result); + Result = V4MulAdd(C9, V18, Result); + Result = V4MulAdd(C10, V20, Result); + Result = V4MulAdd(C11, V22, Result); + + return Result; +} + +PX_FORCE_INLINE void V4Transpose(Vec4V& col0, Vec4V& col1, Vec4V& col2, Vec4V& col3) +{ + Vec4V tmp0 = _mm_unpacklo_ps(col0, col1); + Vec4V tmp2 = _mm_unpacklo_ps(col2, col3); + Vec4V tmp1 = _mm_unpackhi_ps(col0, col1); + Vec4V tmp3 = _mm_unpackhi_ps(col2, col3); + col0 = _mm_movelh_ps(tmp0, tmp2); + col1 = _mm_movehl_ps(tmp2, tmp0); + col2 = _mm_movelh_ps(tmp1, tmp3); + col3 = _mm_movehl_ps(tmp3, tmp1); +} + +////////////////////////////////// +// BoolV +////////////////////////////////// + +PX_FORCE_INLINE BoolV BFFFF() +{ + return _mm_setzero_ps(); +} + +PX_FORCE_INLINE BoolV BFFFT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0,0,0xFFFFFFFF}; + const __m128 ffft=_mm_load_ps((float*)&f); + return ffft;*/ + return m128_I2F(_mm_set_epi32(-1, 0, 0, 0)); +} + +PX_FORCE_INLINE BoolV BFFTF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0,0xFFFFFFFF,0}; + const __m128 fftf=_mm_load_ps((float*)&f); + return fftf;*/ + return m128_I2F(_mm_set_epi32(0, -1, 0, 0)); +} + +PX_FORCE_INLINE BoolV BFFTT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0,0xFFFFFFFF,0xFFFFFFFF}; + const __m128 fftt=_mm_load_ps((float*)&f); + return fftt;*/ + return m128_I2F(_mm_set_epi32(-1, -1, 0, 0)); +} + +PX_FORCE_INLINE BoolV BFTFF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0xFFFFFFFF,0,0}; + const __m128 ftff=_mm_load_ps((float*)&f); + return ftff;*/ + return m128_I2F(_mm_set_epi32(0, 0, -1, 0)); +} + +PX_FORCE_INLINE BoolV BFTFT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0xFFFFFFFF,0,0xFFFFFFFF}; + const __m128 ftft=_mm_load_ps((float*)&f); + return ftft;*/ + return m128_I2F(_mm_set_epi32(-1, 0, -1, 0)); +} + +PX_FORCE_INLINE BoolV BFTTF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0xFFFFFFFF,0xFFFFFFFF,0}; + const __m128 fttf=_mm_load_ps((float*)&f); + 
return fttf;*/ + return m128_I2F(_mm_set_epi32(0, -1, -1, 0)); +} + +PX_FORCE_INLINE BoolV BFTTT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}; + const __m128 fttt=_mm_load_ps((float*)&f); + return fttt;*/ + return m128_I2F(_mm_set_epi32(-1, -1, -1, 0)); +} + +PX_FORCE_INLINE BoolV BTFFF() +{ + // const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0,0,0}; + // const __m128 tfff=_mm_load_ps((float*)&f); + // return tfff; + return m128_I2F(_mm_set_epi32(0, 0, 0, -1)); +} + +PX_FORCE_INLINE BoolV BTFFT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0,0,0xFFFFFFFF}; + const __m128 tfft=_mm_load_ps((float*)&f); + return tfft;*/ + return m128_I2F(_mm_set_epi32(-1, 0, 0, -1)); +} + +PX_FORCE_INLINE BoolV BTFTF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0,0xFFFFFFFF,0}; + const __m128 tftf=_mm_load_ps((float*)&f); + return tftf;*/ + return m128_I2F(_mm_set_epi32(0, -1, 0, -1)); +} + +PX_FORCE_INLINE BoolV BTFTT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0,0xFFFFFFFF,0xFFFFFFFF}; + const __m128 tftt=_mm_load_ps((float*)&f); + return tftt;*/ + return m128_I2F(_mm_set_epi32(-1, -1, 0, -1)); +} + +PX_FORCE_INLINE BoolV BTTFF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0xFFFFFFFF,0,0}; + const __m128 ttff=_mm_load_ps((float*)&f); + return ttff;*/ + return m128_I2F(_mm_set_epi32(0, 0, -1, -1)); +} + +PX_FORCE_INLINE BoolV BTTFT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0xFFFFFFFF,0,0xFFFFFFFF}; + const __m128 ttft=_mm_load_ps((float*)&f); + return ttft;*/ + return m128_I2F(_mm_set_epi32(-1, 0, -1, -1)); +} + +PX_FORCE_INLINE BoolV BTTTF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0}; + const __m128 tttf=_mm_load_ps((float*)&f); + return tttf;*/ + return m128_I2F(_mm_set_epi32(0, -1, -1, -1)); +} + +PX_FORCE_INLINE BoolV BTTTT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}; + const __m128 tttt=_mm_load_ps((float*)&f); + return tttt;*/ + return 
m128_I2F(_mm_set_epi32(-1, -1, -1, -1)); +} + +PX_FORCE_INLINE BoolV BXMask() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0,0,0}; + const __m128 tfff=_mm_load_ps((float*)&f); + return tfff;*/ + return m128_I2F(_mm_set_epi32(0, 0, 0, -1)); +} + +PX_FORCE_INLINE BoolV BYMask() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0xFFFFFFFF,0,0}; + const __m128 ftff=_mm_load_ps((float*)&f); + return ftff;*/ + return m128_I2F(_mm_set_epi32(0, 0, -1, 0)); +} + +PX_FORCE_INLINE BoolV BZMask() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0,0xFFFFFFFF,0}; + const __m128 fftf=_mm_load_ps((float*)&f); + return fftf;*/ + return m128_I2F(_mm_set_epi32(0, -1, 0, 0)); +} + +PX_FORCE_INLINE BoolV BWMask() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0,0,0xFFFFFFFF}; + const __m128 ffft=_mm_load_ps((float*)&f); + return ffft;*/ + return m128_I2F(_mm_set_epi32(-1, 0, 0, 0)); +} + +PX_FORCE_INLINE BoolV BGetX(const BoolV f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(0, 0, 0, 0)); +} + +PX_FORCE_INLINE BoolV BGetY(const BoolV f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(1, 1, 1, 1)); +} + +PX_FORCE_INLINE BoolV BGetZ(const BoolV f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(2, 2, 2, 2)); +} + +PX_FORCE_INLINE BoolV BGetW(const BoolV f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 3, 3, 3)); +} + +PX_FORCE_INLINE BoolV BSetX(const BoolV v, const BoolV f) +{ + return V4Sel(BFTTT(), v, f); +} + +PX_FORCE_INLINE BoolV BSetY(const BoolV v, const BoolV f) +{ + return V4Sel(BTFTT(), v, f); +} + +PX_FORCE_INLINE BoolV BSetZ(const BoolV v, const BoolV f) +{ + return V4Sel(BTTFT(), v, f); +} + +PX_FORCE_INLINE BoolV BSetW(const BoolV v, const BoolV f) +{ + return V4Sel(BTTTF(), v, f); +} + +PX_FORCE_INLINE BoolV BAnd(const BoolV a, const BoolV b) +{ + return _mm_and_ps(a, b); +} + +PX_FORCE_INLINE BoolV BNot(const BoolV a) +{ + const BoolV bAllTrue(BTTTT()); + return _mm_xor_ps(a, bAllTrue); +} + +PX_FORCE_INLINE BoolV BAndNot(const BoolV a, const BoolV b) +{ + return _mm_andnot_ps(b, a); +} + 
+PX_FORCE_INLINE BoolV BOr(const BoolV a, const BoolV b) +{ + return _mm_or_ps(a, b); +} + +PX_FORCE_INLINE BoolV BAllTrue4(const BoolV a) +{ + const BoolV bTmp = + _mm_and_ps(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 0, 1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 3, 2, 3))); + return _mm_and_ps(_mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(0, 0, 0, 0)), + _mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(1, 1, 1, 1))); +} + +PX_FORCE_INLINE BoolV BAnyTrue4(const BoolV a) +{ + const BoolV bTmp = + _mm_or_ps(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 0, 1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 3, 2, 3))); + return _mm_or_ps(_mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(0, 0, 0, 0)), + _mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(1, 1, 1, 1))); +} + +PX_FORCE_INLINE BoolV BAllTrue3(const BoolV a) +{ + const BoolV bTmp = + _mm_and_ps(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 0, 1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2))); + return _mm_and_ps(_mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(0, 0, 0, 0)), + _mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(1, 1, 1, 1))); +} + +PX_FORCE_INLINE BoolV BAnyTrue3(const BoolV a) +{ + const BoolV bTmp = + _mm_or_ps(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 0, 1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2))); + return _mm_or_ps(_mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(0, 0, 0, 0)), + _mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(1, 1, 1, 1))); +} + +PX_FORCE_INLINE PxU32 BAllEq(const BoolV a, const BoolV b) +{ + const BoolV bTest = m128_I2F(_mm_cmpeq_epi32(m128_F2I(a), m128_F2I(b))); + return internalUnitSSE2Simd::BAllTrue4_R(bTest); +} + +PX_FORCE_INLINE PxU32 BAllEqTTTT(const BoolV a) +{ + return PxU32(_mm_movemask_ps(a)==15); +} + +PX_FORCE_INLINE PxU32 BAllEqFFFF(const BoolV a) +{ + return PxU32(_mm_movemask_ps(a)==0); +} + +PX_FORCE_INLINE PxU32 BGetBitMask(const BoolV a) +{ + return PxU32(_mm_movemask_ps(a)); +} + +////////////////////////////////// +// MAT33V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V M33MulV3(const Mat33V& a, const Vec3V b) +{ + const FloatV x = 
V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = V3Scale(a.col0, x); + const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + return V3Add(v0PlusV1, v2); +} + +PX_FORCE_INLINE Vec3V M33TrnspsMulV3(const Mat33V& a, const Vec3V b) +{ + const FloatV x = V3Dot(a.col0, b); + const FloatV y = V3Dot(a.col1, b); + const FloatV z = V3Dot(a.col2, b); + return V3Merge(x, y, z); +} + +PX_FORCE_INLINE Vec3V M33MulV3AddV3(const Mat33V& A, const Vec3V b, const Vec3V c) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + Vec3V result = V3ScaleAdd(A.col0, x, c); + result = V3ScaleAdd(A.col1, y, result); + return V3ScaleAdd(A.col2, z, result); +} + +PX_FORCE_INLINE Mat33V M33MulM33(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(M33MulV3(a, b.col0), M33MulV3(a, b.col1), M33MulV3(a, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Add(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(V3Add(a.col0, b.col0), V3Add(a.col1, b.col1), V3Add(a.col2, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Scale(const Mat33V& a, const FloatV& b) +{ + return Mat33V(V3Scale(a.col0, b), V3Scale(a.col1, b), V3Scale(a.col2, b)); +} + +PX_FORCE_INLINE Mat33V M33Inverse(const Mat33V& a) +{ + const BoolV tfft = BTFFT(); + const BoolV tttf = BTTTF(); + const FloatV zero = FZero(); + const Vec3V cross01 = V3Cross(a.col0, a.col1); + const Vec3V cross12 = V3Cross(a.col1, a.col2); + const Vec3V cross20 = V3Cross(a.col2, a.col0); + const FloatV dot = V3Dot(cross01, a.col2); + const FloatV invDet = _mm_rcp_ps(dot); + const Vec3V mergeh = _mm_unpacklo_ps(cross12, cross01); + const Vec3V mergel = _mm_unpackhi_ps(cross12, cross01); + Vec3V colInv0 = _mm_unpacklo_ps(mergeh, cross20); + colInv0 = _mm_or_ps(_mm_andnot_ps(tttf, zero), _mm_and_ps(tttf, colInv0)); + const Vec3V zppd = _mm_shuffle_ps(mergeh, cross20, _MM_SHUFFLE(3, 0, 0, 2)); + const Vec3V pbwp = 
_mm_shuffle_ps(cross20, mergeh, _MM_SHUFFLE(3, 3, 1, 0)); + const Vec3V colInv1 = _mm_or_ps(_mm_andnot_ps(BTFFT(), pbwp), _mm_and_ps(BTFFT(), zppd)); + const Vec3V xppd = _mm_shuffle_ps(mergel, cross20, _MM_SHUFFLE(3, 0, 0, 0)); + const Vec3V pcyp = _mm_shuffle_ps(cross20, mergel, _MM_SHUFFLE(3, 1, 2, 0)); + const Vec3V colInv2 = _mm_or_ps(_mm_andnot_ps(tfft, pcyp), _mm_and_ps(tfft, xppd)); + + return Mat33V(_mm_mul_ps(colInv0, invDet), _mm_mul_ps(colInv1, invDet), _mm_mul_ps(colInv2, invDet)); +} + +PX_FORCE_INLINE Mat33V M33Trnsps(const Mat33V& a) +{ + return Mat33V(V3Merge(V3GetX(a.col0), V3GetX(a.col1), V3GetX(a.col2)), + V3Merge(V3GetY(a.col0), V3GetY(a.col1), V3GetY(a.col2)), + V3Merge(V3GetZ(a.col0), V3GetZ(a.col1), V3GetZ(a.col2))); +} + +PX_FORCE_INLINE Mat33V M33Identity() +{ + return Mat33V(V3UnitX(), V3UnitY(), V3UnitZ()); +} + +PX_FORCE_INLINE Mat33V M33Sub(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(V3Sub(a.col0, b.col0), V3Sub(a.col1, b.col1), V3Sub(a.col2, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Neg(const Mat33V& a) +{ + return Mat33V(V3Neg(a.col0), V3Neg(a.col1), V3Neg(a.col2)); +} + +PX_FORCE_INLINE Mat33V M33Abs(const Mat33V& a) +{ + return Mat33V(V3Abs(a.col0), V3Abs(a.col1), V3Abs(a.col2)); +} + +// Spreads v over three columns, one lane per column, with the other lanes taken from zero: +// col0 keeps x, col1 keeps y, col2 keeps z (assumes V3Sel(mask, a, b) picks a where the mask lane is set). +PX_FORCE_INLINE Mat33V PromoteVec3V(const Vec3V v) +{ + const BoolV bTFFF = BTFFF(); + const BoolV bFTFF = BFTFF(); + // z lane only; the previous BTFTF() mask also let v.x through into the third column + const BoolV bFFTF = BFFTF(); + + const Vec3V zero = V3Zero(); + + return Mat33V(V3Sel(bTFFF, v, zero), V3Sel(bFTFF, v, zero), V3Sel(bFFTF, v, zero)); +} + +PX_FORCE_INLINE Mat33V M33Diagonal(const Vec3VArg d) +{ + const FloatV x = V3Mul(V3UnitX(), d); + const FloatV y = V3Mul(V3UnitY(), d); + const FloatV z = V3Mul(V3UnitZ(), d); + return Mat33V(x, y, z); +} + +////////////////////////////////// +// MAT34V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V M34MulV3(const Mat34V& a, const Vec3V b) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = 
V3Scale(a.col0, x); + const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + const Vec3V v0PlusV1Plusv2 = V3Add(v0PlusV1, v2); + return V3Add(v0PlusV1Plusv2, a.col3); +} + +PX_FORCE_INLINE Vec3V M34Mul33V3(const Mat34V& a, const Vec3V b) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = V3Scale(a.col0, x); + const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + return V3Add(v0PlusV1, v2); +} + +PX_FORCE_INLINE Vec3V M34TrnspsMul33V3(const Mat34V& a, const Vec3V b) +{ + const FloatV x = V3Dot(a.col0, b); + const FloatV y = V3Dot(a.col1, b); + const FloatV z = V3Dot(a.col2, b); + return V3Merge(x, y, z); +} + +PX_FORCE_INLINE Mat34V M34MulM34(const Mat34V& a, const Mat34V& b) +{ + return Mat34V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2), M34MulV3(a, b.col3)); +} + +PX_FORCE_INLINE Mat33V M34MulM33(const Mat34V& a, const Mat33V& b) +{ + return Mat33V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2)); +} + +PX_FORCE_INLINE Mat33V M34Mul33MM34(const Mat34V& a, const Mat34V& b) +{ + return Mat33V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2)); +} + +PX_FORCE_INLINE Mat34V M34Add(const Mat34V& a, const Mat34V& b) +{ + return Mat34V(V3Add(a.col0, b.col0), V3Add(a.col1, b.col1), V3Add(a.col2, b.col2), V3Add(a.col3, b.col3)); +} + +PX_FORCE_INLINE Mat33V M34Trnsps33(const Mat34V& a) +{ + return Mat33V(V3Merge(V3GetX(a.col0), V3GetX(a.col1), V3GetX(a.col2)), + V3Merge(V3GetY(a.col0), V3GetY(a.col1), V3GetY(a.col2)), + V3Merge(V3GetZ(a.col0), V3GetZ(a.col1), V3GetZ(a.col2))); +} + +////////////////////////////////// +// MAT44V +////////////////////////////////// + +PX_FORCE_INLINE Vec4V M44MulV4(const Mat44V& a, const Vec4V b) +{ + const FloatV x = V4GetX(b); + const FloatV y = V4GetY(b); + const FloatV z = V4GetZ(b); + const 
FloatV w = V4GetW(b); + + const Vec4V v0 = V4Scale(a.col0, x); + const Vec4V v1 = V4Scale(a.col1, y); + const Vec4V v2 = V4Scale(a.col2, z); + const Vec4V v3 = V4Scale(a.col3, w); + const Vec4V v0PlusV1 = V4Add(v0, v1); + const Vec4V v0PlusV1Plusv2 = V4Add(v0PlusV1, v2); + return V4Add(v0PlusV1Plusv2, v3); +} + +PX_FORCE_INLINE Vec4V M44TrnspsMulV4(const Mat44V& a, const Vec4V b) +{ + PX_ALIGN(16, FloatV) dotProdArray[4] = { V4Dot(a.col0, b), V4Dot(a.col1, b), V4Dot(a.col2, b), V4Dot(a.col3, b) }; + return V4Merge(dotProdArray); +} + +PX_FORCE_INLINE Mat44V M44MulM44(const Mat44V& a, const Mat44V& b) +{ + return Mat44V(M44MulV4(a, b.col0), M44MulV4(a, b.col1), M44MulV4(a, b.col2), M44MulV4(a, b.col3)); +} + +PX_FORCE_INLINE Mat44V M44Add(const Mat44V& a, const Mat44V& b) +{ + return Mat44V(V4Add(a.col0, b.col0), V4Add(a.col1, b.col1), V4Add(a.col2, b.col2), V4Add(a.col3, b.col3)); +} + +PX_FORCE_INLINE Mat44V M44Trnsps(const Mat44V& a) +{ + const Vec4V v0 = _mm_unpacklo_ps(a.col0, a.col2); + const Vec4V v1 = _mm_unpackhi_ps(a.col0, a.col2); + const Vec4V v2 = _mm_unpacklo_ps(a.col1, a.col3); + const Vec4V v3 = _mm_unpackhi_ps(a.col1, a.col3); + return Mat44V(_mm_unpacklo_ps(v0, v2), _mm_unpackhi_ps(v0, v2), _mm_unpacklo_ps(v1, v3), _mm_unpackhi_ps(v1, v3)); +} + +PX_FORCE_INLINE Mat44V M44Inverse(const Mat44V& a) +{ + __m128 minor0, minor1, minor2, minor3; + __m128 row0, row1, row2, row3; + __m128 det, tmp1; + + tmp1 = V4Zero(); + row1 = V4Zero(); + row3 = V4Zero(); + + row0 = a.col0; + row1 = _mm_shuffle_ps(a.col1, a.col1, _MM_SHUFFLE(1, 0, 3, 2)); + row2 = a.col2; + row3 = _mm_shuffle_ps(a.col3, a.col3, _MM_SHUFFLE(1, 0, 3, 2)); + + tmp1 = _mm_mul_ps(row2, row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor0 = _mm_mul_ps(row1, tmp1); + minor1 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0); + minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1); + minor1 = _mm_shuffle_ps(minor1, 
minor1, 0x4E); + + tmp1 = _mm_mul_ps(row1, row2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0); + minor3 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1)); + minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3); + minor3 = _mm_shuffle_ps(minor3, minor3, 0x4E); + + tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + row2 = _mm_shuffle_ps(row2, row2, 0x4E); + minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0); + minor2 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1)); + minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2); + minor2 = _mm_shuffle_ps(minor2, minor2, 0x4E); + + tmp1 = _mm_mul_ps(row0, row1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2); + minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2); + minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1)); + + tmp1 = _mm_mul_ps(row0, row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1)); + minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1); + minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1)); + + tmp1 = _mm_mul_ps(row0, row2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1); + minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1)); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1)); + minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3); + + det = _mm_mul_ps(row0, minor0); + det = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det); + det = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det); 
+ tmp1 = _mm_rcp_ss(det); +#if 0 + det = _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1))); + det = _mm_shuffle_ps(det, det, 0x00); +#else + det = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(0, 0, 0, 0)); +#endif + + minor0 = _mm_mul_ps(det, minor0); + minor1 = _mm_mul_ps(det, minor1); + minor2 = _mm_mul_ps(det, minor2); + minor3 = _mm_mul_ps(det, minor3); + Mat44V invTrans(minor0, minor1, minor2, minor3); + return M44Trnsps(invTrans); +} + +PX_FORCE_INLINE Vec4V V4LoadXYZW(const PxF32& x, const PxF32& y, const PxF32& z, const PxF32& w) +{ + return _mm_set_ps(w, z, y, x); +} + +/* +// AP: work in progress - use proper SSE intrinsics where possible +PX_FORCE_INLINE VecU16V V4U32PK(VecU32V a, VecU32V b) +{ + VecU16V result; + result.m128_u16[0] = PxU16(PxClamp<PxU32>((a).m128_u32[0], 0, 0xFFFF)); + result.m128_u16[1] = PxU16(PxClamp<PxU32>((a).m128_u32[1], 0, 0xFFFF)); + result.m128_u16[2] = PxU16(PxClamp<PxU32>((a).m128_u32[2], 0, 0xFFFF)); + result.m128_u16[3] = PxU16(PxClamp<PxU32>((a).m128_u32[3], 0, 0xFFFF)); + result.m128_u16[4] = PxU16(PxClamp<PxU32>((b).m128_u32[0], 0, 0xFFFF)); + result.m128_u16[5] = PxU16(PxClamp<PxU32>((b).m128_u32[1], 0, 0xFFFF)); + result.m128_u16[6] = PxU16(PxClamp<PxU32>((b).m128_u32[2], 0, 0xFFFF)); + result.m128_u16[7] = PxU16(PxClamp<PxU32>((b).m128_u32[3], 0, 0xFFFF)); + return result; +} +*/ + +PX_FORCE_INLINE VecU32V V4U32Sel(const BoolV c, const VecU32V a, const VecU32V b) +{ + return m128_I2F(_mm_or_si128(_mm_andnot_si128(m128_F2I(c), m128_F2I(b)), _mm_and_si128(m128_F2I(c), m128_F2I(a)))); +} + +PX_FORCE_INLINE VecU32V V4U32or(VecU32V a, VecU32V b) +{ + return m128_I2F(_mm_or_si128(m128_F2I(a), m128_F2I(b))); +} + +PX_FORCE_INLINE VecU32V V4U32xor(VecU32V a, VecU32V b) +{ + return m128_I2F(_mm_xor_si128(m128_F2I(a), m128_F2I(b))); +} + +PX_FORCE_INLINE VecU32V V4U32and(VecU32V a, VecU32V b) +{ + return m128_I2F(_mm_and_si128(m128_F2I(a), m128_F2I(b))); +} + +PX_FORCE_INLINE VecU32V V4U32Andc(VecU32V a, 
VecU32V b) +{ + return m128_I2F(_mm_andnot_si128(m128_F2I(b), m128_F2I(a))); +} + +/* +PX_FORCE_INLINE VecU16V V4U16Or(VecU16V a, VecU16V b) +{ + return m128_I2F(_mm_or_si128(m128_F2I(a), m128_F2I(b))); +} +*/ + +/* +PX_FORCE_INLINE VecU16V V4U16And(VecU16V a, VecU16V b) +{ + return m128_I2F(_mm_and_si128(m128_F2I(a), m128_F2I(b))); +} +*/ + +/* +PX_FORCE_INLINE VecU16V V4U16Andc(VecU16V a, VecU16V b) +{ + return m128_I2F(_mm_andnot_si128(m128_F2I(b), m128_F2I(a))); +} +*/ + +PX_FORCE_INLINE VecI32V I4Load(const PxI32 i) +{ + return m128_F2I(_mm_load1_ps(reinterpret_cast<const PxF32*>(&i))); +} + +PX_FORCE_INLINE VecI32V I4LoadU(const PxI32* i) +{ + return m128_F2I(_mm_loadu_ps(reinterpret_cast<const PxF32*>(i))); +} + +PX_FORCE_INLINE VecI32V I4LoadA(const PxI32* i) +{ + return m128_F2I(_mm_load_ps(reinterpret_cast<const PxF32*>(i))); +} + +PX_FORCE_INLINE VecI32V VecI32V_Add(const VecI32VArg a, const VecI32VArg b) +{ + return _mm_add_epi32(a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_Sub(const VecI32VArg a, const VecI32VArg b) +{ + return _mm_sub_epi32(a, b); +} + +PX_FORCE_INLINE BoolV VecI32V_IsGrtr(const VecI32VArg a, const VecI32VArg b) +{ + return m128_I2F(_mm_cmpgt_epi32(a, b)); +} + +PX_FORCE_INLINE BoolV VecI32V_IsEq(const VecI32VArg a, const VecI32VArg b) +{ + return m128_I2F(_mm_cmpeq_epi32(a, b)); +} + +PX_FORCE_INLINE VecI32V V4I32Sel(const BoolV c, const VecI32V a, const VecI32V b) +{ + return _mm_or_si128(_mm_andnot_si128(m128_F2I(c), b), _mm_and_si128(m128_F2I(c), a)); +} + +PX_FORCE_INLINE VecI32V VecI32V_Zero() +{ + return _mm_setzero_si128(); +} + +PX_FORCE_INLINE VecI32V VecI32V_One() +{ + return I4Load(1); +} + +PX_FORCE_INLINE VecI32V VecI32V_Two() +{ + return I4Load(2); +} + +PX_FORCE_INLINE VecI32V VecI32V_MinusOne() +{ + return I4Load(-1); +} + +PX_FORCE_INLINE VecU32V U4Zero() +{ + return U4Load(0); +} + +PX_FORCE_INLINE VecU32V U4One() +{ + return U4Load(1); +} + +PX_FORCE_INLINE VecU32V U4Two() +{ + return U4Load(2); +} + 
+PX_FORCE_INLINE VecI32V VecI32V_Sel(const BoolV c, const VecI32VArg a, const VecI32VArg b) +{ + return _mm_or_si128(_mm_andnot_si128(m128_F2I(c), b), _mm_and_si128(m128_F2I(c), a)); +} + +PX_FORCE_INLINE VecShiftV VecI32V_PrepareShift(const VecI32VArg shift) +{ + VecShiftV s; + s.shift = VecI32V_Sel(BTFFF(), shift, VecI32V_Zero()); + return s; +} + +PX_FORCE_INLINE VecI32V VecI32V_LeftShift(const VecI32VArg a, const VecShiftVArg count) +{ + return _mm_sll_epi32(a, count.shift); +} + +PX_FORCE_INLINE VecI32V VecI32V_RightShift(const VecI32VArg a, const VecShiftVArg count) +{ + return _mm_srl_epi32(a, count.shift); +} + +PX_FORCE_INLINE VecI32V VecI32V_And(const VecI32VArg a, const VecI32VArg b) +{ + return _mm_and_si128(a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_Or(const VecI32VArg a, const VecI32VArg b) +{ + return _mm_or_si128(a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetX(const VecI32VArg a) +{ + return m128_F2I(_mm_shuffle_ps(m128_I2F(a), m128_I2F(a), _MM_SHUFFLE(0, 0, 0, 0))); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetY(const VecI32VArg a) +{ + return m128_F2I(_mm_shuffle_ps(m128_I2F(a), m128_I2F(a), _MM_SHUFFLE(1, 1, 1, 1))); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetZ(const VecI32VArg a) +{ + return m128_F2I(_mm_shuffle_ps(m128_I2F(a), m128_I2F(a), _MM_SHUFFLE(2, 2, 2, 2))); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetW(const VecI32VArg a) +{ + return m128_F2I(_mm_shuffle_ps(m128_I2F(a), m128_I2F(a), _MM_SHUFFLE(3, 3, 3, 3))); +} + +PX_FORCE_INLINE void PxI32_From_VecI32V(const VecI32VArg a, PxI32* i) +{ + _mm_store_ss(reinterpret_cast<PxF32*>(i), m128_I2F(a)); +} + +PX_FORCE_INLINE VecI32V VecI32V_Merge(const VecI32VArg x, const VecI32VArg y, const VecI32VArg z, const VecI32VArg w) +{ + const __m128 xw = _mm_move_ss(m128_I2F(y), m128_I2F(x)); // y, y, y, x + const __m128 yz = _mm_move_ss(m128_I2F(z), m128_I2F(w)); // z, z, z, w + return m128_F2I(_mm_shuffle_ps(xw, yz, _MM_SHUFFLE(0, 2, 1, 0))); +} + +PX_FORCE_INLINE VecI32V VecI32V_From_BoolV(const BoolVArg 
a) +{ + return m128_F2I(a); +} + +PX_FORCE_INLINE VecU32V VecU32V_From_BoolV(const BoolVArg a) +{ + return a; +} + +/* +template<int a> PX_FORCE_INLINE VecI32V V4ISplat() +{ + VecI32V result; + result.m128_i32[0] = a; + result.m128_i32[1] = a; + result.m128_i32[2] = a; + result.m128_i32[3] = a; + return result; +} + +template<PxU32 a> PX_FORCE_INLINE VecU32V V4USplat() +{ + VecU32V result; + result.m128_u32[0] = a; + result.m128_u32[1] = a; + result.m128_u32[2] = a; + result.m128_u32[3] = a; + return result; +} +*/ + +/* +PX_FORCE_INLINE void V4U16StoreAligned(VecU16V val, VecU16V* address) +{ + *address = val; +} +*/ + +PX_FORCE_INLINE void V4U32StoreAligned(VecU32V val, VecU32V* address) +{ + *address = val; +} + +PX_FORCE_INLINE Vec4V V4LoadAligned(Vec4V* addr) +{ + return *addr; +} + +PX_FORCE_INLINE Vec4V V4LoadUnaligned(Vec4V* addr) +{ + return V4LoadU(reinterpret_cast<float*>(addr)); +} + +PX_FORCE_INLINE Vec4V V4Andc(const Vec4V a, const VecU32V b) +{ + VecU32V result32(a); + result32 = V4U32Andc(result32, b); + return Vec4V(result32); +} + +PX_FORCE_INLINE VecU32V V4IsGrtrV32u(const Vec4V a, const Vec4V b) +{ + return V4IsGrtr(a, b); +} + +PX_FORCE_INLINE VecU16V V4U16LoadAligned(VecU16V* addr) +{ + return *addr; +} + +// Loads eight 16-bit lanes from addr, which may be unaligned. +PX_FORCE_INLINE VecU16V V4U16LoadUnaligned(VecU16V* addr) +{ + // A plain *addr is an aligned 16-byte access; go through the unaligned float load like V4LoadUnaligned does. + return V4LoadU(reinterpret_cast<float*>(addr)); +} + +PX_FORCE_INLINE VecU16V V4U16CompareGt(VecU16V a, VecU16V b) +{ + // _mm_cmpgt_epi16 doesn't work for unsigned values unfortunately + // return m128_I2F(_mm_cmpgt_epi16(m128_F2I(a), m128_F2I(b))); + VecU16V result; + result.m128_u16[0] = (a).m128_u16[0] > (b).m128_u16[0]; + result.m128_u16[1] = (a).m128_u16[1] > (b).m128_u16[1]; + result.m128_u16[2] = (a).m128_u16[2] > (b).m128_u16[2]; + result.m128_u16[3] = (a).m128_u16[3] > (b).m128_u16[3]; + result.m128_u16[4] = (a).m128_u16[4] > (b).m128_u16[4]; + result.m128_u16[5] = (a).m128_u16[5] > (b).m128_u16[5]; + result.m128_u16[6] = (a).m128_u16[6] > (b).m128_u16[6]; + result.m128_u16[7] = (a).m128_u16[7] > 
(b).m128_u16[7]; + return result; +} + +PX_FORCE_INLINE VecU16V V4I16CompareGt(VecU16V a, VecU16V b) +{ + return m128_I2F(_mm_cmpgt_epi16(m128_F2I(a), m128_F2I(b))); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_VecU32V(VecU32V a) +{ + Vec4V result = V4LoadXYZW(PxF32(a.m128_u32[0]), PxF32(a.m128_u32[1]), PxF32(a.m128_u32[2]), PxF32(a.m128_u32[3])); + return result; +} + +PX_FORCE_INLINE Vec4V Vec4V_From_VecI32V(VecI32V in) +{ + return _mm_cvtepi32_ps(in); +} + +PX_FORCE_INLINE VecI32V VecI32V_From_Vec4V(Vec4V a) +{ + return _mm_cvttps_epi32(a); +} + +PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecU32V(VecU32V a) +{ + return Vec4V(a); +} + +PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecI32V(VecI32V a) +{ + return m128_I2F(a); +} + +PX_FORCE_INLINE VecU32V VecU32V_ReinterpretFrom_Vec4V(Vec4V a) +{ + return VecU32V(a); +} + +PX_FORCE_INLINE VecI32V VecI32V_ReinterpretFrom_Vec4V(Vec4V a) +{ + return m128_F2I(a); +} + +/* +template<int index> PX_FORCE_INLINE BoolV BSplatElement(BoolV a) +{ + BoolV result; + result[0] = result[1] = result[2] = result[3] = a[index]; + return result; +} +*/ + +template <int index> +BoolV BSplatElement(BoolV a) +{ + float* data = reinterpret_cast<float*>(&a); + return V4Load(data[index]); +} + +template <int index> +PX_FORCE_INLINE VecU32V V4U32SplatElement(VecU32V a) +{ + VecU32V result; + result.m128_u32[0] = result.m128_u32[1] = result.m128_u32[2] = result.m128_u32[3] = a.m128_u32[index]; + return result; +} + +template <int index> +PX_FORCE_INLINE Vec4V V4SplatElement(Vec4V a) +{ + float* data = reinterpret_cast<float*>(&a); + return V4Load(data[index]); +} + +PX_FORCE_INLINE VecU32V U4LoadXYZW(PxU32 x, PxU32 y, PxU32 z, PxU32 w) +{ + VecU32V result; + result.m128_u32[0] = x; + result.m128_u32[1] = y; + result.m128_u32[2] = z; + result.m128_u32[3] = w; + return result; +} + +PX_FORCE_INLINE Vec4V V4Ceil(const Vec4V in) +{ + UnionM128 a(in); + return V4LoadXYZW(PxCeil(a.m128_f32[0]), PxCeil(a.m128_f32[1]), PxCeil(a.m128_f32[2]), 
PxCeil(a.m128_f32[3])); +} + +PX_FORCE_INLINE Vec4V V4Floor(const Vec4V in) +{ + UnionM128 a(in); + return V4LoadXYZW(PxFloor(a.m128_f32[0]), PxFloor(a.m128_f32[1]), PxFloor(a.m128_f32[2]), PxFloor(a.m128_f32[3])); +} + +PX_FORCE_INLINE VecU32V V4ConvertToU32VSaturate(const Vec4V in, PxU32 power) +{ + PX_ASSERT(power == 0 && "Non-zero power not supported in convertToU32VSaturate"); + PX_UNUSED(power); // prevent warning in release builds + PxF32 ffffFFFFasFloat = PxF32(0xFFFF0000); + UnionM128 a(in); + VecU32V result; + result.m128_u32[0] = PxU32(PxClamp<PxF32>((a).m128_f32[0], 0.0f, ffffFFFFasFloat)); + result.m128_u32[1] = PxU32(PxClamp<PxF32>((a).m128_f32[1], 0.0f, ffffFFFFasFloat)); + result.m128_u32[2] = PxU32(PxClamp<PxF32>((a).m128_f32[2], 0.0f, ffffFFFFasFloat)); + result.m128_u32[3] = PxU32(PxClamp<PxF32>((a).m128_f32[3], 0.0f, ffffFFFFasFloat)); + return result; +} + +#endif // PSFOUNDATION_PSUNIXSSE2INLINEAOS_H diff --git a/PxShared/src/foundation/include/windows/PsWindowsAoS.h b/PxShared/src/foundation/include/windows/PsWindowsAoS.h new file mode 100644 index 0000000..aab0712 --- /dev/null +++ b/PxShared/src/foundation/include/windows/PsWindowsAoS.h @@ -0,0 +1,131 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSWINDOWSAOS_H +#define PSFOUNDATION_PSWINDOWSAOS_H + +// no includes here! this file should be included from PxcVecMath.h only!!! + +#if !COMPILE_VECTOR_INTRINSICS +#error Vector intrinsics should not be included when using scalar implementation. 
+#endif + +typedef __m128 FloatV; +typedef __m128 Vec3V; +typedef __m128 Vec4V; +typedef __m128 BoolV; +typedef __m128 VecU32V; +typedef __m128 VecI32V; +typedef __m128 VecU16V; +typedef __m128 VecI16V; +typedef __m128 QuatV; + +#define FloatVArg FloatV & +#define Vec3VArg Vec3V & +#define Vec4VArg Vec4V & +#define BoolVArg BoolV & +#define VecU32VArg VecU32V & +#define VecI32VArg VecI32V & +#define VecU16VArg VecU16V & +#define VecI16VArg VecI16V & +#define QuatVArg QuatV & + +// Optimization for situations in which you cross product multiple vectors with the same vector. +// Avoids 2X shuffles per product +struct VecCrossV +{ + Vec3V mL1; + Vec3V mR1; +}; + +struct VecShiftV +{ + VecI32V shift; +}; +#define VecShiftVArg VecShiftV & + +PX_ALIGN_PREFIX(16) +struct Mat33V +{ + Mat33V() + { + } + Mat33V(const Vec3V& c0, const Vec3V& c1, const Vec3V& c2) : col0(c0), col1(c1), col2(c2) + { + } + Vec3V PX_ALIGN(16, col0); + Vec3V PX_ALIGN(16, col1); + Vec3V PX_ALIGN(16, col2); +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct Mat34V +{ + Mat34V() + { + } + Mat34V(const Vec3V& c0, const Vec3V& c1, const Vec3V& c2, const Vec3V& c3) : col0(c0), col1(c1), col2(c2), col3(c3) + { + } + Vec3V PX_ALIGN(16, col0); + Vec3V PX_ALIGN(16, col1); + Vec3V PX_ALIGN(16, col2); + Vec3V PX_ALIGN(16, col3); +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct Mat43V +{ + Mat43V() + { + } + Mat43V(const Vec4V& c0, const Vec4V& c1, const Vec4V& c2) : col0(c0), col1(c1), col2(c2) + { + } + Vec4V PX_ALIGN(16, col0); + Vec4V PX_ALIGN(16, col1); + Vec4V PX_ALIGN(16, col2); +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct Mat44V +{ + Mat44V() + { + } + Mat44V(const Vec4V& c0, const Vec4V& c1, const Vec4V& c2, const Vec4V& c3) : col0(c0), col1(c1), col2(c2), col3(c3) + { + } + Vec4V PX_ALIGN(16, col0); + Vec4V PX_ALIGN(16, col1); + Vec4V PX_ALIGN(16, col2); + Vec4V PX_ALIGN(16, col3); +} PX_ALIGN_SUFFIX(16); + +#endif // PSFOUNDATION_PSWINDOWSAOS_H diff --git 
a/PxShared/src/foundation/include/windows/PsWindowsFPU.h b/PxShared/src/foundation/include/windows/PsWindowsFPU.h new file mode 100644 index 0000000..d85e531 --- /dev/null +++ b/PxShared/src/foundation/include/windows/PsWindowsFPU.h @@ -0,0 +1,51 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
All rights reserved. + +#ifndef PSFOUNDATION_PSWINDOWSFPU_H +#define PSFOUNDATION_PSWINDOWSFPU_H + +PX_INLINE physx::shdfnd::SIMDGuard::SIMDGuard() +{ +#if !PX_ARM + mControlWord = _mm_getcsr(); + // set default (disable exceptions: _MM_MASK_MASK) and FTZ (_MM_FLUSH_ZERO_ON), DAZ (_MM_DENORMALS_ZERO_ON: (1<<6)) + _mm_setcsr(_MM_MASK_MASK | _MM_FLUSH_ZERO_ON | (1 << 6)); +#endif +} + +PX_INLINE physx::shdfnd::SIMDGuard::~SIMDGuard() +{ +#if !PX_ARM + // restore control word and clear any exception flags + // (setting exception state flags cause exceptions on the first following fp operation) + _mm_setcsr(mControlWord & ~_MM_EXCEPT_MASK); +#endif +} + +#endif // #ifndef PSFOUNDATION_PSWINDOWSFPU_H diff --git a/PxShared/src/foundation/include/windows/PsWindowsInclude.h b/PxShared/src/foundation/include/windows/PsWindowsInclude.h new file mode 100644 index 0000000..75962e1 --- /dev/null +++ b/PxShared/src/foundation/include/windows/PsWindowsInclude.h @@ -0,0 +1,96 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSWINDOWSINCLUDE_H +#define PSFOUNDATION_PSWINDOWSINCLUDE_H + +#include "Ps.h" + +#ifndef _WIN32 +#error "This file should only be included by Windows builds!!" +#endif + +#ifdef _WINDOWS_ // windows already included +#error "Only include windows.h through this file!!" +#endif + +// We only support >= Windows XP, and we need this for critical section and +#define _WIN32_WINNT 0x0501 + +// turn off as much as we can for windows. 
All we really need is the thread functions(critical sections/Interlocked* +// etc) +#define NOGDICAPMASKS +#define NOVIRTUALKEYCODES +#define NOWINMESSAGES +#define NOWINSTYLES +#define NOSYSMETRICS +#define NOMENUS +#define NOICONS +#define NOKEYSTATES +#define NOSYSCOMMANDS +#define NORASTEROPS +#define NOSHOWWINDOW +#define NOATOM +#define NOCLIPBOARD +#define NOCOLOR +#define NOCTLMGR +#define NODRAWTEXT +#define NOGDI +#define NOMB +#define NOMEMMGR +#define NOMETAFILE +#define NOMINMAX +#define NOOPENFILE +#define NOSCROLL +#define NOSERVICE +#define NOSOUND +#define NOTEXTMETRIC +#define NOWH +#define NOWINOFFSETS +#define NOCOMM +#define NOKANJI +#define NOHELP +#define NOPROFILER +#define NODEFERWINDOWPOS +#define NOMCX +#define WIN32_LEAN_AND_MEAN +#define NOUSER +#define NONLS +#define NOMSG + +#pragma warning(push) +#pragma warning(disable : 4668) //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives' +#include <windows.h> +#pragma warning(pop) + +#if PX_SSE2 +#include <xmmintrin.h> +#endif + +#endif // #ifndef PSFOUNDATION_PSWINDOWSINCLUDE_H diff --git a/PxShared/src/foundation/include/windows/PsWindowsInlineAoS.h b/PxShared/src/foundation/include/windows/PsWindowsInlineAoS.h new file mode 100644 index 0000000..14a311f --- /dev/null +++ b/PxShared/src/foundation/include/windows/PsWindowsInlineAoS.h @@ -0,0 +1,3119 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSWINDOWSINLINEAOS_H +#define PSFOUNDATION_PSWINDOWSINLINEAOS_H + +#if !COMPILE_VECTOR_INTRINSICS +#error Vector intrinsics should not be included when using scalar implementation. +#endif + +// Remove this define when all platforms use simd solver. 
+#define PX_SUPPORT_SIMD + +#include "../PsVecMathSSE.h" + +////////////////////////////////////////////////////////////////////// +//Test that Vec3V and FloatV are legal +////////////////////////////////////////////////////////////////////// + +#define FLOAT_COMPONENTS_EQUAL_THRESHOLD 0.01f +PX_FORCE_INLINE bool isValidFloatV(const FloatV a) +{ + const PxF32 x = V4ReadX(a); + const PxF32 y = V4ReadY(a); + const PxF32 z = V4ReadZ(a); + const PxF32 w = V4ReadW(a); + + if ( + (PxAbs(x - y) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) && + (PxAbs(x - z) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) && + (PxAbs(x - w) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) + ) + { + return true; + } + + if ( + (PxAbs((x - y) / x) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) && + (PxAbs((x - z) / x) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) && + (PxAbs((x - w) / x) < FLOAT_COMPONENTS_EQUAL_THRESHOLD) + ) + { + return true; + } + return false; +} + +PX_FORCE_INLINE bool isValidVec3V(const Vec3V a) +{ + //using _mm_comieq_ss to do the comparison doesn't work for NaN. + PX_ALIGN(16, PxF32 f[4]); + V4StoreA((const Vec4V&)a, f); + return f[3] == 0.0f; +} + +PX_FORCE_INLINE bool isFiniteLength(const Vec3V a) +{ + return !FAllEq(V4LengthSq(a), FZero()); +} + +PX_FORCE_INLINE bool isAligned16(void* a) +{ + return(0 == ((size_t)a & 0x0f)); +} + +//ASSERT_FINITELENGTH is deactivated because there is a lot of code that calls a simd normalisation function with zero length but then ignores the result. 
+ +#if PX_DEBUG +#define ASSERT_ISVALIDVEC3V(a) PX_ASSERT(isValidVec3V(a)) +#define ASSERT_ISVALIDFLOATV(a) PX_ASSERT(isValidFloatV(a)) +#define ASSERT_ISALIGNED16(a) PX_ASSERT(isAligned16((void*)a)) +#define ASSERT_ISFINITELENGTH(a) //PX_ASSERT(isFiniteLength(a)) +#else +#define ASSERT_ISVALIDVEC3V(a) +#define ASSERT_ISVALIDFLOATV(a) +#define ASSERT_ISALIGNED16(a) +#define ASSERT_ISFINITELENGTH(a) +#endif +///////////////////////////////////////////////////////////////////// +////FUNCTIONS USED ONLY FOR ASSERTS IN VECTORISED IMPLEMENTATIONS +///////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////// +// USED ONLY INTERNALLY +////////////////////////////////////////////////////////////////////// + +namespace internalWindowsSimd +{ +PX_FORCE_INLINE __m128 m128_I2F(__m128i n) +{ + return _mm_castsi128_ps(n); +} + +PX_FORCE_INLINE __m128i m128_F2I(__m128 n) +{ + return _mm_castps_si128(n); +} + +PX_FORCE_INLINE PxU32 BAllTrue4_R(const BoolV a) +{ + const PxI32 moveMask = _mm_movemask_ps(a); + return PxU32(moveMask == 0xf); +} + +PX_FORCE_INLINE PxU32 BAllTrue3_R(const BoolV a) +{ + const PxI32 moveMask = _mm_movemask_ps(a); + return PxU32((moveMask & 0x7) == 0x7); +} + +PX_FORCE_INLINE PxU32 BAnyTrue4_R(const BoolV a) +{ + const PxI32 moveMask = _mm_movemask_ps(a); + return PxU32(moveMask != 0x0); +} + +PX_FORCE_INLINE PxU32 BAnyTrue3_R(const BoolV a) +{ + const PxI32 moveMask = _mm_movemask_ps(a); + return PxU32(((moveMask & 0x7) != 0x0)); +} + +PX_FORCE_INLINE PxU32 FiniteTestEq(const Vec4V a, const Vec4V b) +{ + // This is a bit of a bodge. + //_mm_comieq_ss returns 1 if either value is nan so we need to re-cast a and b with true encoded as a non-nan + // number. + // There must be a better way of doing this in sse. 
+ const BoolV one = FOne(); + const BoolV zero = FZero(); + const BoolV a1 = V4Sel(a, one, zero); + const BoolV b1 = V4Sel(b, one, zero); + return (PxU32( + _mm_comieq_ss(a1, b1) && + _mm_comieq_ss(_mm_shuffle_ps(a1, a1, _MM_SHUFFLE(1, 1, 1, 1)), _mm_shuffle_ps(b1, b1, _MM_SHUFFLE(1, 1, 1, 1))) && + _mm_comieq_ss(_mm_shuffle_ps(a1, a1, _MM_SHUFFLE(2, 2, 2, 2)), _mm_shuffle_ps(b1, b1, _MM_SHUFFLE(2, 2, 2, 2))) && + _mm_comieq_ss(_mm_shuffle_ps(a1, a1, _MM_SHUFFLE(3, 3, 3, 3)), _mm_shuffle_ps(b1, b1, _MM_SHUFFLE(3, 3, 3, 3))))); +} + +PX_FORCE_INLINE bool hasZeroElementinFloatV(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)), FZero()) ? true : false; +} + +PX_FORCE_INLINE bool hasZeroElementInVec3V(const Vec3V a) +{ + return (_mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)), FZero()) || + _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)), FZero()) || + _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)), FZero())); +} + +PX_FORCE_INLINE bool hasZeroElementInVec4V(const Vec4V a) +{ + return (_mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)), FZero()) || + _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)), FZero()) || + _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)), FZero()) || + _mm_comieq_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)), FZero())); +} + +const PX_ALIGN(16, PxU32 gMaskXYZ[4]) = { 0xffffffff, 0xffffffff, 0xffffffff, 0 }; +} //internalWindowsSimd + +namespace _VecMathTests +{ +// PT: this function returns an invalid Vec3V (W!=0.0f) just for unit-testing 'isValidVec3V' +PX_FORCE_INLINE Vec3V getInvalidVec3V() +{ + const float f = 1.0f; + return _mm_load1_ps(&f); +} + +PX_FORCE_INLINE bool allElementsEqualFloatV(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_comieq_ss(a, b) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVec3V(const Vec3V a, const Vec3V b) +{ + return 
V3AllEq(a, b) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVec4V(const Vec4V a, const Vec4V b) +{ + return V4AllEq(a, b) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualBoolV(const BoolV a, const BoolV b) +{ + return internalWindowsSimd::BAllTrue4_R(VecI32V_IsEq(a, b)) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVecU32V(const VecU32V a, const VecU32V b) +{ + return internalWindowsSimd::BAllTrue4_R(V4IsEqU32(a, b)) != 0; +} + +PX_FORCE_INLINE bool allElementsEqualVecI32V(const VecI32V a, const VecI32V b) +{ + BoolV c = internalWindowsSimd::m128_I2F( + _mm_cmpeq_epi32(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); + return internalWindowsSimd::BAllTrue4_R(c) != 0; +} + +#define VECMATH_AOS_EPSILON (1e-3f) +static const FloatV minFError = FLoad(-VECMATH_AOS_EPSILON); +static const FloatV maxFError = FLoad(VECMATH_AOS_EPSILON); +static const Vec3V minV3Error = V3Load(-VECMATH_AOS_EPSILON); +static const Vec3V maxV3Error = V3Load(VECMATH_AOS_EPSILON); +static const Vec4V minV4Error = V4Load(-VECMATH_AOS_EPSILON); +static const Vec4V maxV4Error = V4Load(VECMATH_AOS_EPSILON); + +PX_FORCE_INLINE bool allElementsNearEqualFloatV(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + const FloatV c = FSub(a, b); + return _mm_comigt_ss(c, minFError) && _mm_comilt_ss(c, maxFError); +} + +PX_FORCE_INLINE bool allElementsNearEqualVec3V(const Vec3V a, const Vec3V b) +{ + const Vec3V c = V3Sub(a, b); + return (_mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)), minV3Error) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)), maxV3Error) && + _mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)), minV3Error) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)), maxV3Error) && + _mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)), minV3Error) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)), maxV3Error)); +} + +PX_FORCE_INLINE bool 
allElementsNearEqualVec4V(const Vec4V a, const Vec4V b) +{ + const Vec4V c = V4Sub(a, b); + return (_mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)), minV4Error) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)), maxV4Error) && + _mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)), minV4Error) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)), maxV4Error) && + _mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)), minV4Error) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)), maxV4Error) && + _mm_comigt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(3, 3, 3, 3)), minV4Error) && + _mm_comilt_ss(_mm_shuffle_ps(c, c, _MM_SHUFFLE(3, 3, 3, 3)), maxV4Error)); +} +} //_VecMathTests + +PX_FORCE_INLINE bool isFiniteFloatV(const FloatV a) +{ + PxF32 f; + FStore(a, &f); + return PxIsFinite(f); + /* + const PxU32 badNumber = (_FPCLASS_SNAN | _FPCLASS_QNAN | _FPCLASS_NINF | _FPCLASS_PINF); + const FloatV vBadNum = FloatV_From_F32((PxF32&)badNumber); + const BoolV vMask = BAnd(vBadNum, a); + return FiniteTestEq(vMask, BFFFF()) == 1; + */ +} + +PX_FORCE_INLINE bool isFiniteVec3V(const Vec3V a) +{ + PX_ALIGN(16, PxF32 f[4]); + V4StoreA((Vec4V&)a, f); + return PxIsFinite(f[0]) && PxIsFinite(f[1]) && PxIsFinite(f[2]); + + /* + const PxU32 badNumber = (_FPCLASS_SNAN | _FPCLASS_QNAN | _FPCLASS_NINF | _FPCLASS_PINF); + const Vec3V vBadNum = Vec3V_From_F32((PxF32&)badNumber); + const BoolV vMask = BAnd(BAnd(vBadNum, a), BTTTF()); + return FiniteTestEq(vMask, BFFFF()) == 1; + */ +} + +PX_FORCE_INLINE bool isFiniteVec4V(const Vec4V a) +{ + PX_ALIGN(16, PxF32 f[4]); + V4StoreA(a, f); + return PxIsFinite(f[0]) && PxIsFinite(f[1]) && PxIsFinite(f[2]) && PxIsFinite(f[3]); + + /* + const PxU32 badNumber = (_FPCLASS_SNAN | _FPCLASS_QNAN | _FPCLASS_NINF | _FPCLASS_PINF); + const Vec4V vBadNum = Vec4V_From_U32((PxF32&)badNumber); + const BoolV vMask = BAnd(vBadNum, a); + + return FiniteTestEq(vMask, BFFFF()) == 1; + */ +} + 
+///////////////////////////////////////////////////////////////////// +////VECTORISED FUNCTION IMPLEMENTATIONS +///////////////////////////////////////////////////////////////////// + +PX_FORCE_INLINE FloatV FLoad(const PxF32 f) +{ + return _mm_load1_ps(&f); +} + +PX_FORCE_INLINE Vec3V V3Load(const PxF32 f) +{ + return _mm_set_ps(0.0f, f, f, f); +} + +PX_FORCE_INLINE Vec4V V4Load(const PxF32 f) +{ + return _mm_load1_ps(&f); +} + +PX_FORCE_INLINE BoolV BLoad(const bool f) +{ + const PxU32 i = PxU32(-(PxI32)f); + return _mm_load1_ps((float*)&i); +} + +PX_FORCE_INLINE Vec3V V3LoadA(const PxVec3& f) +{ + ASSERT_ISALIGNED16(&f); + return _mm_and_ps(_mm_load_ps(&f.x), reinterpret_cast<const Vec4V&>(internalWindowsSimd::gMaskXYZ)); +} + +PX_FORCE_INLINE Vec3V V3LoadU(const PxVec3& f) +{ + return _mm_set_ps(0.0f, f.z, f.y, f.x); +} + +// w component of result is undefined +PX_FORCE_INLINE Vec3V V3LoadUnsafeA(const PxVec3& f) +{ + ASSERT_ISALIGNED16(&f); + return _mm_load_ps(&f.x); +} + +PX_FORCE_INLINE Vec3V V3LoadA(const PxF32* const f) +{ + ASSERT_ISALIGNED16(f); + return V4ClearW(_mm_load_ps(f)); +} + +PX_FORCE_INLINE Vec3V V3LoadU(const PxF32* const i) +{ + return _mm_set_ps(0.0f, i[2], i[1], i[0]); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V(Vec4V v) +{ + return V4ClearW(v); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_Vec4V_WUndefined(const Vec4V v) +{ + return v; +} + +PX_FORCE_INLINE Vec4V Vec4V_From_Vec3V(Vec3V f) +{ + return f; // ok if it is implemented as the same type. 
+} + +PX_FORCE_INLINE Vec4V Vec4V_From_FloatV(FloatV f) +{ + return f; +} + +PX_FORCE_INLINE Vec3V Vec3V_From_FloatV(FloatV f) +{ + return Vec3V_From_Vec4V(Vec4V_From_FloatV(f)); +} + +PX_FORCE_INLINE Vec3V Vec3V_From_FloatV_WUndefined(FloatV f) +{ + return Vec3V_From_Vec4V_WUndefined(Vec4V_From_FloatV(f)); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_PxVec3_WUndefined(const PxVec3& f) +{ + return _mm_set_ps(0.0f, f.z, f.y, f.x); +} + +PX_FORCE_INLINE Vec4V V4LoadA(const PxF32* const f) +{ + ASSERT_ISALIGNED16(f); + return _mm_load_ps(f); +} + +PX_FORCE_INLINE void V4StoreA(const Vec4V a, PxF32* f) +{ + ASSERT_ISALIGNED16(f); + _mm_store_ps(f, a); +} + +PX_FORCE_INLINE void V4StoreU(const Vec4V a, PxF32* f) +{ + _mm_storeu_ps(f, a); +} + +PX_FORCE_INLINE void BStoreA(const BoolV a, PxU32* f) +{ + ASSERT_ISALIGNED16(f); + _mm_store_ps((PxF32*)f, a); +} + +PX_FORCE_INLINE void U4StoreA(const VecU32V uv, PxU32* u) +{ + ASSERT_ISALIGNED16(u); + _mm_store_ps((PxF32*)u, uv); +} + +PX_FORCE_INLINE void I4StoreA(const VecI32V iv, PxI32* i) +{ + ASSERT_ISALIGNED16(i); + _mm_store_ps((PxF32*)i, iv); +} + +PX_FORCE_INLINE Vec4V V4LoadU(const PxF32* const f) +{ + return _mm_loadu_ps(f); +} + +PX_FORCE_INLINE BoolV BLoad(const bool* const f) +{ + const PX_ALIGN(16, PxU32 b[4]) = { PxU32(-(PxI32)f[0]), PxU32(-(PxI32)f[1]), + PxU32(-(PxI32)f[2]), PxU32(-(PxI32)f[3]) }; + return _mm_load_ps((float*)&b); +} + +PX_FORCE_INLINE void FStore(const FloatV a, PxF32* PX_RESTRICT f) +{ + ASSERT_ISVALIDFLOATV(a); + _mm_store_ss(f, a); +} + +PX_FORCE_INLINE void V3StoreA(const Vec3V a, PxVec3& f) +{ + ASSERT_ISALIGNED16(&f); + PX_ALIGN(16, PxF32 f2[4]); + _mm_store_ps(f2, a); + f = PxVec3(f2[0], f2[1], f2[2]); +} + +PX_FORCE_INLINE void Store_From_BoolV(const BoolV b, PxU32* b2) +{ + _mm_store_ss((PxF32*)b2, b); +} + +PX_FORCE_INLINE void V3StoreU(const Vec3V a, PxVec3& f) +{ + PX_ALIGN(16, PxF32 f2[4]); + _mm_store_ps(f2, a); + f = PxVec3(f2[0], f2[1], f2[2]); +} + +PX_FORCE_INLINE Mat33V 
Mat33V_From_PxMat33(const PxMat33& m) +{ + return Mat33V(V3LoadU(m.column0), V3LoadU(m.column1), V3LoadU(m.column2)); +} + +PX_FORCE_INLINE void PxMat33_From_Mat33V(const Mat33V& m, PxMat33& out) +{ + ASSERT_ISALIGNED16(&out); + V3StoreU(m.col0, out.column0); + V3StoreU(m.col1, out.column1); + V3StoreU(m.col2, out.column2); +} + +////////////////////////////////// +// FLOATV +////////////////////////////////// + +PX_FORCE_INLINE FloatV FZero() +{ + return _mm_setzero_ps(); +} + +PX_FORCE_INLINE FloatV FOne() +{ + return FLoad(1.0f); +} + +PX_FORCE_INLINE FloatV FHalf() +{ + return FLoad(0.5f); +} + +PX_FORCE_INLINE FloatV FEps() +{ + return FLoad(PX_EPS_REAL); +} + +PX_FORCE_INLINE FloatV FEps6() +{ + return FLoad(1e-6f); +} + +PX_FORCE_INLINE FloatV FMax() +{ + return FLoad(PX_MAX_REAL); +} + +PX_FORCE_INLINE FloatV FNegMax() +{ + return FLoad(-PX_MAX_REAL); +} + +PX_FORCE_INLINE FloatV IZero() +{ + const PxU32 zero = 0; + return _mm_load1_ps((PxF32*)&zero); +} + +PX_FORCE_INLINE FloatV IOne() +{ + const PxU32 one = 1; + return _mm_load1_ps((PxF32*)&one); +} + +PX_FORCE_INLINE FloatV ITwo() +{ + const PxU32 two = 2; + return _mm_load1_ps((PxF32*)&two); +} + +PX_FORCE_INLINE FloatV IThree() +{ + const PxU32 three = 3; + return _mm_load1_ps((PxF32*)&three); +} + +PX_FORCE_INLINE FloatV IFour() +{ + const PxU32 four = 4; + return _mm_load1_ps((PxF32*)&four); +} + +PX_FORCE_INLINE FloatV FNeg(const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return _mm_sub_ps(_mm_setzero_ps(), f); +} + +PX_FORCE_INLINE FloatV FAdd(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_add_ps(a, b); +} + +PX_FORCE_INLINE FloatV FSub(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_sub_ps(a, b); +} + +PX_FORCE_INLINE FloatV FMul(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_mul_ps(a, b); +} + +PX_FORCE_INLINE FloatV FDiv(const FloatV 
a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_div_ps(a, b); +} + +PX_FORCE_INLINE FloatV FDivFast(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_mul_ps(a, _mm_rcp_ps(b)); +} + +PX_FORCE_INLINE FloatV FRecip(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return _mm_div_ps(FOne(), a); +} + +PX_FORCE_INLINE FloatV FRecipFast(const FloatV a) +{ + return _mm_rcp_ps(a); +} + +PX_FORCE_INLINE FloatV FRsqrt(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return _mm_div_ps(FOne(), _mm_sqrt_ps(a)); +} + +PX_FORCE_INLINE FloatV FSqrt(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return _mm_sqrt_ps(a); +} + +PX_FORCE_INLINE FloatV FRsqrtFast(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + return _mm_rsqrt_ps(a); +} + +PX_FORCE_INLINE FloatV FScaleAdd(const FloatV a, const FloatV b, const FloatV c) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDFLOATV(c); + return FAdd(FMul(a, b), c); +} + +PX_FORCE_INLINE FloatV FNegScaleSub(const FloatV a, const FloatV b, const FloatV c) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDFLOATV(c); + return FSub(c, FMul(a, b)); +} + +PX_FORCE_INLINE FloatV FAbs(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + PX_ALIGN(16, const static PxU32 absMask[4]) = { 0x7fFFffFF, 0x7fFFffFF, 0x7fFFffFF, 0x7fFFffFF }; + return _mm_and_ps(a, _mm_load_ps((PxF32*)absMask)); +} + +PX_FORCE_INLINE FloatV FSel(const BoolV c, const FloatV a, const FloatV b) +{ + PX_ASSERT(_VecMathTests::allElementsEqualBoolV(c, BTTTT()) || + _VecMathTests::allElementsEqualBoolV(c, BFFFF())); + ASSERT_ISVALIDFLOATV(_mm_or_ps(_mm_andnot_ps(c, b), _mm_and_ps(c, a))); + return _mm_or_ps(_mm_andnot_ps(c, b), _mm_and_ps(c, a)); +} + +PX_FORCE_INLINE BoolV FIsGrtr(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_cmpgt_ps(a, b); +} + +PX_FORCE_INLINE BoolV FIsGrtrOrEq(const FloatV a, const 
FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_cmpge_ps(a, b); +} + +PX_FORCE_INLINE BoolV FIsEq(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_cmpeq_ps(a, b); +} + +PX_FORCE_INLINE FloatV FMax(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_max_ps(a, b); +} + +PX_FORCE_INLINE FloatV FMin(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_min_ps(a, b); +} + +PX_FORCE_INLINE FloatV FClamp(const FloatV a, const FloatV minV, const FloatV maxV) +{ + ASSERT_ISVALIDFLOATV(minV); + ASSERT_ISVALIDFLOATV(maxV); + return _mm_max_ps(_mm_min_ps(a, maxV), minV); +} + +PX_FORCE_INLINE PxU32 FAllGrtr(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return PxU32(_mm_comigt_ss(a, b)); +} + +PX_FORCE_INLINE PxU32 FAllGrtrOrEq(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return PxU32(_mm_comige_ss(a, b)); +} + +PX_FORCE_INLINE PxU32 FAllEq(const FloatV a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(b); + return PxU32(_mm_comieq_ss(a, b)); +} + +PX_FORCE_INLINE FloatV FRound(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + // return _mm_round_ps(a, 0x0); + const FloatV half = FLoad(0.5f); + const __m128 signBit = _mm_cvtepi32_ps(_mm_srli_epi32(_mm_cvtps_epi32(a), 31)); + const FloatV aRound = FSub(FAdd(a, half), signBit); + __m128i tmp = _mm_cvttps_epi32(aRound); + return _mm_cvtepi32_ps(tmp); +} + +PX_FORCE_INLINE FloatV FSin(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const FloatV recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const FloatV twoPi = V4LoadA(g_PXTwoPi.f); + const FloatV tmp = FMul(a, recipTwoPi); + const FloatV b = FRound(tmp); + const FloatV V1 = FNegScaleSub(twoPi, b, a); + + // sin(V) 
~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - + // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI) + const FloatV V2 = FMul(V1, V1); + const FloatV V3 = FMul(V2, V1); + const FloatV V5 = FMul(V3, V2); + const FloatV V7 = FMul(V5, V2); + const FloatV V9 = FMul(V7, V2); + const FloatV V11 = FMul(V9, V2); + const FloatV V13 = FMul(V11, V2); + const FloatV V15 = FMul(V13, V2); + const FloatV V17 = FMul(V15, V2); + const FloatV V19 = FMul(V17, V2); + const FloatV V21 = FMul(V19, V2); + const FloatV V23 = FMul(V21, V2); + + const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f); + const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f); + const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f); + + const FloatV S1 = V4GetY(sinCoefficients0); + const FloatV S2 = V4GetZ(sinCoefficients0); + const FloatV S3 = V4GetW(sinCoefficients0); + const FloatV S4 = V4GetX(sinCoefficients1); + const FloatV S5 = V4GetY(sinCoefficients1); + const FloatV S6 = V4GetZ(sinCoefficients1); + const FloatV S7 = V4GetW(sinCoefficients1); + const FloatV S8 = V4GetX(sinCoefficients2); + const FloatV S9 = V4GetY(sinCoefficients2); + const FloatV S10 = V4GetZ(sinCoefficients2); + const FloatV S11 = V4GetW(sinCoefficients2); + + FloatV Result; + Result = FScaleAdd(S1, V3, V1); + Result = FScaleAdd(S2, V5, Result); + Result = FScaleAdd(S3, V7, Result); + Result = FScaleAdd(S4, V9, Result); + Result = FScaleAdd(S5, V11, Result); + Result = FScaleAdd(S6, V13, Result); + Result = FScaleAdd(S7, V15, Result); + Result = FScaleAdd(S8, V17, Result); + Result = FScaleAdd(S9, V19, Result); + Result = FScaleAdd(S10, V21, Result); + Result = FScaleAdd(S11, V23, Result); + + return Result; +} + +PX_FORCE_INLINE FloatV FCos(const FloatV a) +{ + ASSERT_ISVALIDFLOATV(a); + + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const FloatV recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const FloatV twoPi 
= V4LoadA(g_PXTwoPi.f); + const FloatV tmp = FMul(a, recipTwoPi); + const FloatV b = FRound(tmp); + const FloatV V1 = FNegScaleSub(twoPi, b, a); + + // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! - + // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) + const FloatV V2 = FMul(V1, V1); + const FloatV V4 = FMul(V2, V2); + const FloatV V6 = FMul(V4, V2); + const FloatV V8 = FMul(V4, V4); + const FloatV V10 = FMul(V6, V4); + const FloatV V12 = FMul(V6, V6); + const FloatV V14 = FMul(V8, V6); + const FloatV V16 = FMul(V8, V8); + const FloatV V18 = FMul(V10, V8); + const FloatV V20 = FMul(V10, V10); + const FloatV V22 = FMul(V12, V10); + + const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f); + const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f); + const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f); + + const FloatV C1 = V4GetY(cosCoefficients0); + const FloatV C2 = V4GetZ(cosCoefficients0); + const FloatV C3 = V4GetW(cosCoefficients0); + const FloatV C4 = V4GetX(cosCoefficients1); + const FloatV C5 = V4GetY(cosCoefficients1); + const FloatV C6 = V4GetZ(cosCoefficients1); + const FloatV C7 = V4GetW(cosCoefficients1); + const FloatV C8 = V4GetX(cosCoefficients2); + const FloatV C9 = V4GetY(cosCoefficients2); + const FloatV C10 = V4GetZ(cosCoefficients2); + const FloatV C11 = V4GetW(cosCoefficients2); + + FloatV Result; + Result = FScaleAdd(C1, V2, V4One()); + Result = FScaleAdd(C2, V4, Result); + Result = FScaleAdd(C3, V6, Result); + Result = FScaleAdd(C4, V8, Result); + Result = FScaleAdd(C5, V10, Result); + Result = FScaleAdd(C6, V12, Result); + Result = FScaleAdd(C7, V14, Result); + Result = FScaleAdd(C8, V16, Result); + Result = FScaleAdd(C9, V18, Result); + Result = FScaleAdd(C10, V20, Result); + Result = FScaleAdd(C11, V22, Result); + + return Result; +} + +PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV min, const FloatV max) +{ + 
ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(min); + ASSERT_ISVALIDFLOATV(max); + const BoolV c = BOr(FIsGrtr(a, max), FIsGrtr(min, a)); + return PxU32(!BAllEqFFFF(c)); +} + +PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV min, const FloatV max) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(min); + ASSERT_ISVALIDFLOATV(max); + const BoolV c = BAnd(FIsGrtrOrEq(a, min), FIsGrtrOrEq(max, a)); + return BAllEqTTTT(c); +} + +PX_FORCE_INLINE PxU32 FOutOfBounds(const FloatV a, const FloatV bounds) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(bounds); + return FOutOfBounds(a, FNeg(bounds), bounds); +} + +PX_FORCE_INLINE PxU32 FInBounds(const FloatV a, const FloatV bounds) +{ + ASSERT_ISVALIDFLOATV(a); + ASSERT_ISVALIDFLOATV(bounds); + return FInBounds(a, FNeg(bounds), bounds); +} + +////////////////////////////////// +// VEC3V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V V3Splat(const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + const __m128 zero = V3Zero(); + const __m128 fff0 = _mm_move_ss(f, zero); + return _mm_shuffle_ps(fff0, fff0, _MM_SHUFFLE(0, 1, 2, 3)); +} + +PX_FORCE_INLINE Vec3V V3Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z) +{ + ASSERT_ISVALIDFLOATV(x); + ASSERT_ISVALIDFLOATV(y); + ASSERT_ISVALIDFLOATV(z); + // static on zero causes compiler crash on x64 debug_opt + const __m128 zero = V3Zero(); + const __m128 xy = _mm_move_ss(x, y); + const __m128 z0 = _mm_move_ss(zero, z); + + return _mm_shuffle_ps(xy, z0, _MM_SHUFFLE(1, 0, 0, 1)); +} + +PX_FORCE_INLINE Vec3V V3UnitX() +{ + const PX_ALIGN(16, PxF32 x[4]) = { 1.0f, 0.0f, 0.0f, 0.0f }; + const __m128 x128 = _mm_load_ps(x); + return x128; +} + +PX_FORCE_INLINE Vec3V V3UnitY() +{ + const PX_ALIGN(16, PxF32 y[4]) = { 0.0f, 1.0f, 0.0f, 0.0f }; + const __m128 y128 = _mm_load_ps(y); + return y128; +} + +PX_FORCE_INLINE Vec3V V3UnitZ() +{ + const PX_ALIGN(16, PxF32 z[4]) = { 0.0f, 0.0f, 1.0f, 0.0f }; + const __m128 z128 = _mm_load_ps(z); + return z128; +} + 
+PX_FORCE_INLINE FloatV V3GetX(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(0, 0, 0, 0)); +} + +PX_FORCE_INLINE FloatV V3GetY(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(1, 1, 1, 1)); +} + +PX_FORCE_INLINE FloatV V3GetZ(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(2, 2, 2, 2)); +} + +PX_FORCE_INLINE Vec3V V3SetX(const Vec3V v, const FloatV f) +{ + ASSERT_ISVALIDVEC3V(v); + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BFTTT(), v, f); +} + +PX_FORCE_INLINE Vec3V V3SetY(const Vec3V v, const FloatV f) +{ + ASSERT_ISVALIDVEC3V(v); + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTFTT(), v, f); +} + +PX_FORCE_INLINE Vec3V V3SetZ(const Vec3V v, const FloatV f) +{ + ASSERT_ISVALIDVEC3V(v); + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTTFT(), v, f); +} + +PX_FORCE_INLINE Vec3V V3ColX(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + Vec3V r = _mm_shuffle_ps(a, c, _MM_SHUFFLE(3, 0, 3, 0)); + return V3SetY(r, V3GetX(b)); +} + +PX_FORCE_INLINE Vec3V V3ColY(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + Vec3V r = _mm_shuffle_ps(a, c, _MM_SHUFFLE(3, 1, 3, 1)); + return V3SetY(r, V3GetY(b)); +} + +PX_FORCE_INLINE Vec3V V3ColZ(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + Vec3V r = _mm_shuffle_ps(a, c, _MM_SHUFFLE(3, 2, 3, 2)); + return V3SetY(r, V3GetZ(b)); +} + +PX_FORCE_INLINE Vec3V V3Zero() +{ + return _mm_setzero_ps(); +} + +PX_FORCE_INLINE Vec3V V3One() +{ + return V3Load(1.0f); +} + +PX_FORCE_INLINE Vec3V V3Eps() +{ + return V3Load(PX_EPS_REAL); +} + +PX_FORCE_INLINE Vec3V V3Neg(const Vec3V f) +{ + ASSERT_ISVALIDVEC3V(f); + return _mm_sub_ps(_mm_setzero_ps(), f); +} + +PX_FORCE_INLINE Vec3V V3Add(const Vec3V a, const Vec3V b) +{ 
+ ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_add_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Sub(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_sub_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Scale(const Vec3V a, const FloatV b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_mul_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Mul(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_mul_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3ScaleInv(const Vec3V a, const FloatV b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_div_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Div(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return V4ClearW(_mm_div_ps(a, b)); +} + +PX_FORCE_INLINE Vec3V V3ScaleInvFast(const Vec3V a, const FloatV b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + return _mm_mul_ps(a, _mm_rcp_ps(b)); +} + +PX_FORCE_INLINE Vec3V V3DivFast(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return V4ClearW(_mm_mul_ps(a, _mm_rcp_ps(b))); +} + +PX_FORCE_INLINE Vec3V V3Recip(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 zero = V3Zero(); + const __m128 tttf = BTTTF(); + const __m128 recipA = _mm_div_ps(V3One(), a); + return V4Sel(tttf, recipA, zero); +} + +PX_FORCE_INLINE Vec3V V3RecipFast(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 zero = V3Zero(); + const __m128 tttf = BTTTF(); + const __m128 recipA = _mm_rcp_ps(a); + return V4Sel(tttf, recipA, zero); +} + +PX_FORCE_INLINE Vec3V V3Rsqrt(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 zero = V3Zero(); + const __m128 tttf = BTTTF(); + const __m128 recipA = _mm_div_ps(V3One(), _mm_sqrt_ps(a)); + return V4Sel(tttf, recipA, zero); +} + +PX_FORCE_INLINE Vec3V V3RsqrtFast(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 zero = V3Zero(); + const __m128 tttf = 
BTTTF(); + const __m128 recipA = _mm_rsqrt_ps(a); + return V4Sel(tttf, recipA, zero); +} + +PX_FORCE_INLINE Vec3V V3ScaleAdd(const Vec3V a, const FloatV b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDVEC3V(c); + return V3Add(V3Scale(a, b), c); +} + +PX_FORCE_INLINE Vec3V V3NegScaleSub(const Vec3V a, const FloatV b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDFLOATV(b); + ASSERT_ISVALIDVEC3V(c); + return V3Sub(c, V3Scale(a, b)); +} + +PX_FORCE_INLINE Vec3V V3MulAdd(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + return V3Add(V3Mul(a, b), c); +} + +PX_FORCE_INLINE Vec3V V3NegMulSub(const Vec3V a, const Vec3V b, const Vec3V c) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + ASSERT_ISVALIDVEC3V(c); + return V3Sub(c, V3Mul(a, b)); +} + +PX_FORCE_INLINE Vec3V V3Abs(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return V3Max(a, V3Neg(a)); +} + +PX_FORCE_INLINE FloatV V3Dot(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + + const __m128 t0 = _mm_mul_ps(a, b); // aw*bw | az*bz | ay*by | ax*bx + const __m128 t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(1,0,3,2)); // ay*by | ax*bx | aw*bw | az*bz + const __m128 t2 = _mm_add_ps(t0, t1); // ay*by + aw*bw | ax*bx + az*bz | aw*bw + ay*by | az*bz + ax*bx + const __m128 t3 = _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,3,0,1)); // ax*bx + az*bz | ay*by + aw*bw | az*bz + ax*bx | aw*bw + ay*by + return _mm_add_ps(t3, t2); // ax*bx + az*bz + ay*by + aw*bw + // ay*by + aw*bw + ax*bx + az*bz + // az*bz + ax*bx + aw*bw + ay*by + // aw*bw + ay*by + az*bz + ax*bx +} + +PX_FORCE_INLINE Vec3V V3Cross(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + const __m128 r1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + const __m128 r2 = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l1 = 
_mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l2 = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + return _mm_sub_ps(_mm_mul_ps(l1, l2), _mm_mul_ps(r1, r2)); +} + +PX_FORCE_INLINE VecCrossV V3PrepareCross(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + VecCrossV v; + v.mR1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + v.mL1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + return v; +} + +PX_FORCE_INLINE Vec3V V3Cross(const VecCrossV& a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(b); + const __m128 r2 = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l2 = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + return _mm_sub_ps(_mm_mul_ps(a.mL1, l2), _mm_mul_ps(a.mR1, r2)); +} + +PX_FORCE_INLINE Vec3V V3Cross(const Vec3V a, const VecCrossV& b) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 r2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + return _mm_sub_ps(_mm_mul_ps(b.mR1, r2), _mm_mul_ps(b.mL1, l2)); +} + +PX_FORCE_INLINE Vec3V V3Cross(const VecCrossV& a, const VecCrossV& b) +{ + return _mm_sub_ps(_mm_mul_ps(a.mL1, b.mR1), _mm_mul_ps(a.mR1, b.mL1)); +} + +PX_FORCE_INLINE FloatV V3Length(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_sqrt_ps(V3Dot(a, a)); +} + +PX_FORCE_INLINE FloatV V3LengthSq(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return V3Dot(a, a); +} + +PX_FORCE_INLINE Vec3V V3Normalize(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISFINITELENGTH(a); + return V3ScaleInv(a, _mm_sqrt_ps(V3Dot(a, a))); +} + +PX_FORCE_INLINE Vec3V V3NormalizeFast(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISFINITELENGTH(a); + return V3Scale(a, _mm_rsqrt_ps(V3Dot(a, a))); +} + +PX_FORCE_INLINE Vec3V V3NormalizeSafe(const Vec3V a, const Vec3V unsafeReturnValue) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 eps = FEps(); + const __m128 length = V3Length(a); + const __m128 
isGreaterThanZero = FIsGrtr(length, eps); + return V3Sel(isGreaterThanZero, V3ScaleInv(a, length), unsafeReturnValue); +} + +PX_FORCE_INLINE Vec3V V3Sel(const BoolV c, const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(_mm_or_ps(_mm_andnot_ps(c, b), _mm_and_ps(c, a))); + return _mm_or_ps(_mm_andnot_ps(c, b), _mm_and_ps(c, a)); +} + +PX_FORCE_INLINE BoolV V3IsGrtr(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_cmpgt_ps(a, b); +} + +PX_FORCE_INLINE BoolV V3IsGrtrOrEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_cmpge_ps(a, b); +} + +PX_FORCE_INLINE BoolV V3IsEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_cmpeq_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Max(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_max_ps(a, b); +} + +PX_FORCE_INLINE Vec3V V3Min(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return _mm_min_ps(a, b); +} + +PX_FORCE_INLINE FloatV V3ExtractMax(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 shuf1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)); + const __m128 shuf2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)); + const __m128 shuf3 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)); + return _mm_max_ps(_mm_max_ps(shuf1, shuf2), shuf3); +} + +PX_FORCE_INLINE FloatV V3ExtractMin(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 shuf1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)); + const __m128 shuf2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)); + const __m128 shuf3 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)); + return _mm_min_ps(_mm_min_ps(shuf1, shuf2), shuf3); +} + +//// if(a > 0.0f) return 1.0f; else if a == 0.f return 0.f, else return -1.f; +// PX_FORCE_INLINE Vec3V V3MathSign(const Vec3V a) +//{ +// VECMATHAOS_ASSERT(isValidVec3V(a)); +// +// const __m128i ai = 
_mm_cvtps_epi32(a); +// const __m128i bi = _mm_cvtps_epi32(V3Neg(a)); +// const __m128 aa = _mm_cvtepi32_ps(_mm_srai_epi32(ai, 31)); +// const __m128 bb = _mm_cvtepi32_ps(_mm_srai_epi32(bi, 31)); +// return _mm_or_ps(aa, bb); +//} + +// return (a >= 0.0f) ? 1.0f : -1.0f; +PX_FORCE_INLINE Vec3V V3Sign(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 zero = V3Zero(); + const __m128 one = V3One(); + const __m128 none = V3Neg(one); + return V3Sel(V3IsGrtrOrEq(a, zero), one, none); +} + +PX_FORCE_INLINE Vec3V V3Clamp(const Vec3V a, const Vec3V minV, const Vec3V maxV) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(minV); + ASSERT_ISVALIDVEC3V(maxV); + return V3Max(V3Min(a, maxV), minV); +} + +PX_FORCE_INLINE PxU32 V3AllGrtr(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return internalWindowsSimd::BAllTrue3_R(V4IsGrtr(a, b)); +} + +PX_FORCE_INLINE PxU32 V3AllGrtrOrEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return internalWindowsSimd::BAllTrue3_R(V4IsGrtrOrEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V3AllEq(const Vec3V a, const Vec3V b) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(b); + return internalWindowsSimd::BAllTrue3_R(V4IsEq(a, b)); +} + +PX_FORCE_INLINE Vec3V V3Round(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + // return _mm_round_ps(a, 0x0); + const Vec3V half = V3Load(0.5f); + const __m128 signBit = _mm_cvtepi32_ps(_mm_srli_epi32(_mm_cvtps_epi32(a), 31)); + const Vec3V aRound = V3Sub(V3Add(a, half), signBit); + __m128i tmp = _mm_cvttps_epi32(aRound); + return _mm_cvtepi32_ps(tmp); +} + +PX_FORCE_INLINE Vec3V V3Sin(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec3V tmp = V3Scale(a, recipTwoPi); + const Vec3V b = V3Round(tmp); + const Vec3V V1 = V3NegScaleSub(b, twoPi, a); 
+ + // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - + // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI) + const Vec3V V2 = V3Mul(V1, V1); + const Vec3V V3 = V3Mul(V2, V1); + const Vec3V V5 = V3Mul(V3, V2); + const Vec3V V7 = V3Mul(V5, V2); + const Vec3V V9 = V3Mul(V7, V2); + const Vec3V V11 = V3Mul(V9, V2); + const Vec3V V13 = V3Mul(V11, V2); + const Vec3V V15 = V3Mul(V13, V2); + const Vec3V V17 = V3Mul(V15, V2); + const Vec3V V19 = V3Mul(V17, V2); + const Vec3V V21 = V3Mul(V19, V2); + const Vec3V V23 = V3Mul(V21, V2); + + const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f); + const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f); + const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f); + + const FloatV S1 = V4GetY(sinCoefficients0); + const FloatV S2 = V4GetZ(sinCoefficients0); + const FloatV S3 = V4GetW(sinCoefficients0); + const FloatV S4 = V4GetX(sinCoefficients1); + const FloatV S5 = V4GetY(sinCoefficients1); + const FloatV S6 = V4GetZ(sinCoefficients1); + const FloatV S7 = V4GetW(sinCoefficients1); + const FloatV S8 = V4GetX(sinCoefficients2); + const FloatV S9 = V4GetY(sinCoefficients2); + const FloatV S10 = V4GetZ(sinCoefficients2); + const FloatV S11 = V4GetW(sinCoefficients2); + + Vec3V Result; + Result = V3ScaleAdd(V3, S1, V1); + Result = V3ScaleAdd(V5, S2, Result); + Result = V3ScaleAdd(V7, S3, Result); + Result = V3ScaleAdd(V9, S4, Result); + Result = V3ScaleAdd(V11, S5, Result); + Result = V3ScaleAdd(V13, S6, Result); + Result = V3ScaleAdd(V15, S7, Result); + Result = V3ScaleAdd(V17, S8, Result); + Result = V3ScaleAdd(V19, S9, Result); + Result = V3ScaleAdd(V21, S10, Result); + Result = V3ScaleAdd(V23, S11, Result); + + ASSERT_ISVALIDVEC3V(Result); + return Result; +} + +PX_FORCE_INLINE Vec3V V3Cos(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + + // Modulo the range of the given angles such that -XM_2PI <= Angles < XM_2PI + const Vec4V recipTwoPi = 
V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec3V tmp = V3Scale(a, recipTwoPi); + const Vec3V b = V3Round(tmp); + const Vec3V V1 = V3NegScaleSub(b, twoPi, a); + + // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! - + // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI) + const Vec3V V2 = V3Mul(V1, V1); + const Vec3V V4 = V3Mul(V2, V2); + const Vec3V V6 = V3Mul(V4, V2); + const Vec3V V8 = V3Mul(V4, V4); + const Vec3V V10 = V3Mul(V6, V4); + const Vec3V V12 = V3Mul(V6, V6); + const Vec3V V14 = V3Mul(V8, V6); + const Vec3V V16 = V3Mul(V8, V8); + const Vec3V V18 = V3Mul(V10, V8); + const Vec3V V20 = V3Mul(V10, V10); + const Vec3V V22 = V3Mul(V12, V10); + + const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f); + const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f); + const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f); + + const FloatV C1 = V4GetY(cosCoefficients0); + const FloatV C2 = V4GetZ(cosCoefficients0); + const FloatV C3 = V4GetW(cosCoefficients0); + const FloatV C4 = V4GetX(cosCoefficients1); + const FloatV C5 = V4GetY(cosCoefficients1); + const FloatV C6 = V4GetZ(cosCoefficients1); + const FloatV C7 = V4GetW(cosCoefficients1); + const FloatV C8 = V4GetX(cosCoefficients2); + const FloatV C9 = V4GetY(cosCoefficients2); + const FloatV C10 = V4GetZ(cosCoefficients2); + const FloatV C11 = V4GetW(cosCoefficients2); + + Vec3V Result; + Result = V3ScaleAdd(V2, C1, V3One()); + Result = V3ScaleAdd(V4, C2, Result); + Result = V3ScaleAdd(V6, C3, Result); + Result = V3ScaleAdd(V8, C4, Result); + Result = V3ScaleAdd(V10, C5, Result); + Result = V3ScaleAdd(V12, C6, Result); + Result = V3ScaleAdd(V14, C7, Result); + Result = V3ScaleAdd(V16, C8, Result); + Result = V3ScaleAdd(V18, C9, Result); + Result = V3ScaleAdd(V20, C10, Result); + Result = V3ScaleAdd(V22, C11, Result); + + ASSERT_ISVALIDVEC3V(Result); + return Result; +} + 
+PX_FORCE_INLINE Vec3V V3PermYZZ(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 2, 1)); +} + +PX_FORCE_INLINE Vec3V V3PermXYX(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 1, 0)); +} + +PX_FORCE_INLINE Vec3V V3PermYZX(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); +} + +PX_FORCE_INLINE Vec3V V3PermZXY(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2)); +} + +PX_FORCE_INLINE Vec3V V3PermZZY(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 2, 2)); +} + +PX_FORCE_INLINE Vec3V V3PermYXX(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 0, 1)); +} + +PX_FORCE_INLINE Vec3V V3Perm_Zero_1Z_0Y(const Vec3V v0, const Vec3V v1) +{ + ASSERT_ISVALIDVEC3V(v0); + ASSERT_ISVALIDVEC3V(v1); + return _mm_shuffle_ps(v1, v0, _MM_SHUFFLE(3, 1, 2, 3)); +} + +PX_FORCE_INLINE Vec3V V3Perm_0Z_Zero_1X(const Vec3V v0, const Vec3V v1) +{ + ASSERT_ISVALIDVEC3V(v0); + ASSERT_ISVALIDVEC3V(v1); + return _mm_shuffle_ps(v0, v1, _MM_SHUFFLE(3, 0, 3, 2)); +} + +PX_FORCE_INLINE Vec3V V3Perm_1Y_0X_Zero(const Vec3V v0, const Vec3V v1) +{ + ASSERT_ISVALIDVEC3V(v0); + ASSERT_ISVALIDVEC3V(v1); + // There must be a better way to do this. 
+ Vec3V v2 = V3Zero(); + FloatV y1 = V3GetY(v1); + FloatV x0 = V3GetX(v0); + v2 = V3SetX(v2, y1); + return V3SetY(v2, x0); +} + +PX_FORCE_INLINE FloatV V3SumElems(const Vec3V a) +{ + ASSERT_ISVALIDVEC3V(a); + const __m128 shuf1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)); // z,y,x,w + const __m128 shuf2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)); // y,x,w,z + const __m128 shuf3 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)); // x,w,z,y + return _mm_add_ps(_mm_add_ps(shuf1, shuf2), shuf3); +} + +PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V min, const Vec3V max) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(min); + ASSERT_ISVALIDVEC3V(max); + const BoolV c = BOr(V3IsGrtr(a, max), V3IsGrtr(min, a)); + return PxU32(!BAllEqFFFF(c)); +} + +PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V min, const Vec3V max) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(min); + ASSERT_ISVALIDVEC3V(max); + const BoolV c = BAnd(V3IsGrtrOrEq(a, min), V3IsGrtrOrEq(max, a)); + return BAllEqTTTT(c); +} + +PX_FORCE_INLINE PxU32 V3OutOfBounds(const Vec3V a, const Vec3V bounds) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(bounds); + return V3OutOfBounds(a, V3Neg(bounds), bounds); +} + +PX_FORCE_INLINE PxU32 V3InBounds(const Vec3V a, const Vec3V bounds) +{ + ASSERT_ISVALIDVEC3V(a); + ASSERT_ISVALIDVEC3V(bounds); + return V3InBounds(a, V3Neg(bounds), bounds); +} + +PX_FORCE_INLINE void V3Transpose(Vec3V& col0, Vec3V& col1, Vec3V& col2) +{ + ASSERT_ISVALIDVEC3V(col0); + ASSERT_ISVALIDVEC3V(col1); + ASSERT_ISVALIDVEC3V(col2); + const Vec3V col3 = _mm_setzero_ps(); + Vec3V tmp0 = _mm_unpacklo_ps(col0, col1); + Vec3V tmp2 = _mm_unpacklo_ps(col2, col3); + Vec3V tmp1 = _mm_unpackhi_ps(col0, col1); + Vec3V tmp3 = _mm_unpackhi_ps(col2, col3); + col0 = _mm_movelh_ps(tmp0, tmp2); + col1 = _mm_movehl_ps(tmp2, tmp0); + col2 = _mm_movelh_ps(tmp1, tmp3); +} + +////////////////////////////////// +// VEC4V +////////////////////////////////// + 
+PX_FORCE_INLINE Vec4V V4Splat(const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + // return _mm_shuffle_ps(f, f, _MM_SHUFFLE(0,0,0,0)); + return f; +} + +PX_FORCE_INLINE Vec4V V4Merge(const FloatV* const floatVArray) +{ + ASSERT_ISVALIDFLOATV(floatVArray[0]); + ASSERT_ISVALIDFLOATV(floatVArray[1]); + ASSERT_ISVALIDFLOATV(floatVArray[2]); + ASSERT_ISVALIDFLOATV(floatVArray[3]); + const __m128 xw = _mm_move_ss(floatVArray[1], floatVArray[0]); // y, y, y, x + const __m128 yz = _mm_move_ss(floatVArray[2], floatVArray[3]); // z, z, z, w + return _mm_shuffle_ps(xw, yz, _MM_SHUFFLE(0, 2, 1, 0)); +} + +PX_FORCE_INLINE Vec4V V4Merge(const FloatVArg x, const FloatVArg y, const FloatVArg z, const FloatVArg w) +{ + ASSERT_ISVALIDFLOATV(x); + ASSERT_ISVALIDFLOATV(y); + ASSERT_ISVALIDFLOATV(z); + ASSERT_ISVALIDFLOATV(w); + const __m128 xw = _mm_move_ss(y, x); // y, y, y, x + const __m128 yz = _mm_move_ss(z, w); // z, z, z, w + return _mm_shuffle_ps(xw, yz, _MM_SHUFFLE(0, 2, 1, 0)); +} + +PX_FORCE_INLINE Vec4V V4MergeW(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const Vec4V xz = _mm_unpackhi_ps(x, z); + const Vec4V yw = _mm_unpackhi_ps(y, w); + return _mm_unpackhi_ps(xz, yw); +} + +PX_FORCE_INLINE Vec4V V4MergeZ(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const Vec4V xz = _mm_unpackhi_ps(x, z); + const Vec4V yw = _mm_unpackhi_ps(y, w); + return _mm_unpacklo_ps(xz, yw); +} + +PX_FORCE_INLINE Vec4V V4MergeY(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const Vec4V xz = _mm_unpacklo_ps(x, z); + const Vec4V yw = _mm_unpacklo_ps(y, w); + return _mm_unpackhi_ps(xz, yw); +} + +PX_FORCE_INLINE Vec4V V4MergeX(const Vec4VArg x, const Vec4VArg y, const Vec4VArg z, const Vec4VArg w) +{ + const Vec4V xz = _mm_unpacklo_ps(x, z); + const Vec4V yw = _mm_unpacklo_ps(y, w); + return _mm_unpacklo_ps(xz, yw); +} + +PX_FORCE_INLINE Vec4V V4UnpackXY(const Vec4VArg a, const Vec4VArg b) +{ + return 
_mm_unpacklo_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4UnpackZW(const Vec4VArg a, const Vec4VArg b) +{ + return _mm_unpackhi_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4PermYXWZ(const Vec4V a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 3, 0, 1)); +} + +PX_FORCE_INLINE Vec4V V4PermXZXZ(const Vec4V a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 0, 2, 0)); +} + +PX_FORCE_INLINE Vec4V V4PermYWYW(const Vec4V a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 3, 1)); +} + +PX_FORCE_INLINE Vec4V V4PermYZXW(const Vec4V a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); +} + +template <PxU8 x, PxU8 y, PxU8 z, PxU8 w> +PX_FORCE_INLINE Vec4V V4Perm(const Vec4V a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(w, z, y, x)); +} + +PX_FORCE_INLINE Vec4V V4UnitW() +{ + const PX_ALIGN(16, PxF32 w[4]) = { 0.0f, 0.0f, 0.0f, 1.0f }; + const __m128 w128 = _mm_load_ps(w); + return w128; +} + +PX_FORCE_INLINE Vec4V V4UnitX() +{ + const PX_ALIGN(16, PxF32 x[4]) = { 1.0f, 0.0f, 0.0f, 0.0f }; + const __m128 x128 = _mm_load_ps(x); + return x128; +} + +PX_FORCE_INLINE Vec4V V4UnitY() +{ + const PX_ALIGN(16, PxF32 y[4]) = { 0.0f, 1.0f, 0.0f, 0.0f }; + const __m128 y128 = _mm_load_ps(y); + return y128; +} + +PX_FORCE_INLINE Vec4V V4UnitZ() +{ + const PX_ALIGN(16, PxF32 z[4]) = { 0.0f, 0.0f, 1.0f, 0.0f }; + const __m128 z128 = _mm_load_ps(z); + return z128; +} + +PX_FORCE_INLINE FloatV V4GetW(const Vec4V f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 3, 3, 3)); +} + +PX_FORCE_INLINE FloatV V4GetX(const Vec4V f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(0, 0, 0, 0)); +} + +PX_FORCE_INLINE FloatV V4GetY(const Vec4V f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(1, 1, 1, 1)); +} + +PX_FORCE_INLINE FloatV V4GetZ(const Vec4V f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(2, 2, 2, 2)); +} + +PX_FORCE_INLINE Vec4V V4SetW(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTTTF(), v, f); +} + +PX_FORCE_INLINE Vec4V V4ClearW(const Vec4V v) +{ + return 
_mm_and_ps(v, (VecI32V&)internalWindowsSimd::gMaskXYZ); +} + +PX_FORCE_INLINE Vec4V V4SetX(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BFTTT(), v, f); +} + +PX_FORCE_INLINE Vec4V V4SetY(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTFTT(), v, f); +} + +PX_FORCE_INLINE Vec4V V4SetZ(const Vec4V v, const FloatV f) +{ + ASSERT_ISVALIDFLOATV(f); + return V4Sel(BTTFT(), v, f); +} + +PX_FORCE_INLINE Vec4V V4Zero() +{ + return _mm_setzero_ps(); +} + +PX_FORCE_INLINE Vec4V V4One() +{ + return V4Load(1.0f); +} + +PX_FORCE_INLINE Vec4V V4Eps() +{ + return V4Load(PX_EPS_REAL); +} + +PX_FORCE_INLINE Vec4V V4Neg(const Vec4V f) +{ + return _mm_sub_ps(_mm_setzero_ps(), f); +} + +PX_FORCE_INLINE Vec4V V4Add(const Vec4V a, const Vec4V b) +{ + return _mm_add_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4Sub(const Vec4V a, const Vec4V b) +{ + return _mm_sub_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4Scale(const Vec4V a, const FloatV b) +{ + return _mm_mul_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4Mul(const Vec4V a, const Vec4V b) +{ + return _mm_mul_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4ScaleInv(const Vec4V a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(b); + return _mm_div_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4Div(const Vec4V a, const Vec4V b) +{ + return _mm_div_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4ScaleInvFast(const Vec4V a, const FloatV b) +{ + ASSERT_ISVALIDFLOATV(b); + return _mm_mul_ps(a, _mm_rcp_ps(b)); +} + +PX_FORCE_INLINE Vec4V V4DivFast(const Vec4V a, const Vec4V b) +{ + return _mm_mul_ps(a, _mm_rcp_ps(b)); +} + +PX_FORCE_INLINE Vec4V V4Recip(const Vec4V a) +{ + return _mm_div_ps(V4One(), a); +} + +PX_FORCE_INLINE Vec4V V4RecipFast(const Vec4V a) +{ + return _mm_rcp_ps(a); +} + +PX_FORCE_INLINE Vec4V V4Rsqrt(const Vec4V a) +{ + return _mm_div_ps(V4One(), _mm_sqrt_ps(a)); +} + +PX_FORCE_INLINE Vec4V V4RsqrtFast(const Vec4V a) +{ + return _mm_rsqrt_ps(a); +} + +PX_FORCE_INLINE Vec4V V4Sqrt(const Vec4V a) +{ + return 
_mm_sqrt_ps(a); +} + +PX_FORCE_INLINE Vec4V V4ScaleAdd(const Vec4V a, const FloatV b, const Vec4V c) +{ + ASSERT_ISVALIDFLOATV(b); + return V4Add(V4Scale(a, b), c); +} + +PX_FORCE_INLINE Vec4V V4NegScaleSub(const Vec4V a, const FloatV b, const Vec4V c) +{ + ASSERT_ISVALIDFLOATV(b); + return V4Sub(c, V4Scale(a, b)); +} + +PX_FORCE_INLINE Vec4V V4MulAdd(const Vec4V a, const Vec4V b, const Vec4V c) +{ + return V4Add(V4Mul(a, b), c); +} + +PX_FORCE_INLINE Vec4V V4NegMulSub(const Vec4V a, const Vec4V b, const Vec4V c) +{ + return V4Sub(c, V4Mul(a, b)); +} + +PX_FORCE_INLINE Vec4V V4Abs(const Vec4V a) +{ + return V4Max(a, V4Neg(a)); +} + +PX_FORCE_INLINE FloatV V4SumElements(const Vec4V a) +{ + const Vec4V xy = V4UnpackXY(a, a); // x,x,y,y + const Vec4V zw = V4UnpackZW(a, a); // z,z,w,w + const Vec4V xz_yw = V4Add(xy, zw); // x+z,x+z,y+w,y+w + const FloatV xz = V4GetX(xz_yw); // x+z + const FloatV yw = V4GetZ(xz_yw); // y+w + return FAdd(xz, yw); // sum +} + +PX_FORCE_INLINE FloatV V4Dot(const Vec4V a, const Vec4V b) +{ + const __m128 dot1 = _mm_mul_ps(a, b); // x,y,z,w + const __m128 shuf1 = _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(2, 1, 0, 3)); // w,x,y,z + const __m128 shuf2 = _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(1, 0, 3, 2)); // z,w,x,y + const __m128 shuf3 = _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(0, 3, 2, 1)); // y,z,w,x + return _mm_add_ps(_mm_add_ps(shuf2, shuf3), _mm_add_ps(dot1, shuf1)); + + // PT: this version has two less instructions but we should check its accuracy + // aw*bw | az*bz | ay*by | ax*bx + // const __m128 t0 = _mm_mul_ps(a, b); + // ay*by | ax*bx | aw*bw | az*bz + // const __m128 t1 = _mm_shuffle_ps(t0, t0, _MM_SHUFFLE(1,0,3,2)); + // ay*by + aw*bw | ax*bx + az*bz | aw*bw + ay*by | az*bz + ax*bx + // const __m128 t2 = _mm_add_ps(t0, t1); + // ax*bx + az*bz | ay*by + aw*bw | az*bz + ax*bx | aw*bw + ay*by + // const __m128 t3 = _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,3,0,1)); + // ax*bx + az*bz + ay*by + aw*bw + // return _mm_add_ps(t3, t2); + // 
ay*by + aw*bw + ax*bx + az*bz + // az*bz + ax*bx + aw*bw + ay*by + // aw*bw + ay*by + az*bz + ax*bx +} + +PX_FORCE_INLINE FloatV V4Dot3(const Vec4V a, const Vec4V b) +{ + const __m128 dot1 = _mm_mul_ps(a, b); // aw*bw | az*bz | ay*by | ax*bx + const __m128 shuf1 = _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(0, 0, 0, 0)); // ax*bx | ax*bx | ax*bx | ax*bx + const __m128 shuf2 = _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(1, 1, 1, 1)); // ay*by | ay*by | ay*by | ay*by + const __m128 shuf3 = _mm_shuffle_ps(dot1, dot1, _MM_SHUFFLE(2, 2, 2, 2)); // az*bz | az*bz | az*bz | az*bz + return _mm_add_ps(_mm_add_ps(shuf1, shuf2), shuf3); // ax*bx + ay*by + az*bz in each component +} + +PX_FORCE_INLINE Vec4V V4Cross(const Vec4V a, const Vec4V b) +{ + const __m128 r1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + const __m128 r2 = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 0, 2, 1)); // y,z,x,w + const __m128 l2 = _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 1, 0, 2)); // z,x,y,w + return _mm_sub_ps(_mm_mul_ps(l1, l2), _mm_mul_ps(r1, r2)); +} + +PX_FORCE_INLINE FloatV V4Length(const Vec4V a) +{ + return _mm_sqrt_ps(V4Dot(a, a)); +} + +PX_FORCE_INLINE FloatV V4LengthSq(const Vec4V a) +{ + return V4Dot(a, a); +} + +PX_FORCE_INLINE Vec4V V4Normalize(const Vec4V a) +{ + ASSERT_ISFINITELENGTH(a); + return V4ScaleInv(a, _mm_sqrt_ps(V4Dot(a, a))); +} + +PX_FORCE_INLINE Vec4V V4NormalizeFast(const Vec4V a) +{ + ASSERT_ISFINITELENGTH(a); + return V4ScaleInvFast(a, _mm_sqrt_ps(V4Dot(a, a))); +} + +PX_FORCE_INLINE Vec4V V4NormalizeSafe(const Vec4V a, const Vec4V unsafeReturnValue) +{ + const __m128 eps = V3Eps(); + const __m128 length = V4Length(a); + const __m128 isGreaterThanZero = V4IsGrtr(length, eps); + return V4Sel(isGreaterThanZero, V4ScaleInv(a, length), unsafeReturnValue); +} + +PX_FORCE_INLINE Vec4V V4Sel(const BoolV c, const Vec4V a, const Vec4V b) +{ + return _mm_or_ps(_mm_andnot_ps(c, b), _mm_and_ps(c, a)); +} + 
+PX_FORCE_INLINE BoolV V4IsGrtr(const Vec4V a, const Vec4V b) +{ + return _mm_cmpgt_ps(a, b); +} + +PX_FORCE_INLINE BoolV V4IsGrtrOrEq(const Vec4V a, const Vec4V b) +{ + return _mm_cmpge_ps(a, b); +} + +PX_FORCE_INLINE BoolV V4IsEq(const Vec4V a, const Vec4V b) +{ + return _mm_cmpeq_ps(a, b); +} + +PX_FORCE_INLINE BoolV V4IsEqU32(const VecU32V a, const VecU32V b) +{ + return internalWindowsSimd::m128_I2F( + _mm_cmpeq_epi32(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); +} + +PX_FORCE_INLINE Vec4V V4Max(const Vec4V a, const Vec4V b) +{ + return _mm_max_ps(a, b); +} + +PX_FORCE_INLINE Vec4V V4Min(const Vec4V a, const Vec4V b) +{ + return _mm_min_ps(a, b); +} + +PX_FORCE_INLINE FloatV V4ExtractMax(const Vec4V a) +{ + const __m128 shuf1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 1, 0, 3)); + const __m128 shuf2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)); + const __m128 shuf3 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 3, 2, 1)); + + return _mm_max_ps(_mm_max_ps(a, shuf1), _mm_max_ps(shuf2, shuf3)); +} + +PX_FORCE_INLINE FloatV V4ExtractMin(const Vec4V a) +{ + const __m128 shuf1 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 1, 0, 3)); + const __m128 shuf2 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)); + const __m128 shuf3 = _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 3, 2, 1)); + + return _mm_min_ps(_mm_min_ps(a, shuf1), _mm_min_ps(shuf2, shuf3)); +} + +PX_FORCE_INLINE Vec4V V4Clamp(const Vec4V a, const Vec4V minV, const Vec4V maxV) +{ + return V4Max(V4Min(a, maxV), minV); +} + +PX_FORCE_INLINE PxU32 V4AllGrtr(const Vec4V a, const Vec4V b) +{ + return internalWindowsSimd::BAllTrue4_R(V4IsGrtr(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq(const Vec4V a, const Vec4V b) +{ + return internalWindowsSimd::BAllTrue4_R(V4IsGrtrOrEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllGrtrOrEq3(const Vec4V a, const Vec4V b) +{ + return internalWindowsSimd::BAllTrue3_R(V4IsGrtrOrEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AllEq(const Vec4V a, const Vec4V b) +{ + return 
internalWindowsSimd::BAllTrue4_R(V4IsEq(a, b)); +} + +PX_FORCE_INLINE PxU32 V4AnyGrtr3(const Vec4V a, const Vec4V b) +{ + return internalWindowsSimd::BAnyTrue3_R(V4IsGrtr(a, b)); +} + +PX_FORCE_INLINE Vec4V V4Round(const Vec4V a) +{ + // return _mm_round_ps(a, 0x0); + const Vec4V half = V4Load(0.5f); + const __m128 signBit = _mm_cvtepi32_ps(_mm_srli_epi32(_mm_cvtps_epi32(a), 31)); + const Vec4V aRound = V4Sub(V4Add(a, half), signBit); + const __m128i tmp = _mm_cvttps_epi32(aRound); + return _mm_cvtepi32_ps(tmp); +} + +PX_FORCE_INLINE Vec4V V4Sin(const Vec4V a) +{ + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const Vec4V twoPi = V4LoadA(g_PXTwoPi.f); + const Vec4V tmp = V4Mul(a, recipTwoPi); + const Vec4V b = V4Round(tmp); + const Vec4V V1 = V4NegMulSub(twoPi, b, a); + + // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - + // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI) + const Vec4V V2 = V4Mul(V1, V1); + const Vec4V V3 = V4Mul(V2, V1); + const Vec4V V5 = V4Mul(V3, V2); + const Vec4V V7 = V4Mul(V5, V2); + const Vec4V V9 = V4Mul(V7, V2); + const Vec4V V11 = V4Mul(V9, V2); + const Vec4V V13 = V4Mul(V11, V2); + const Vec4V V15 = V4Mul(V13, V2); + const Vec4V V17 = V4Mul(V15, V2); + const Vec4V V19 = V4Mul(V17, V2); + const Vec4V V21 = V4Mul(V19, V2); + const Vec4V V23 = V4Mul(V21, V2); + + const Vec4V sinCoefficients0 = V4LoadA(g_PXSinCoefficients0.f); + const Vec4V sinCoefficients1 = V4LoadA(g_PXSinCoefficients1.f); + const Vec4V sinCoefficients2 = V4LoadA(g_PXSinCoefficients2.f); + + const FloatV S1 = V4GetY(sinCoefficients0); + const FloatV S2 = V4GetZ(sinCoefficients0); + const FloatV S3 = V4GetW(sinCoefficients0); + const FloatV S4 = V4GetX(sinCoefficients1); + const FloatV S5 = V4GetY(sinCoefficients1); + const FloatV S6 = V4GetZ(sinCoefficients1); + const FloatV S7 = V4GetW(sinCoefficients1); + const FloatV S8 = V4GetX(sinCoefficients2); + const FloatV S9 = 
V4GetY(sinCoefficients2); + const FloatV S10 = V4GetZ(sinCoefficients2); + const FloatV S11 = V4GetW(sinCoefficients2); + + Vec4V Result; + Result = V4MulAdd(S1, V3, V1); + Result = V4MulAdd(S2, V5, Result); + Result = V4MulAdd(S3, V7, Result); + Result = V4MulAdd(S4, V9, Result); + Result = V4MulAdd(S5, V11, Result); + Result = V4MulAdd(S6, V13, Result); + Result = V4MulAdd(S7, V15, Result); + Result = V4MulAdd(S8, V17, Result); + Result = V4MulAdd(S9, V19, Result); + Result = V4MulAdd(S10, V21, Result); + Result = V4MulAdd(S11, V23, Result); + + return Result; +} + +PX_FORCE_INLINE Vec4V V4Cos(const Vec4V a) +{ + const Vec4V recipTwoPi = V4LoadA(g_PXReciprocalTwoPi.f); + const FloatV twoPi = V4LoadA(g_PXTwoPi.f); + const Vec4V tmp = V4Mul(a, recipTwoPi); + const Vec4V b = V4Round(tmp); + const Vec4V V1 = V4NegMulSub(twoPi, b, a); + + // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! - + // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! 
(for -PI <= V < PI) + const Vec4V V2 = V4Mul(V1, V1); + const Vec4V V4 = V4Mul(V2, V2); + const Vec4V V6 = V4Mul(V4, V2); + const Vec4V V8 = V4Mul(V4, V4); + const Vec4V V10 = V4Mul(V6, V4); + const Vec4V V12 = V4Mul(V6, V6); + const Vec4V V14 = V4Mul(V8, V6); + const Vec4V V16 = V4Mul(V8, V8); + const Vec4V V18 = V4Mul(V10, V8); + const Vec4V V20 = V4Mul(V10, V10); + const Vec4V V22 = V4Mul(V12, V10); + + const Vec4V cosCoefficients0 = V4LoadA(g_PXCosCoefficients0.f); + const Vec4V cosCoefficients1 = V4LoadA(g_PXCosCoefficients1.f); + const Vec4V cosCoefficients2 = V4LoadA(g_PXCosCoefficients2.f); + + const FloatV C1 = V4GetY(cosCoefficients0); + const FloatV C2 = V4GetZ(cosCoefficients0); + const FloatV C3 = V4GetW(cosCoefficients0); + const FloatV C4 = V4GetX(cosCoefficients1); + const FloatV C5 = V4GetY(cosCoefficients1); + const FloatV C6 = V4GetZ(cosCoefficients1); + const FloatV C7 = V4GetW(cosCoefficients1); + const FloatV C8 = V4GetX(cosCoefficients2); + const FloatV C9 = V4GetY(cosCoefficients2); + const FloatV C10 = V4GetZ(cosCoefficients2); + const FloatV C11 = V4GetW(cosCoefficients2); + + Vec4V Result; + Result = V4MulAdd(C1, V2, V4One()); + Result = V4MulAdd(C2, V4, Result); + Result = V4MulAdd(C3, V6, Result); + Result = V4MulAdd(C4, V8, Result); + Result = V4MulAdd(C5, V10, Result); + Result = V4MulAdd(C6, V12, Result); + Result = V4MulAdd(C7, V14, Result); + Result = V4MulAdd(C8, V16, Result); + Result = V4MulAdd(C9, V18, Result); + Result = V4MulAdd(C10, V20, Result); + Result = V4MulAdd(C11, V22, Result); + + return Result; +} + +PX_FORCE_INLINE void V4Transpose(Vec4V& col0, Vec4V& col1, Vec4V& col2, Vec4V& col3) +{ + Vec4V tmp0 = _mm_unpacklo_ps(col0, col1); + Vec4V tmp2 = _mm_unpacklo_ps(col2, col3); + Vec4V tmp1 = _mm_unpackhi_ps(col0, col1); + Vec4V tmp3 = _mm_unpackhi_ps(col2, col3); + col0 = _mm_movelh_ps(tmp0, tmp2); + col1 = _mm_movehl_ps(tmp2, tmp0); + col2 = _mm_movelh_ps(tmp1, tmp3); + col3 = _mm_movehl_ps(tmp3, tmp1); +} + 
+////////////////////////////////// +// BoolV +////////////////////////////////// + +PX_FORCE_INLINE BoolV BFFFF() +{ + return _mm_setzero_ps(); +} + +PX_FORCE_INLINE BoolV BFFFT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0,0,0xFFFFFFFF}; + const __m128 ffft=_mm_load_ps((float*)&f); + return ffft;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(-1, 0, 0, 0)); +} + +PX_FORCE_INLINE BoolV BFFTF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0,0xFFFFFFFF,0}; + const __m128 fftf=_mm_load_ps((float*)&f); + return fftf;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(0, -1, 0, 0)); +} + +PX_FORCE_INLINE BoolV BFFTT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0,0xFFFFFFFF,0xFFFFFFFF}; + const __m128 fftt=_mm_load_ps((float*)&f); + return fftt;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(-1, -1, 0, 0)); +} + +PX_FORCE_INLINE BoolV BFTFF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0xFFFFFFFF,0,0}; + const __m128 ftff=_mm_load_ps((float*)&f); + return ftff;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(0, 0, -1, 0)); +} + +PX_FORCE_INLINE BoolV BFTFT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0xFFFFFFFF,0,0xFFFFFFFF}; + const __m128 ftft=_mm_load_ps((float*)&f); + return ftft;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(-1, 0, -1, 0)); +} + +PX_FORCE_INLINE BoolV BFTTF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0xFFFFFFFF,0xFFFFFFFF,0}; + const __m128 fttf=_mm_load_ps((float*)&f); + return fttf;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(0, -1, -1, 0)); +} + +PX_FORCE_INLINE BoolV BFTTT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}; + const __m128 fttt=_mm_load_ps((float*)&f); + return fttt;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(-1, -1, -1, 0)); +} + +PX_FORCE_INLINE BoolV BTFFF() +{ + // const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0,0,0}; + // const __m128 tfff=_mm_load_ps((float*)&f); + // return tfff; + return internalWindowsSimd::m128_I2F(_mm_set_epi32(0, 0, 0, -1)); +} + 
+PX_FORCE_INLINE BoolV BTFFT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0,0,0xFFFFFFFF}; + const __m128 tfft=_mm_load_ps((float*)&f); + return tfft;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(-1, 0, 0, -1)); +} + +PX_FORCE_INLINE BoolV BTFTF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0,0xFFFFFFFF,0}; + const __m128 tftf=_mm_load_ps((float*)&f); + return tftf;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(0, -1, 0, -1)); +} + +PX_FORCE_INLINE BoolV BTFTT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0,0xFFFFFFFF,0xFFFFFFFF}; + const __m128 tftt=_mm_load_ps((float*)&f); + return tftt;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(-1, -1, 0, -1)); +} + +PX_FORCE_INLINE BoolV BTTFF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0xFFFFFFFF,0,0}; + const __m128 ttff=_mm_load_ps((float*)&f); + return ttff;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(0, 0, -1, -1)); +} + +PX_FORCE_INLINE BoolV BTTFT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0xFFFFFFFF,0,0xFFFFFFFF}; + const __m128 ttft=_mm_load_ps((float*)&f); + return ttft;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(-1, 0, -1, -1)); +} + +PX_FORCE_INLINE BoolV BTTTF() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0}; + const __m128 tttf=_mm_load_ps((float*)&f); + return tttf;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(0, -1, -1, -1)); +} + +PX_FORCE_INLINE BoolV BTTTT() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}; + const __m128 tttt=_mm_load_ps((float*)&f); + return tttt;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(-1, -1, -1, -1)); +} + +PX_FORCE_INLINE BoolV BXMask() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0xFFFFFFFF,0,0,0}; + const __m128 tfff=_mm_load_ps((float*)&f); + return tfff;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(0, 0, 0, -1)); +} + +PX_FORCE_INLINE BoolV BYMask() +{ + /*const PX_ALIGN(16, PxU32 
f[4])={0,0xFFFFFFFF,0,0}; + const __m128 ftff=_mm_load_ps((float*)&f); + return ftff;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(0, 0, -1, 0)); +} + +PX_FORCE_INLINE BoolV BZMask() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0,0xFFFFFFFF,0}; + const __m128 fftf=_mm_load_ps((float*)&f); + return fftf;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(0, -1, 0, 0)); +} + +PX_FORCE_INLINE BoolV BWMask() +{ + /*const PX_ALIGN(16, PxU32 f[4])={0,0,0,0xFFFFFFFF}; + const __m128 ffft=_mm_load_ps((float*)&f); + return ffft;*/ + return internalWindowsSimd::m128_I2F(_mm_set_epi32(-1, 0, 0, 0)); +} + +PX_FORCE_INLINE BoolV BGetX(const BoolV f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(0, 0, 0, 0)); +} + +PX_FORCE_INLINE BoolV BGetY(const BoolV f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(1, 1, 1, 1)); +} + +PX_FORCE_INLINE BoolV BGetZ(const BoolV f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(2, 2, 2, 2)); +} + +PX_FORCE_INLINE BoolV BGetW(const BoolV f) +{ + return _mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 3, 3, 3)); +} + +PX_FORCE_INLINE BoolV BSetX(const BoolV v, const BoolV f) +{ + return V4Sel(BFTTT(), v, f); +} + +PX_FORCE_INLINE BoolV BSetY(const BoolV v, const BoolV f) +{ + return V4Sel(BTFTT(), v, f); +} + +PX_FORCE_INLINE BoolV BSetZ(const BoolV v, const BoolV f) +{ + return V4Sel(BTTFT(), v, f); +} + +PX_FORCE_INLINE BoolV BSetW(const BoolV v, const BoolV f) +{ + return V4Sel(BTTTF(), v, f); +} + +template <int index> +BoolV BSplatElement(BoolV a) +{ + return internalWindowsSimd::m128_I2F( + _mm_shuffle_epi32(internalWindowsSimd::m128_F2I(a), _MM_SHUFFLE(index, index, index, index))); +} + +PX_FORCE_INLINE BoolV BAnd(const BoolV a, const BoolV b) +{ + return _mm_and_ps(a, b); +} + +PX_FORCE_INLINE BoolV BNot(const BoolV a) +{ + const BoolV bAllTrue(BTTTT()); + return _mm_xor_ps(a, bAllTrue); +} + +PX_FORCE_INLINE BoolV BAndNot(const BoolV a, const BoolV b) +{ + return _mm_andnot_ps(b, a); +} + +PX_FORCE_INLINE BoolV BOr(const BoolV a, const BoolV b) 
+{ + return _mm_or_ps(a, b); +} + +PX_FORCE_INLINE BoolV BAllTrue4(const BoolV a) +{ + const BoolV bTmp = + _mm_and_ps(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 0, 1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 3, 2, 3))); + return _mm_and_ps(_mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(0, 0, 0, 0)), + _mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(1, 1, 1, 1))); +} + +PX_FORCE_INLINE BoolV BAnyTrue4(const BoolV a) +{ + const BoolV bTmp = + _mm_or_ps(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 0, 1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 3, 2, 3))); + return _mm_or_ps(_mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(0, 0, 0, 0)), + _mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(1, 1, 1, 1))); +} + +PX_FORCE_INLINE BoolV BAllTrue3(const BoolV a) +{ + const BoolV bTmp = + _mm_and_ps(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 0, 1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2))); + return _mm_and_ps(_mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(0, 0, 0, 0)), + _mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(1, 1, 1, 1))); +} + +PX_FORCE_INLINE BoolV BAnyTrue3(const BoolV a) +{ + const BoolV bTmp = + _mm_or_ps(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 0, 1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2))); + return _mm_or_ps(_mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(0, 0, 0, 0)), + _mm_shuffle_ps(bTmp, bTmp, _MM_SHUFFLE(1, 1, 1, 1))); +} + +PX_FORCE_INLINE PxU32 BAllEq(const BoolV a, const BoolV b) +{ + const BoolV bTest = internalWindowsSimd::m128_I2F( + _mm_cmpeq_epi32(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); + return internalWindowsSimd::BAllTrue4_R(bTest); +} + +PX_FORCE_INLINE PxU32 BAllEqTTTT(const BoolV a) +{ + return PxU32(_mm_movemask_ps(a)==15); +} + +PX_FORCE_INLINE PxU32 BAllEqFFFF(const BoolV a) +{ + return PxU32(_mm_movemask_ps(a)==0); +} + +PX_FORCE_INLINE PxU32 BGetBitMask(const BoolV a) +{ + return PxU32(_mm_movemask_ps(a)); +} + +////////////////////////////////// +// MAT33V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V M33MulV3(const Mat33V& a, const Vec3V b) +{ + const FloatV 
x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = V3Scale(a.col0, x); + const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + return V3Add(v0PlusV1, v2); +} + +PX_FORCE_INLINE Vec3V M33TrnspsMulV3(const Mat33V& a, const Vec3V b) +{ + Vec3V v0 = V3Mul(a.col0, b); + Vec3V v1 = V3Mul(a.col1, b); + Vec3V v2 = V3Mul(a.col2, b); + V3Transpose(v0, v1, v2); + return V3Add(V3Add(v0, v1), v2); +} + +PX_FORCE_INLINE Vec3V M33MulV3AddV3(const Mat33V& A, const Vec3V b, const Vec3V c) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + Vec3V result = V3ScaleAdd(A.col0, x, c); + result = V3ScaleAdd(A.col1, y, result); + return V3ScaleAdd(A.col2, z, result); +} + +PX_FORCE_INLINE Mat33V M33MulM33(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(M33MulV3(a, b.col0), M33MulV3(a, b.col1), M33MulV3(a, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Add(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(V3Add(a.col0, b.col0), V3Add(a.col1, b.col1), V3Add(a.col2, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Scale(const Mat33V& a, const FloatV& b) +{ + return Mat33V(V3Scale(a.col0, b), V3Scale(a.col1, b), V3Scale(a.col2, b)); +} + +PX_FORCE_INLINE Mat33V M33Sub(const Mat33V& a, const Mat33V& b) +{ + return Mat33V(V3Sub(a.col0, b.col0), V3Sub(a.col1, b.col1), V3Sub(a.col2, b.col2)); +} + +PX_FORCE_INLINE Mat33V M33Neg(const Mat33V& a) +{ + return Mat33V(V3Neg(a.col0), V3Neg(a.col1), V3Neg(a.col2)); +} + +PX_FORCE_INLINE Mat33V M33Abs(const Mat33V& a) +{ + return Mat33V(V3Abs(a.col0), V3Abs(a.col1), V3Abs(a.col2)); +} + +PX_FORCE_INLINE Mat33V M33Inverse(const Mat33V& a) +{ + const BoolV tfft = BTFFT(); + const BoolV tttf = BTTTF(); + const FloatV zero = V3Zero(); + const Vec3V cross01 = V3Cross(a.col0, a.col1); + const Vec3V cross12 = V3Cross(a.col1, a.col2); + const Vec3V cross20 = V3Cross(a.col2, a.col0); + const FloatV dot = V3Dot(cross01, 
a.col2); + const FloatV invDet = _mm_rcp_ps(dot); + const Vec3V mergeh = _mm_unpacklo_ps(cross12, cross01); + const Vec3V mergel = _mm_unpackhi_ps(cross12, cross01); + Vec3V colInv0 = _mm_unpacklo_ps(mergeh, cross20); + colInv0 = _mm_or_ps(_mm_andnot_ps(tttf, zero), _mm_and_ps(tttf, colInv0)); + const Vec3V zppd = _mm_shuffle_ps(mergeh, cross20, _MM_SHUFFLE(3, 0, 0, 2)); + const Vec3V pbwp = _mm_shuffle_ps(cross20, mergeh, _MM_SHUFFLE(3, 3, 1, 0)); + const Vec3V colInv1 = _mm_or_ps(_mm_andnot_ps(BTFFT(), pbwp), _mm_and_ps(BTFFT(), zppd)); + const Vec3V xppd = _mm_shuffle_ps(mergel, cross20, _MM_SHUFFLE(3, 0, 0, 0)); + const Vec3V pcyp = _mm_shuffle_ps(cross20, mergel, _MM_SHUFFLE(3, 1, 2, 0)); + const Vec3V colInv2 = _mm_or_ps(_mm_andnot_ps(tfft, pcyp), _mm_and_ps(tfft, xppd)); + + return Mat33V(_mm_mul_ps(colInv0, invDet), _mm_mul_ps(colInv1, invDet), _mm_mul_ps(colInv2, invDet)); +} + +PX_FORCE_INLINE Mat33V M33Trnsps(const Mat33V& a) +{ + Vec3V col0 = a.col0, col1 = a.col1, col2 = a.col2; + V3Transpose(col0, col1, col2); + return Mat33V(col0, col1, col2); +} + +PX_FORCE_INLINE Mat33V M33Identity() +{ + return Mat33V(V3UnitX(), V3UnitY(), V3UnitZ()); +} + +PX_FORCE_INLINE Mat33V M33Diagonal(const Vec3VArg d) +{ + const FloatV x = V3Mul(V3UnitX(), d); + const FloatV y = V3Mul(V3UnitY(), d); + const FloatV z = V3Mul(V3UnitZ(), d); + return Mat33V(x, y, z); +} + +////////////////////////////////// +// MAT34V +////////////////////////////////// + +PX_FORCE_INLINE Vec3V M34MulV3(const Mat34V& a, const Vec3V b) +{ + const FloatV x = V3GetX(b); + const FloatV y = V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = V3Scale(a.col0, x); + const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + const Vec3V v0PlusV1Plusv2 = V3Add(v0PlusV1, v2); + return V3Add(v0PlusV1Plusv2, a.col3); +} + +PX_FORCE_INLINE Vec3V M34Mul33V3(const Mat34V& a, const Vec3V b) +{ + const FloatV x = V3GetX(b); + const FloatV y = 
V3GetY(b); + const FloatV z = V3GetZ(b); + const Vec3V v0 = V3Scale(a.col0, x); + const Vec3V v1 = V3Scale(a.col1, y); + const Vec3V v2 = V3Scale(a.col2, z); + const Vec3V v0PlusV1 = V3Add(v0, v1); + return V3Add(v0PlusV1, v2); +} + +PX_FORCE_INLINE Vec3V M34TrnspsMul33V3(const Mat34V& a, const Vec3V b) +{ + Vec3V v0 = V3Mul(a.col0, b); + Vec3V v1 = V3Mul(a.col1, b); + Vec3V v2 = V3Mul(a.col2, b); + V3Transpose(v0, v1, v2); + return V3Add(V3Add(v0, v1), v2); +} + +PX_FORCE_INLINE Mat34V M34MulM34(const Mat34V& a, const Mat34V& b) +{ + return Mat34V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2), M34MulV3(a, b.col3)); +} + +PX_FORCE_INLINE Mat33V M34MulM33(const Mat34V& a, const Mat33V& b) +{ + return Mat33V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2)); +} + +PX_FORCE_INLINE Mat33V M34Mul33MM34(const Mat34V& a, const Mat34V& b) +{ + return Mat33V(M34Mul33V3(a, b.col0), M34Mul33V3(a, b.col1), M34Mul33V3(a, b.col2)); +} + +PX_FORCE_INLINE Mat34V M34Add(const Mat34V& a, const Mat34V& b) +{ + return Mat34V(V3Add(a.col0, b.col0), V3Add(a.col1, b.col1), V3Add(a.col2, b.col2), V3Add(a.col3, b.col3)); +} + +PX_FORCE_INLINE Mat34V M34Inverse(const Mat34V& a) +{ + Mat34V aInv; + const BoolV tfft = BTFFT(); + const BoolV tttf = BTTTF(); + const FloatV zero = V3Zero(); + const Vec3V cross01 = V3Cross(a.col0, a.col1); + const Vec3V cross12 = V3Cross(a.col1, a.col2); + const Vec3V cross20 = V3Cross(a.col2, a.col0); + const FloatV dot = V3Dot(cross01, a.col2); + const FloatV invDet = _mm_rcp_ps(dot); + const Vec3V mergeh = _mm_unpacklo_ps(cross12, cross01); + const Vec3V mergel = _mm_unpackhi_ps(cross12, cross01); + Vec3V colInv0 = _mm_unpacklo_ps(mergeh, cross20); + colInv0 = _mm_or_ps(_mm_andnot_ps(tttf, zero), _mm_and_ps(tttf, colInv0)); + const Vec3V zppd = _mm_shuffle_ps(mergeh, cross20, _MM_SHUFFLE(3, 0, 0, 2)); + const Vec3V pbwp = _mm_shuffle_ps(cross20, mergeh, _MM_SHUFFLE(3, 3, 1, 0)); + const Vec3V colInv1 = 
_mm_or_ps(_mm_andnot_ps(BTFFT(), pbwp), _mm_and_ps(BTFFT(), zppd)); + const Vec3V xppd = _mm_shuffle_ps(mergel, cross20, _MM_SHUFFLE(3, 0, 0, 0)); + const Vec3V pcyp = _mm_shuffle_ps(cross20, mergel, _MM_SHUFFLE(3, 1, 2, 0)); + const Vec3V colInv2 = _mm_or_ps(_mm_andnot_ps(tfft, pcyp), _mm_and_ps(tfft, xppd)); + aInv.col0 = _mm_mul_ps(colInv0, invDet); + aInv.col1 = _mm_mul_ps(colInv1, invDet); + aInv.col2 = _mm_mul_ps(colInv2, invDet); + aInv.col3 = M34Mul33V3(aInv, V3Neg(a.col3)); + return aInv; +} + +PX_FORCE_INLINE Mat33V M34Trnsps33(const Mat34V& a) +{ + Vec3V col0 = a.col0, col1 = a.col1, col2 = a.col2; + V3Transpose(col0, col1, col2); + return Mat33V(col0, col1, col2); +} + +////////////////////////////////// +// MAT44V +////////////////////////////////// + +PX_FORCE_INLINE Vec4V M44MulV4(const Mat44V& a, const Vec4V b) +{ + const FloatV x = V4GetX(b); + const FloatV y = V4GetY(b); + const FloatV z = V4GetZ(b); + const FloatV w = V4GetW(b); + + const Vec4V v0 = V4Scale(a.col0, x); + const Vec4V v1 = V4Scale(a.col1, y); + const Vec4V v2 = V4Scale(a.col2, z); + const Vec4V v3 = V4Scale(a.col3, w); + const Vec4V v0PlusV1 = V4Add(v0, v1); + const Vec4V v0PlusV1Plusv2 = V4Add(v0PlusV1, v2); + return V4Add(v0PlusV1Plusv2, v3); +} + +PX_FORCE_INLINE Vec4V M44TrnspsMulV4(const Mat44V& a, const Vec4V b) +{ + Vec4V v0 = V4Mul(a.col0, b); + Vec4V v1 = V4Mul(a.col1, b); + Vec4V v2 = V4Mul(a.col2, b); + Vec4V v3 = V4Mul(a.col3, b); + V4Transpose(v0, v1, v2, v3); + return V4Add(V4Add(v0, v1), V4Add(v2, v3)); +} + +PX_FORCE_INLINE Mat44V M44MulM44(const Mat44V& a, const Mat44V& b) +{ + return Mat44V(M44MulV4(a, b.col0), M44MulV4(a, b.col1), M44MulV4(a, b.col2), M44MulV4(a, b.col3)); +} + +PX_FORCE_INLINE Mat44V M44Add(const Mat44V& a, const Mat44V& b) +{ + return Mat44V(V4Add(a.col0, b.col0), V4Add(a.col1, b.col1), V4Add(a.col2, b.col2), V4Add(a.col3, b.col3)); +} + +PX_FORCE_INLINE Mat44V M44Trnsps(const Mat44V& a) +{ + Vec4V col0 = a.col0, col1 = a.col1, col2 = a.col2, 
col3 = a.col3; + V4Transpose(col0, col1, col2, col3); + return Mat44V(col0, col1, col2, col3); +} + +PX_FORCE_INLINE Mat44V M44Inverse(const Mat44V& a) +{ + __m128 minor0, minor1, minor2, minor3; + __m128 row0, row1, row2, row3; + __m128 det, tmp1; + + tmp1 = V4Zero(); + row1 = V4Zero(); + row3 = V4Zero(); + + row0 = a.col0; + row1 = _mm_shuffle_ps(a.col1, a.col1, _MM_SHUFFLE(1, 0, 3, 2)); + row2 = a.col2; + row3 = _mm_shuffle_ps(a.col3, a.col3, _MM_SHUFFLE(1, 0, 3, 2)); + + tmp1 = _mm_mul_ps(row2, row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor0 = _mm_mul_ps(row1, tmp1); + minor1 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0); + minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1); + minor1 = _mm_shuffle_ps(minor1, minor1, 0x4E); + + tmp1 = _mm_mul_ps(row1, row2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0); + minor3 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1)); + minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3); + minor3 = _mm_shuffle_ps(minor3, minor3, 0x4E); + + tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + row2 = _mm_shuffle_ps(row2, row2, 0x4E); + minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0); + minor2 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1)); + minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2); + minor2 = _mm_shuffle_ps(minor2, minor2, 0x4E); + + tmp1 = _mm_mul_ps(row0, row1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2); + minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2); + minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1)); + + tmp1 = 
_mm_mul_ps(row0, row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1)); + minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1); + minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1)); + + tmp1 = _mm_mul_ps(row0, row2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1); + minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1)); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1)); + minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3); + + det = _mm_mul_ps(row0, minor0); + det = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det); + det = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det); + tmp1 = _mm_rcp_ss(det); +#if 0 + det = _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1))); + det = _mm_shuffle_ps(det, det, 0x00); +#else + det = _mm_shuffle_ps(tmp1, tmp1, _MM_SHUFFLE(0, 0, 0, 0)); +#endif + + minor0 = _mm_mul_ps(det, minor0); + minor1 = _mm_mul_ps(det, minor1); + minor2 = _mm_mul_ps(det, minor2); + minor3 = _mm_mul_ps(det, minor3); + Mat44V invTrans(minor0, minor1, minor2, minor3); + return M44Trnsps(invTrans); +} + +PX_FORCE_INLINE Vec4V V4LoadXYZW(const PxF32& x, const PxF32& y, const PxF32& z, const PxF32& w) +{ + return _mm_set_ps(w, z, y, x); +} + +PX_FORCE_INLINE VecU32V V4U32Sel(const BoolV c, const VecU32V a, const VecU32V b) +{ + return internalWindowsSimd::m128_I2F( + _mm_or_si128(_mm_andnot_si128(internalWindowsSimd::m128_F2I(c), internalWindowsSimd::m128_F2I(b)), + _mm_and_si128(internalWindowsSimd::m128_F2I(c), internalWindowsSimd::m128_F2I(a)))); +} + +PX_FORCE_INLINE VecU32V V4U32or(VecU32V a, VecU32V b) +{ + return internalWindowsSimd::m128_I2F(_mm_or_si128(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); +} + +PX_FORCE_INLINE VecU32V V4U32xor(VecU32V a, VecU32V b) +{ + return 
internalWindowsSimd::m128_I2F( + _mm_xor_si128(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); +} + +PX_FORCE_INLINE VecU32V V4U32and(VecU32V a, VecU32V b) +{ + return internalWindowsSimd::m128_I2F( + _mm_and_si128(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); +} + +PX_FORCE_INLINE VecU32V V4U32Andc(VecU32V a, VecU32V b) +{ + return internalWindowsSimd::m128_I2F( + _mm_andnot_si128(internalWindowsSimd::m128_F2I(b), internalWindowsSimd::m128_F2I(a))); +} + +PX_FORCE_INLINE VecI32V U4Load(const PxU32 i) +{ + return _mm_load1_ps((PxF32*)&i); +} + +PX_FORCE_INLINE VecU32V U4LoadU(const PxU32* i) +{ + return _mm_loadu_ps((PxF32*)i); +} + +PX_FORCE_INLINE VecU32V U4LoadA(const PxU32* i) +{ + ASSERT_ISALIGNED16(i); + return _mm_load_ps((PxF32*)i); +} + +PX_FORCE_INLINE VecI32V I4Load(const PxI32 i) +{ + return _mm_load1_ps((PxF32*)&i); +} + +PX_FORCE_INLINE VecI32V I4LoadU(const PxI32* i) +{ + return _mm_loadu_ps((PxF32*)i); +} + +PX_FORCE_INLINE VecI32V I4LoadA(const PxI32* i) +{ + ASSERT_ISALIGNED16(i); + return _mm_load_ps((PxF32*)i); +} + +PX_FORCE_INLINE VecI32V VecI32V_Add(const VecI32VArg a, const VecI32VArg b) +{ + return internalWindowsSimd::m128_I2F( + _mm_add_epi32(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); +} + +PX_FORCE_INLINE VecI32V VecI32V_Sub(const VecI32VArg a, const VecI32VArg b) +{ + return internalWindowsSimd::m128_I2F( + _mm_sub_epi32(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); +} + +PX_FORCE_INLINE BoolV VecI32V_IsGrtr(const VecI32VArg a, const VecI32VArg b) +{ + return internalWindowsSimd::m128_I2F( + _mm_cmpgt_epi32(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); +} + +PX_FORCE_INLINE BoolV VecI32V_IsEq(const VecI32VArg a, const VecI32VArg b) +{ + return internalWindowsSimd::m128_I2F( + _mm_cmpeq_epi32(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); +} + +PX_FORCE_INLINE VecI32V V4I32Sel(const BoolV c, 
const VecI32V a, const VecI32V b) +{ + return V4U32Sel(c, a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_Zero() +{ + return V4Zero(); +} + +PX_FORCE_INLINE VecI32V VecI32V_One() +{ + return I4Load(1); +} + +PX_FORCE_INLINE VecI32V VecI32V_Two() +{ + return I4Load(2); +} + +PX_FORCE_INLINE VecI32V VecI32V_MinusOne() +{ + return I4Load(-1); +} + +PX_FORCE_INLINE VecU32V U4Zero() +{ + return U4Load(0); +} + +PX_FORCE_INLINE VecU32V U4One() +{ + return U4Load(1); +} + +PX_FORCE_INLINE VecU32V U4Two() +{ + return U4Load(2); +} + +PX_FORCE_INLINE VecI32V VecI32V_Sel(const BoolV c, const VecI32VArg a, const VecI32VArg b) +{ + PX_ASSERT(_VecMathTests::allElementsEqualBoolV(c, BTTTT()) || + _VecMathTests::allElementsEqualBoolV(c, BFFFF())); + return _mm_or_ps(_mm_andnot_ps(c, b), _mm_and_ps(c, a)); +} + +PX_FORCE_INLINE VecShiftV VecI32V_PrepareShift(const VecI32VArg shift) +{ + VecShiftV preparedShift; + preparedShift.shift = _mm_or_ps(_mm_andnot_ps(BTFFF(), VecI32V_Zero()), _mm_and_ps(BTFFF(), shift)); + return preparedShift; +} + +PX_FORCE_INLINE VecI32V VecI32V_LeftShift(const VecI32VArg a, const VecShiftVArg count) +{ + return internalWindowsSimd::m128_I2F( + _mm_sll_epi32(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(count.shift))); +} + +PX_FORCE_INLINE VecI32V VecI32V_RightShift(const VecI32VArg a, const VecShiftVArg count) +{ + return internalWindowsSimd::m128_I2F( + _mm_srl_epi32(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(count.shift))); +} + +PX_FORCE_INLINE VecI32V VecI32V_And(const VecI32VArg a, const VecI32VArg b) +{ + return _mm_and_ps(a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_Or(const VecI32VArg a, const VecI32VArg b) +{ + return _mm_or_ps(a, b); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetX(const VecI32VArg a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetY(const VecI32VArg a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)); +} + +PX_FORCE_INLINE VecI32V 
VecI32V_GetZ(const VecI32VArg a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)); +} + +PX_FORCE_INLINE VecI32V VecI32V_GetW(const VecI32VArg a) +{ + return _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)); +} + +PX_FORCE_INLINE void PxI32_From_VecI32V(const VecI32VArg a, PxI32* i) +{ + _mm_store_ss((PxF32*)i, a); +} + +PX_FORCE_INLINE VecI32V VecI32V_From_BoolV(const BoolVArg a) +{ + return a; +} + +PX_FORCE_INLINE VecU32V VecU32V_From_BoolV(const BoolVArg a) +{ + return a; +} + +PX_FORCE_INLINE VecI32V VecI32V_Merge(const VecI32VArg a, const VecI32VArg b, const VecI32VArg c, const VecI32VArg d) +{ + const __m128 xw = _mm_move_ss(b, a); // y, y, y, x + const __m128 yz = _mm_move_ss(c, d); // z, z, z, w + return _mm_shuffle_ps(xw, yz, _MM_SHUFFLE(0, 2, 1, 0)); +} + +PX_FORCE_INLINE void V4U32StoreAligned(VecU32V val, VecU32V* address) +{ + *address = val; +} + +PX_FORCE_INLINE Vec4V V4Andc(const Vec4V a, const VecU32V b) +{ + VecU32V result32(a); + result32 = V4U32Andc(result32, b); + return Vec4V(result32); +} + +PX_FORCE_INLINE VecU32V V4IsGrtrV32u(const Vec4V a, const Vec4V b) +{ + return V4IsGrtr(a, b); +} + +PX_FORCE_INLINE VecU16V V4U16LoadAligned(VecU16V* addr) +{ + return *addr; +} + +PX_FORCE_INLINE VecU16V V4U16LoadUnaligned(VecU16V* addr) +{ + return *addr; +} + +// unsigned compares are not supported on x86 +PX_FORCE_INLINE VecU16V V4U16CompareGt(VecU16V a, VecU16V b) +{ + // _mm_cmpgt_epi16 doesn't work for unsigned values unfortunately + // return m128_I2F(_mm_cmpgt_epi16(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); + VecU16V result; + result.m128_u16[0] = PxU16((a).m128_u16[0] > (b).m128_u16[0]); + result.m128_u16[1] = PxU16((a).m128_u16[1] > (b).m128_u16[1]); + result.m128_u16[2] = PxU16((a).m128_u16[2] > (b).m128_u16[2]); + result.m128_u16[3] = PxU16((a).m128_u16[3] > (b).m128_u16[3]); + result.m128_u16[4] = PxU16((a).m128_u16[4] > (b).m128_u16[4]); + result.m128_u16[5] = PxU16((a).m128_u16[5] > (b).m128_u16[5]); + 
result.m128_u16[6] = PxU16((a).m128_u16[6] > (b).m128_u16[6]); + result.m128_u16[7] = PxU16((a).m128_u16[7] > (b).m128_u16[7]); + return result; +} + +PX_FORCE_INLINE VecU16V V4I16CompareGt(VecU16V a, VecU16V b) +{ + return internalWindowsSimd::m128_I2F( + _mm_cmpgt_epi16(internalWindowsSimd::m128_F2I(a), internalWindowsSimd::m128_F2I(b))); +} + +PX_FORCE_INLINE Vec4V Vec4V_From_VecU32V(VecU32V a) +{ + Vec4V result = V4LoadXYZW(PxF32(a.m128_u32[0]), PxF32(a.m128_u32[1]), PxF32(a.m128_u32[2]), PxF32(a.m128_u32[3])); + return result; +} + +PX_FORCE_INLINE Vec4V Vec4V_From_VecI32V(VecI32V a) +{ + return _mm_cvtepi32_ps(internalWindowsSimd::m128_F2I(a)); +} + +PX_FORCE_INLINE VecI32V VecI32V_From_Vec4V(Vec4V a) +{ + return internalWindowsSimd::m128_I2F(_mm_cvttps_epi32(a)); +} + +PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecU32V(VecU32V a) +{ + return Vec4V(a); +} + +PX_FORCE_INLINE Vec4V Vec4V_ReinterpretFrom_VecI32V(VecI32V a) +{ + return Vec4V(a); +} + +PX_FORCE_INLINE VecU32V VecU32V_ReinterpretFrom_Vec4V(Vec4V a) +{ + return VecU32V(a); +} + +PX_FORCE_INLINE VecI32V VecI32V_ReinterpretFrom_Vec4V(Vec4V a) +{ + return VecI32V(a); +} + +template <int index> +PX_FORCE_INLINE VecU32V V4U32SplatElement(VecU32V a) +{ + return internalWindowsSimd::m128_I2F( + _mm_shuffle_epi32(internalWindowsSimd::m128_F2I(a), _MM_SHUFFLE(index, index, index, index))); +} + +template <int index> +PX_FORCE_INLINE Vec4V V4SplatElement(Vec4V a) +{ + return internalWindowsSimd::m128_I2F( + _mm_shuffle_epi32(internalWindowsSimd::m128_F2I(a), _MM_SHUFFLE(index, index, index, index))); +} + +PX_FORCE_INLINE VecU32V U4LoadXYZW(PxU32 x, PxU32 y, PxU32 z, PxU32 w) +{ + VecU32V result; + result.m128_u32[0] = x; + result.m128_u32[1] = y; + result.m128_u32[2] = z; + result.m128_u32[3] = w; + return result; +} + +PX_FORCE_INLINE Vec4V V4ConvertFromI32V(const VecI32V in) +{ + return _mm_cvtepi32_ps(internalWindowsSimd::m128_F2I(in)); +} + +#endif // PSFOUNDATION_PSWINDOWSINLINEAOS_H diff --git 
a/PxShared/src/foundation/include/windows/PsWindowsIntrinsics.h b/PxShared/src/foundation/include/windows/PsWindowsIntrinsics.h new file mode 100644 index 0000000..ca1e9c5 --- /dev/null +++ b/PxShared/src/foundation/include/windows/PsWindowsIntrinsics.h @@ -0,0 +1,190 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSWINDOWSINTRINSICS_H +#define PSFOUNDATION_PSWINDOWSINTRINSICS_H + +#include "Ps.h" +#include "foundation/PxAssert.h" + +// this file is for internal intrinsics - that is, intrinsics that are used in +// cross platform code but do not appear in the API + +#if !PX_WINDOWS_FAMILY +#error "This file should only be included by Windows builds!!" +#endif + +#pragma warning(push) +//'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives' +#pragma warning(disable : 4668) +#if PX_VC == 10 +#pragma warning(disable : 4987) // nonstandard extension used: 'throw (...)' +#endif +#include <intrin.h> +#pragma warning(pop) + +#pragma warning(push) +#pragma warning(disable : 4985) // 'symbol name': attributes not present on previous declaration +#include <math.h> +#pragma warning(pop) + +#include <float.h> +#include <mmintrin.h> + +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse) + +namespace physx +{ +namespace shdfnd +{ + +/* +* Implements a memory barrier +*/ +PX_FORCE_INLINE void memoryBarrier() +{ + _ReadWriteBarrier(); + /* long Barrier; + __asm { + xchg Barrier, eax + }*/ +} + +/*! +Returns the index of the highest set bit. Not valid for zero arg. +*/ +PX_FORCE_INLINE uint32_t highestSetBitUnsafe(uint32_t v) +{ + unsigned long retval; + _BitScanReverse(&retval, v); + return retval; +} + +/*! +Returns the index of the highest set bit. Undefined for zero arg. +*/ +PX_FORCE_INLINE uint32_t lowestSetBitUnsafe(uint32_t v) +{ + unsigned long retval; + _BitScanForward(&retval, v); + return retval; +} + +/*! +Returns the number of leading zeros in v. Returns 32 for v=0. +*/ +PX_FORCE_INLINE uint32_t countLeadingZeros(uint32_t v) +{ + if(v) + { + unsigned long bsr = (unsigned long)-1; + _BitScanReverse(&bsr, v); + return 31 - bsr; + } + else + return 32; +} + +/*! +Prefetch aligned cache size around \c ptr+offset. 
+*/ +#if !PX_ARM +PX_FORCE_INLINE void prefetchLine(const void* ptr, uint32_t offset = 0) +{ + // cache line on X86/X64 is 64-bytes so a 128-byte prefetch would require 2 prefetches. + // However, we can only dispatch a limited number of prefetch instructions so we opt to prefetch just 1 cache line + /*_mm_prefetch(((const char*)ptr + offset), _MM_HINT_T0);*/ + // We get slightly better performance prefetching to non-temporal addresses instead of all cache levels + _mm_prefetch(((const char*)ptr + offset), _MM_HINT_NTA); +} +#else +PX_FORCE_INLINE void prefetchLine(const void* ptr, uint32_t offset = 0) +{ + // ARM path assumes a 32-byte cache line (prefetch() below strides by 32) + __prefetch(((const char*)ptr + offset)); +} +#endif + +/*! +Prefetch \c count bytes starting at \c ptr. \c count must be non-zero: a zero count with a line-aligned \c ptr wraps the line counter. +*/ +#if !PX_ARM +PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) +{ + const char* cp = (char*)ptr; + uint64_t p = size_t(ptr); + uint64_t startLine = p >> 6, endLine = (p + count - 1) >> 6; + uint64_t lines = endLine - startLine + 1; + do + { + prefetchLine(cp); + cp += 64; + } while(--lines); +} +#else +PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) +{ + const char* cp = (char*)ptr; + uint32_t p = size_t(ptr); + uint32_t startLine = p >> 5, endLine = (p + count - 1) >> 5; + uint32_t lines = endLine - startLine + 1; + do + { + prefetchLine(cp); + cp += 32; + } while(--lines); +} +#endif + +//! \brief platform-specific reciprocal +PX_CUDA_CALLABLE PX_FORCE_INLINE float recipFast(float a) +{ + return 1.0f / a; +} + +//! \brief platform-specific fast reciprocal square root +PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrtFast(float a) +{ + return 1.0f / ::sqrtf(a); +} + +//! 
\brief platform-specific floor +PX_CUDA_CALLABLE PX_FORCE_INLINE float floatFloor(float x) +{ + return ::floorf(x); +} + +#define NS_EXPECT_TRUE(x) x +#define NS_EXPECT_FALSE(x) x + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSWINDOWSINTRINSICS_H diff --git a/PxShared/src/foundation/include/windows/PsWindowsLoadLibrary.h b/PxShared/src/foundation/include/windows/PsWindowsLoadLibrary.h new file mode 100644 index 0000000..adfd8e4 --- /dev/null +++ b/PxShared/src/foundation/include/windows/PsWindowsLoadLibrary.h @@ -0,0 +1,72 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef PS_WINDOWS_FOUNDATION_LOADLIBRARY_H +#define PS_WINDOWS_FOUNDATION_LOADLIBRARY_H + +#include "foundation/PxPreprocessor.h" +#include "windows/PsWindowsInclude.h" +#include "foundation/windows/PxWindowsFoundationDelayLoadHook.h" + +namespace physx +{ +namespace shdfnd +{ + EXTERN_C IMAGE_DOS_HEADER __ImageBase; + + PX_INLINE FARPROC WINAPI foundationDliNotePreLoadLibrary(const char* libraryName, const physx::PxFoundationDelayLoadHook* delayLoadHook) + { + if(!delayLoadHook) + { + return (FARPROC)::LoadLibraryA(libraryName); + } + else + { + if(strstr(libraryName, "PxFoundation")) + { + if(strstr(libraryName, "DEBUG")) + return (FARPROC)::LoadLibraryA(delayLoadHook->getPxFoundationDEBUGDllName()); + + if(strstr(libraryName, "CHECKED")) + return (FARPROC)::LoadLibraryA(delayLoadHook->getPxFoundationCHECKEDDllName()); + + if(strstr(libraryName, "PROFILE")) + return (FARPROC)::LoadLibraryA(delayLoadHook->getPxFoundationPROFILEDllName()); + + return (FARPROC)::LoadLibraryA(delayLoadHook->getPxFoundationDllName()); + } + } + return NULL; + } +} // namespace shdfnd +} // namespace physx + + +#endif // PS_WINDOWS_FOUNDATION_LOADLIBRARY_H diff --git a/PxShared/src/foundation/include/windows/PsWindowsTrigConstants.h b/PxShared/src/foundation/include/windows/PsWindowsTrigConstants.h new file mode 100644 index 0000000..dda3e3b --- /dev/null +++ b/PxShared/src/foundation/include/windows/PsWindowsTrigConstants.h @@ -0,0 +1,87 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to 
you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PSFOUNDATION_PSWINDOWSTRIGCONSTANTS_H +#define PSFOUNDATION_PSWINDOWSTRIGCONSTANTS_H + +#define PX_GLOBALCONST extern const __declspec(selectany) + +__declspec(align(16)) struct PX_VECTORF32 +{ + float f[4]; +}; + +//#define PX_PI 3.141592654f +//#define PX_2PI 6.283185307f +//#define PX_1DIVPI 0.318309886f +//#define PX_1DIV2PI 0.159154943f +//#define PX_PIDIV2 1.570796327f +//#define PX_PIDIV4 0.785398163f + +PX_GLOBALCONST PX_VECTORF32 g_PXSinCoefficients0 = { { 1.0f, -0.166666667f, 8.333333333e-3f, -1.984126984e-4f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXSinCoefficients1 = { { 2.755731922e-6f, -2.505210839e-8f, 1.605904384e-10f, -7.647163732e-13f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXSinCoefficients2 = { { 2.811457254e-15f, -8.220635247e-18f, 1.957294106e-20f, -3.868170171e-23f } }; +PX_GLOBALCONST PX_VECTORF32 g_PXCosCoefficients0 = { { 1.0f, -0.5f, 4.166666667e-2f, -1.388888889e-3f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXCosCoefficients1 = { { 2.480158730e-5f, -2.755731922e-7f, 2.087675699e-9f, -1.147074560e-11f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXCosCoefficients2 = { { 4.779477332e-14f, -1.561920697e-16f, 4.110317623e-19f, -8.896791392e-22f } }; +PX_GLOBALCONST PX_VECTORF32 g_PXTanCoefficients0 = { { 1.0f, 0.333333333f, 0.133333333f, 5.396825397e-2f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXTanCoefficients1 = { { 2.186948854e-2f, 8.863235530e-3f, 3.592128167e-3f, 1.455834485e-3f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXTanCoefficients2 = { { 5.900274264e-4f, 2.391290764e-4f, 9.691537707e-5f, 3.927832950e-5f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXASinCoefficients0 = { { -0.05806367563904f, -0.41861972469416f, 0.22480114791621f, 2.17337241360606f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXASinCoefficients1 = { { 0.61657275907170f, 4.29696498283455f, -1.18942822255452f, -6.53784832094831f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXASinCoefficients2 = { { -1.36926553863413f, -4.48179294237210f, 1.41810672941833f, 5.48179257935713f } }; +PX_GLOBALCONST 
PX_VECTORF32 g_PXATanCoefficients0 = { { 1.0f, 0.333333334f, 0.2f, 0.142857143f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXATanCoefficients1 = { { 1.111111111e-1f, 9.090909091e-2f, 7.692307692e-2f, 6.666666667e-2f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXATanCoefficients2 = { { 5.882352941e-2f, 5.263157895e-2f, 4.761904762e-2f, 4.347826087e-2f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXSinEstCoefficients = { { 1.0f, -1.66521856991541e-1f, 8.199913018755e-3f, -1.61475937228e-4f } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXCosEstCoefficients = { { 1.0f, -4.95348008918096e-1f, 3.878259962881e-2f, -9.24587976263e-4f } }; +PX_GLOBALCONST PX_VECTORF32 g_PXTanEstCoefficients = { { 2.484f, -1.954923183e-1f, 2.467401101f, PxInvPi } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXATanEstCoefficients = { { 7.689891418951e-1f, 1.104742493348f, 8.661844266006e-1f, PxPiDivTwo } }; +PX_GLOBALCONST PX_VECTORF32 +g_PXASinEstCoefficients = { { -1.36178272886711f, 2.37949493464538f, -8.08228565650486e-1f, 2.78440142746736e-1f } }; +PX_GLOBALCONST PX_VECTORF32 g_PXASinEstConstants = { { 1.00000011921f, PxPiDivTwo, 0.0f, 0.0f } }; +PX_GLOBALCONST PX_VECTORF32 g_PXPiConstants0 = { { PxPi, PxTwoPi, PxInvPi, PxInvTwoPi } }; +PX_GLOBALCONST PX_VECTORF32 g_PXReciprocalTwoPi = { { PxInvTwoPi, PxInvTwoPi, PxInvTwoPi, PxInvTwoPi } }; +PX_GLOBALCONST PX_VECTORF32 g_PXTwoPi = { { PxTwoPi, PxTwoPi, PxTwoPi, PxTwoPi } }; + +#endif diff --git a/PxShared/src/foundation/src/PsAllocator.cpp b/PxShared/src/foundation/src/PsAllocator.cpp new file mode 100644 index 0000000..3952b27 --- /dev/null +++ b/PxShared/src/foundation/src/PsAllocator.cpp @@ -0,0 +1,124 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "PsFoundation.h" +#include "PsAllocator.h" +#include "PsHashMap.h" +#include "PsArray.h" +#include "PsMutex.h" + +namespace physx +{ +namespace shdfnd +{ + +#if PX_USE_NAMED_ALLOCATOR +namespace +{ +typedef HashMap<const NamedAllocator*, const char*, Hash<const NamedAllocator*>, NonTrackingAllocator> AllocNameMap; +PX_INLINE AllocNameMap& getMap() +{ + return getFoundation().getNamedAllocMap(); +} +PX_INLINE Foundation::Mutex& getMutex() +{ + return getFoundation().getNamedAllocMutex(); +} +} + +NamedAllocator::NamedAllocator(const PxEMPTY) +{ + Foundation::Mutex::ScopedLock lock(getMutex()); + getMap().insert(this, 0); +} + +NamedAllocator::NamedAllocator(const char* name) +{ + Foundation::Mutex::ScopedLock lock(getMutex()); + getMap().insert(this, name); +} + +NamedAllocator::NamedAllocator(const NamedAllocator& other) +{ + Foundation::Mutex::ScopedLock lock(getMutex()); + const AllocNameMap::Entry* e = getMap().find(&other); + PX_ASSERT(e); + const char* name = e->second; // The copy is important because insert might invalidate the referenced hash entry + getMap().insert(this, name); +} + +NamedAllocator::~NamedAllocator() +{ + Foundation::Mutex::ScopedLock lock(getMutex()); + bool erased = getMap().erase(this); + PX_UNUSED(erased); + PX_ASSERT(erased); +} + +NamedAllocator& NamedAllocator::operator=(const NamedAllocator& other) +{ + Foundation::Mutex::ScopedLock lock(getMutex()); + const AllocNameMap::Entry* e = getMap().find(&other); + PX_ASSERT(e); + getMap()[this] = e->second; + return *this; +} + +void* NamedAllocator::allocate(size_t size, const char* filename, int line) +{ + if(!size) + return 0; + Foundation::Mutex::ScopedLock lock(getMutex()); + const AllocNameMap::Entry* e = getMap().find(this); + PX_ASSERT(e); + return getAllocator().allocate(size, e->second, filename, line); +} + +void NamedAllocator::deallocate(void* ptr) +{ + if(ptr) + getAllocator().deallocate(ptr); +} + +#endif // PX_USE_NAMED_ALLOCATOR + +void* Allocator::allocate(size_t size, 
const char* file, int line) +{ + if(!size) + return 0; + return getAllocator().allocate(size, "", file, line); +} +void Allocator::deallocate(void* ptr) +{ + if(ptr) + getAllocator().deallocate(ptr); +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/PsAssert.cpp b/PxShared/src/foundation/src/PsAssert.cpp new file mode 100644 index 0000000..3070383 --- /dev/null +++ b/PxShared/src/foundation/src/PsAssert.cpp @@ -0,0 +1,90 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxAssert.h" + +#include <stdio.h> +#include "PsString.h" + +#if PX_WINDOWS_FAMILY +#include <crtdbg.h> +#elif PX_NX +#include "nx/PsNXAbort.h" +#endif + +namespace +{ +class DefaultAssertHandler : public physx::PxAssertHandler +{ + virtual void operator()(const char* expr, const char* file, int line, bool& ignore) + { + PX_UNUSED(ignore); // is used only in debug windows config + char buffer[1024]; +#if PX_WINDOWS_FAMILY + sprintf_s(buffer, "%s(%d) : Assertion failed: %s\n", file, line, expr); +#else + sprintf(buffer, "%s(%d) : Assertion failed: %s\n", file, line, expr); +#endif + physx::shdfnd::printString(buffer); +#if PX_WINDOWS_FAMILY&& PX_DEBUG + // _CrtDbgReport returns -1 on error, 1 on 'retry', 0 otherwise including 'ignore'. + // Hitting 'abort' will terminate the process immediately. 
+ int result = _CrtDbgReport(_CRT_ASSERT, file, line, NULL, "%s", buffer); + int mode = _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_REPORT_MODE); + ignore = _CRTDBG_MODE_WNDW == mode && result == 0; + if(ignore) + return; + __debugbreak(); +#elif PX_WINDOWS_FAMILY&& PX_CHECKED + __debugbreak(); +#elif PX_NX + abort(buffer); +#else + abort(); +#endif + } +}; + +DefaultAssertHandler sAssertHandler; +physx::PxAssertHandler* sAssertHandlerPtr = &sAssertHandler; +} + +namespace physx +{ + +PxAssertHandler& PxGetAssertHandler() +{ + return *sAssertHandlerPtr; +} + +void PxSetAssertHandler(PxAssertHandler& handler) +{ + sAssertHandlerPtr = &handler; +} +} // end of physx namespace diff --git a/PxShared/src/foundation/src/PsFoundation.cpp b/PxShared/src/foundation/src/PsFoundation.cpp new file mode 100644 index 0000000..e559b57 --- /dev/null +++ b/PxShared/src/foundation/src/PsFoundation.cpp @@ -0,0 +1,278 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxProfiler.h" +#include "foundation/PxErrorCallback.h" +#include "foundation/PxFoundationVersion.h" +#include "PsFoundation.h" +#include "PsString.h" +#include "PsAllocator.h" + +namespace physx +{ +namespace shdfnd +{ + +Foundation::Foundation(PxErrorCallback& errc, PxAllocatorCallback& alloc) +: mAllocatorCallback(alloc) +, mErrorCallback(errc) +, mBroadcastingAllocator(alloc, errc) +, mBroadcastingError(errc) +, +#if PX_CHECKED + mReportAllocationNames(true) +, +#else + mReportAllocationNames(false) +, +#endif + mErrorMask(PxErrorCode::Enum(~0)) +, mErrorMutex(PX_DEBUG_EXP("Foundation::mErrorMutex")) +, mNamedAllocMutex(PX_DEBUG_EXP("Foundation::mNamedAllocMutex")) +, mTempAllocMutex(PX_DEBUG_EXP("Foundation::mTempAllocMutex")) +{ +} + +Foundation::~Foundation() +{ + // deallocate temp buffer allocations + Allocator alloc; + for(PxU32 i = 0; i < mTempAllocFreeTable.size(); ++i) + { + for(TempAllocatorChunk* ptr = mTempAllocFreeTable[i]; ptr;) + { + TempAllocatorChunk* next = ptr->mNext; + alloc.deallocate(ptr); + ptr = next; + } + } + mTempAllocFreeTable.reset(); +} + +Foundation& Foundation::getInstance() +{ + 
PX_ASSERT(mInstance); + return *mInstance; +} + +PxU32 Foundation::getWarnOnceTimestamp() +{ + PX_ASSERT(mInstance != NULL); + return mWarnOnceTimestap; +} + +void Foundation::error(PxErrorCode::Enum c, const char* file, int line, const char* messageFmt, ...) +{ + va_list va; + va_start(va, messageFmt); + errorImpl(c, file, line, messageFmt, va); + va_end(va); +} + +void Foundation::errorImpl(PxErrorCode::Enum e, const char* file, int line, const char* messageFmt, va_list va) +{ + PX_ASSERT(messageFmt); + if(e & mErrorMask) + { + // this function is reentrant but user's error callback may not be, so... + Mutex::ScopedLock lock(mErrorMutex); + + // using a static fixed size buffer here because: + // 1. vsnprintf return values differ between platforms + // 2. va_start is only usable in functions with ellipses + // 3. ellipses (...) cannot be passed to called function + // which would be necessary to dynamically grow the buffer here + + static const size_t bufSize = 1024; + char stringBuffer[bufSize]; + shdfnd::vsnprintf(stringBuffer, bufSize, messageFmt, va); + + mBroadcastingError.reportError(e, stringBuffer, file, line); + } +} + +Foundation* Foundation::createInstance(PxU32 version, PxErrorCallback& errc, PxAllocatorCallback& alloc) +{ + if(version != PX_FOUNDATION_VERSION) + { + char* buffer = new char[256]; + physx::shdfnd::snprintf(buffer, 256, "Wrong version: foundation version is 0x%08x, tried to create 0x%08x", + PX_FOUNDATION_VERSION, version); + errc.reportError(PxErrorCode::eINVALID_PARAMETER, buffer, __FILE__, __LINE__); + return 0; + } + + if(!mInstance) + { + // if we don't assign this here, the Foundation object can't create member + // subobjects which require the allocator + + mInstance = reinterpret_cast<Foundation*>(alloc.allocate(sizeof(Foundation), "Foundation", __FILE__, __LINE__)); + + if(mInstance) + { + PX_PLACEMENT_NEW(mInstance, Foundation)(errc, alloc); + + PX_ASSERT(mRefCount == 0); + mRefCount = 1; + + // skip 0 which marks 
uninitialized timestaps in PX_WARN_ONCE + mWarnOnceTimestap = (mWarnOnceTimestap == PX_MAX_U32) ? 1 : mWarnOnceTimestap + 1; + + return mInstance; + } + else + { + errc.reportError(PxErrorCode::eINTERNAL_ERROR, "Memory allocation for foundation object failed.", __FILE__, + __LINE__); + } + } + else + { + errc.reportError(PxErrorCode::eINVALID_OPERATION, + "Foundation object exists already. Only one instance per process can be created.", __FILE__, + __LINE__); + } + + return 0; +} + +void Foundation::destroyInstance() +{ + PX_ASSERT(mInstance != NULL); + + if(mRefCount == 1) + { + PxAllocatorCallback& alloc = mInstance->getAllocatorCallback(); + mInstance->~Foundation(); + alloc.deallocate(mInstance); + mInstance = 0; + mRefCount = 0; + } + else + { + mInstance->error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, + "Foundation destruction failed due to pending module references. Close/release all depending " + "modules first."); + } +} + +void Foundation::incRefCount() +{ + PX_ASSERT(mInstance != NULL); + + if(mRefCount > 0) + { + mRefCount++; + } + else + { + mInstance->error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, + "Foundation: Invalid registration detected."); + } +} + +void Foundation::decRefCount() +{ + PX_ASSERT(mInstance != NULL); + + if(mRefCount > 0) + { + mRefCount--; + } + else + { + mInstance->error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, + "Foundation: Invalid deregistration detected."); + } +} + +void Foundation::release() +{ + Foundation::destroyInstance(); +} + +PxAllocatorCallback& getAllocator() +{ + return getFoundation().getAllocator(); +} + +Foundation* Foundation::mInstance = NULL; +PxU32 Foundation::mRefCount = 0; +PxU32 Foundation::mWarnOnceTimestap = 0; + +void Foundation::registerAllocationListener(physx::shdfnd::AllocationListener& listener) +{ + Mutex::ScopedLock lock(mListenerMutex); + mBroadcastingAllocator.registerListener(listener); +} + +void 
Foundation::deregisterAllocationListener(physx::shdfnd::AllocationListener& listener) +{ + Mutex::ScopedLock lock(mListenerMutex); + mBroadcastingAllocator.deregisterListener(listener); +} + +void Foundation::registerErrorCallback(PxErrorCallback& callback) +{ + Mutex::ScopedLock lock(mListenerMutex); + mBroadcastingError.registerListener(callback); +} + +void Foundation::deregisterErrorCallback(PxErrorCallback& callback) +{ + Mutex::ScopedLock lock(mListenerMutex); + mBroadcastingError.deregisterListener(callback); +} + +physx::PxProfilerCallback* gProfilerCallback = NULL; + +} // namespace shdfnd +} // namespace physx + +physx::PxFoundation* PxCreateFoundation(physx::PxU32 version, physx::PxAllocatorCallback& allocator, + physx::PxErrorCallback& errorCallback) +{ + return physx::shdfnd::Foundation::createInstance(version, errorCallback, allocator); +} + +physx::PxFoundation& PxGetFoundation() +{ + return physx::shdfnd::Foundation::getInstance(); +} + +physx::PxProfilerCallback* PxGetProfilerCallback() +{ + return physx::shdfnd::gProfilerCallback; +} + +void PxSetProfilerCallback(physx::PxProfilerCallback* profiler) +{ + physx::shdfnd::gProfilerCallback = profiler; +} diff --git a/PxShared/src/foundation/src/PsMathUtils.cpp b/PxShared/src/foundation/src/PsMathUtils.cpp new file mode 100644 index 0000000..b900fdd --- /dev/null +++ b/PxShared/src/foundation/src/PsMathUtils.cpp @@ -0,0 +1,212 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxMat33.h" +#include "foundation/PxMathUtils.h" +#include "foundation/PxVec4.h" +#include "foundation/PxAssert.h" +#include "PsMathUtils.h" +#include "PsUtilities.h" +#include "PsBasicTemplates.h" + +using namespace physx; +using namespace physx::shdfnd; +using namespace physx::intrinsics; + +PX_FOUNDATION_API PxQuat physx::PxShortestRotation(const PxVec3& v0, const PxVec3& v1) +{ + const PxReal d = v0.dot(v1); + const PxVec3 cross = v0.cross(v1); + + PxQuat q = d > -1 ? PxQuat(cross.x, cross.y, cross.z, 1 + d) : PxAbs(v0.x) < 0.1f ? 
PxQuat(0.0f, v0.z, -v0.y, 0.0f) + : PxQuat(v0.y, -v0.x, 0.0f, 0.0f); + + return q.getNormalized(); +} + +namespace +{ +// indexed rotation around axis, with sine and cosine of half-angle +PxQuat indexedRotation(PxU32 axis, PxReal s, PxReal c) +{ + PxReal v[3] = { 0, 0, 0 }; + v[axis] = s; + return PxQuat(v[0], v[1], v[2], c); +} +} + +PX_FOUNDATION_API PxVec3 physx::PxDiagonalize(const PxMat33& m, PxQuat& massFrame) +{ + // jacobi rotation using quaternions (from an idea of Stan Melax, with fix for precision issues) + + const PxU32 MAX_ITERS = 24; + + PxQuat q = PxQuat(PxIdentity); + + PxMat33 d; + for(PxU32 i = 0; i < MAX_ITERS; i++) + { + PxMat33 axes(q); + d = axes.getTranspose() * m * axes; + + PxReal d0 = PxAbs(d[1][2]), d1 = PxAbs(d[0][2]), d2 = PxAbs(d[0][1]); + PxU32 a = PxU32(d0 > d1 && d0 > d2 ? 0 : d1 > d2 ? 1 : 2); // rotation axis index, from largest off-diagonal + // element + + PxU32 a1 = shdfnd::getNextIndex3(a), a2 = shdfnd::getNextIndex3(a1); + if(d[a1][a2] == 0.0f || PxAbs(d[a1][a1] - d[a2][a2]) > 2e6f * PxAbs(2.0f * d[a1][a2])) + break; + + PxReal w = (d[a1][a1] - d[a2][a2]) / (2.0f * d[a1][a2]); // cot(2 * phi), where phi is the rotation angle + PxReal absw = PxAbs(w); + + PxQuat r; + if(absw > 1000) + r = indexedRotation(a, 1 / (4 * w), 1.f); // h will be very close to 1, so use small angle approx instead + else + { + PxReal t = 1 / (absw + PxSqrt(w * w + 1)); // absolute value of tan phi + PxReal h = 1 / PxSqrt(t * t + 1); // absolute value of cos phi + + PX_ASSERT(h != 1); // |w|<1000 guarantees this with typical IEEE754 machine eps (approx 6e-8) + r = indexedRotation(a, PxSqrt((1 - h) / 2) * PxSign(w), PxSqrt((1 + h) / 2)); + } + + q = (q * r).getNormalized(); + } + + massFrame = q; + return PxVec3(d.column0.x, d.column1.y, d.column2.z); +} + +/** +\brief computes a oriented bounding box around the scaled basis. +\param basis Input = skewed basis, Output = (normalized) orthogonal basis. +\return Bounding box extent. 
+*/ +PxVec3 physx::shdfnd::optimizeBoundingBox(PxMat33& basis) +{ + PxVec3* PX_RESTRICT vec = &basis[0]; // PT: don't copy vectors if not needed... + + // PT: since we store the magnitudes to memory, we can avoid the FCMPs afterwards + PxVec3 magnitude(vec[0].magnitudeSquared(), vec[1].magnitudeSquared(), vec[2].magnitudeSquared()); + + // find indices sorted by magnitude + unsigned int i = magnitude[1] > magnitude[0] ? 1 : 0u; + unsigned int j = magnitude[2] > magnitude[1 - i] ? 2 : 1 - i; + const unsigned int k = 3 - i - j; + + if(magnitude[i] < magnitude[j]) + swap(i, j); + + PX_ASSERT(magnitude[i] >= magnitude[j] && magnitude[i] >= magnitude[k] && magnitude[j] >= magnitude[k]); + + // ortho-normalize basis + + PxReal invSqrt = PxRecipSqrt(magnitude[i]); + magnitude[i] *= invSqrt; + vec[i] *= invSqrt; // normalize the first axis + PxReal dotij = vec[i].dot(vec[j]); + PxReal dotik = vec[i].dot(vec[k]); + magnitude[i] += PxAbs(dotij) + PxAbs(dotik); // elongate the axis by projection of the other two + vec[j] -= vec[i] * dotij; // orthogonalize the two remaining axes relative to vec[i] + vec[k] -= vec[i] * dotik; + + magnitude[j] = vec[j].normalize(); + PxReal dotjk = vec[j].dot(vec[k]); + magnitude[j] += PxAbs(dotjk); // elongate the axis by projection of the other one + vec[k] -= vec[j] * dotjk; // orthogonalize vec[k] relative to vec[j] + + magnitude[k] = vec[k].normalize(); + + return magnitude; +} + +PxQuat physx::shdfnd::slerp(const PxReal t, const PxQuat& left, const PxQuat& right) +{ + const PxReal quatEpsilon = (PxReal(1.0e-8f)); + + PxReal cosine = left.dot(right); + PxReal sign = PxReal(1); + if(cosine < 0) + { + cosine = -cosine; + sign = PxReal(-1); + } + + PxReal sine = PxReal(1) - cosine * cosine; + + if(sine >= quatEpsilon * quatEpsilon) + { + sine = PxSqrt(sine); + const PxReal angle = PxAtan2(sine, cosine); + const PxReal i_sin_angle = PxReal(1) / sine; + + const PxReal leftw = PxSin(angle * (PxReal(1) - t)) * i_sin_angle; + const PxReal rightw = 
PxSin(angle * t) * i_sin_angle * sign; + + return left * leftw + right * rightw; + } + + return left; +} + +void physx::shdfnd::integrateTransform(const PxTransform& curTrans, const PxVec3& linvel, const PxVec3& angvel, + PxReal timeStep, PxTransform& result) +{ + result.p = curTrans.p + linvel * timeStep; + + // from void DynamicsContext::integrateAtomPose(PxsRigidBody* atom, Cm::BitMap &shapeChangedMap) const: + // Integrate the rotation using closed form quaternion integrator + PxReal w = angvel.magnitudeSquared(); + + if(w != 0.0f) + { + w = PxSqrt(w); + if(w != 0.0f) + { + const PxReal v = timeStep * w * 0.5f; + const PxReal q = PxCos(v); + const PxReal s = PxSin(v) / w; + + const PxVec3 pqr = angvel * s; + const PxQuat quatVel(pqr.x, pqr.y, pqr.z, 0); + PxQuat out; // need to have temporary, otherwise we may overwrite input if &curTrans == &result. + out = quatVel * curTrans.q; + out.x += curTrans.q.x * q; + out.y += curTrans.q.y * q; + out.z += curTrans.q.z * q; + out.w += curTrans.q.w * q; + result.q = out; + return; + } + } + // orientation stays the same - convert from quat to matrix: + result.q = curTrans.q; +} diff --git a/PxShared/src/foundation/src/PsString.cpp b/PxShared/src/foundation/src/PsString.cpp new file mode 100644 index 0000000..adb29d6 --- /dev/null +++ b/PxShared/src/foundation/src/PsString.cpp @@ -0,0 +1,185 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PsString.h" +#include <stdarg.h> +#include <stdio.h> +#include <string.h> + +#if PX_WINDOWS_FAMILY +#pragma warning(push) +#pragma warning(disable : 4996) // unsafe string functions +#endif + +#if PX_PS4 || PX_APPLE_FAMILY +#pragma clang diagnostic push +// error : format string is not a string literal +#pragma clang diagnostic ignored "-Wformat-nonliteral" +#endif + +namespace physx +{ +namespace shdfnd +{ +// cross-platform implementations + +int32_t strcmp(const char* str1, const char* str2) +{ + return ::strcmp(str1, str2); +} + +int32_t strncmp(const char* str1, const char* str2, size_t count) +{ + return ::strncmp(str1, str2, count); +} + +int32_t snprintf(char* dst, size_t dstSize, const char* format, ...) 
+{ + va_list arg; + va_start(arg, format); + int32_t r = shdfnd::vsnprintf(dst, dstSize, format, arg); + va_end(arg); + return r; +} + +int32_t sscanf(const char* buffer, const char* format, ...) +{ + va_list arg; + va_start(arg, format); +#if (PX_VC < 12) && !PX_LINUX + int32_t r = ::sscanf(buffer, format, arg); +#else + int32_t r = ::vsscanf(buffer, format, arg); +#endif + va_end(arg); + + return r; +} + +size_t strlcpy(char* dst, size_t dstSize, const char* src) +{ + size_t i = 0; + if(dst && dstSize) + { + for(; i + 1 < dstSize && src[i]; i++) // copy up to dstSize-1 bytes + dst[i] = src[i]; + dst[i] = 0; // always null-terminate + } + + while(src[i]) // read any remaining characters in the src string to get the length + i++; + + return i; +} + +size_t strlcat(char* dst, size_t dstSize, const char* src) +{ + size_t i = 0, s = 0; + if(dst && dstSize) + { + s = strlen(dst); + for(; i + s + 1 < dstSize && src[i]; i++) // copy until total is at most dstSize-1 + dst[i + s] = src[i]; + dst[i + s] = 0; // always null-terminate + } + + while(src[i]) // read any remaining characters in the src string to get the length + i++; + + return i + s; +} + +void strlwr(char* str) +{ + for(; *str; str++) + if(*str >= 'A' && *str <= 'Z') + *str += 32; +} + +void strupr(char* str) +{ + for(; *str; str++) + if(*str >= 'a' && *str <= 'z') + *str -= 32; +} + +int32_t vsnprintf(char* dst, size_t dstSize, const char* src, va_list arg) +{ + +#if PX_VC // MSVC is not C99-compliant... + int32_t result = dst ? ::vsnprintf(dst, dstSize, src, arg) : -1; + if(dst && (result == int32_t(dstSize) || result < 0)) + dst[dstSize - 1] = 0; // string was truncated or there wasn't room for the NULL + if(result < 0) + result = _vscprintf(src, arg); // work out how long the answer would have been. 
+#else + int32_t result = ::vsnprintf(dst, dstSize, src, arg); +#endif + return result; +} + +int32_t stricmp(const char* str, const char* str1) +{ +#if PX_VC + return (::_stricmp(str, str1)); +#else + return (::strcasecmp(str, str1)); +#endif +} + +int32_t strnicmp(const char* str, const char* str1, size_t n) +{ +#if PX_VC + return (::_strnicmp(str, str1, n)); +#else + return (::strncasecmp(str, str1, n)); +#endif +} + +void printFormatted(const char* format, ...) +{ + char buf[MAX_PRINTFORMATTED_LENGTH]; + + va_list arg; + va_start(arg, format); + vsnprintf(buf, MAX_PRINTFORMATTED_LENGTH, format, arg); + va_end(arg); + + printString(buf); +} +} +} + +#if PX_PS4 || PX_APPLE_FAMILY +#pragma clang diagnostic pop +#endif + +#if PX_WINDOWS_FAMILY +#pragma warning(pop) +#endif diff --git a/PxShared/src/foundation/src/PsTempAllocator.cpp b/PxShared/src/foundation/src/PsTempAllocator.cpp new file mode 100644 index 0000000..c917a65 --- /dev/null +++ b/PxShared/src/foundation/src/PsTempAllocator.cpp @@ -0,0 +1,129 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxMath.h" + +#include "PsFoundation.h" +#include "PsTempAllocator.h" +#include "PsArray.h" +#include "PsMutex.h" +#include "PsAtomic.h" +#include "PsIntrinsics.h" +#include "PsBitUtils.h" + +#if PX_VC +#pragma warning(disable : 4706) // assignment within conditional expression +#endif + +namespace physx +{ +namespace shdfnd +{ +namespace +{ +typedef TempAllocatorChunk Chunk; +typedef Array<Chunk*, NonTrackingAllocator> AllocFreeTable; + +PX_INLINE Foundation::AllocFreeTable& getFreeTable() +{ + return getFoundation().getTempAllocFreeTable(); +} +PX_INLINE Foundation::Mutex& getMutex() +{ + return getFoundation().getTempAllocMutex(); +} + +const PxU32 sMinIndex = 8; // 256B min +const PxU32 sMaxIndex = 17; // 128kB max +} + +void* TempAllocator::allocate(size_t size, const char* filename, int line) +{ + if(!size) + return 0; + + uint32_t index = PxMax(highestSetBit(uint32_t(size) + sizeof(Chunk) - 1), sMinIndex); + + Chunk* chunk = 0; + if(index < sMaxIndex) + { + Foundation::Mutex::ScopedLock lock(getMutex()); + + // find chunk up to 16x bigger than necessary + Chunk** it = getFreeTable().begin() + index - sMinIndex; + 
Chunk** end = PxMin(it + 3, getFreeTable().end()); + while(it < end && !(*it)) + ++it; + + if(it < end) + { + // pop top off freelist + chunk = *it; + *it = chunk->mNext; + index = uint32_t(it - getFreeTable().begin() + sMinIndex); + } + else + // create new chunk + chunk = reinterpret_cast<Chunk*>(NonTrackingAllocator().allocate(size_t(2 << index), filename, line)); + } + else + { + // too big for temp allocation, forward to base allocator + chunk = reinterpret_cast<Chunk*>(NonTrackingAllocator().allocate(size + sizeof(Chunk), filename, line)); + } + + chunk->mIndex = index; + void* ret = chunk + 1; + PX_ASSERT((size_t(ret) & 0xf) == 0); // SDK types require at minimum 16 byte allignment. + return ret; +} + +void TempAllocator::deallocate(void* ptr) +{ + if(!ptr) + return; + + Chunk* chunk = reinterpret_cast<Chunk*>(ptr) - 1; + uint32_t index = chunk->mIndex; + + if(index >= sMaxIndex) + return NonTrackingAllocator().deallocate(chunk); + + Foundation::Mutex::ScopedLock lock(getMutex()); + + index -= sMinIndex; + if(getFreeTable().size() <= index) + getFreeTable().resize(index + 1); + + chunk->mNext = getFreeTable()[index]; + getFreeTable()[index] = chunk; +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/PsUtilities.cpp b/PxShared/src/foundation/src/PsUtilities.cpp new file mode 100644 index 0000000..c6a6dc4 --- /dev/null +++ b/PxShared/src/foundation/src/PsUtilities.cpp @@ -0,0 +1,73 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxMat33.h" +#include "foundation/PxQuat.h" +#include "foundation/PxTransform.h" +#include "PsUtilities.h" +#include "PsUserAllocated.h" +#include "PsFPU.h" + +namespace physx +{ +namespace shdfnd +{ + +bool checkValid(const float& f) +{ + return PxIsFinite(f); +} +bool checkValid(const PxVec3& v) +{ + return PxIsFinite(v.x) && PxIsFinite(v.y) && PxIsFinite(v.z); +} + +bool checkValid(const PxTransform& t) +{ + return checkValid(t.p) && checkValid(t.q); +} + +bool checkValid(const PxQuat& q) +{ + return PxIsFinite(q.x) && PxIsFinite(q.y) && PxIsFinite(q.z) && PxIsFinite(q.w); +} +bool checkValid(const PxMat33& m) +{ + return PxIsFinite(m(0, 0)) && PxIsFinite(m(1, 0)) && PxIsFinite(m(2, 0)) && PxIsFinite(m(0, 1)) && + PxIsFinite(m(1, 1)) && PxIsFinite(m(2, 1)) && PxIsFinite(m(0, 3)) && PxIsFinite(m(1, 3)) && + PxIsFinite(m(2, 3)); +} +bool checkValid(const char* string) +{ + static const PxU32 maxLength = 4096; + return strnlen(string, maxLength) != maxLength; +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/nx/PsNXAtomic.cpp b/PxShared/src/foundation/src/nx/PsNXAtomic.cpp new file mode 100644 index 0000000..1ab5e6b --- /dev/null +++ b/PxShared/src/foundation/src/nx/PsNXAtomic.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#define PAUSE() asm ("nop") + +#include "PsAtomic.h" + +namespace physx +{ +namespace shdfnd +{ + +PxI32 atomicExchange(volatile PxI32* val,PxI32 val2) +{ + PxI32 newVal, oldVal; + + do + { + PAUSE(); + oldVal = *val; + newVal = val2; + } + while (atomicCompareExchange(val, newVal, oldVal) != oldVal); + + return oldVal; +} + +PxI32 atomicCompareExchange(volatile PxI32* dest, PxI32 exch, PxI32 comp) +{ + return __sync_val_compare_and_swap(dest, comp, exch); +} + +void* atomicCompareExchangePointer(volatile void** dest, void* exch, void* comp) +{ + return __sync_val_compare_and_swap((void**)dest, comp, exch); +} + +PxI32 atomicIncrement(volatile PxI32* val) +{ + return __sync_add_and_fetch(val, 1); +} + +PxI32 atomicDecrement(volatile PxI32* val) +{ + return __sync_sub_and_fetch(val, 1); +} + +PxI32 atomicAdd(volatile PxI32* val, PxI32 delta) +{ + return __sync_add_and_fetch(val, delta); +} + +PxI32 atomicMax(volatile PxI32* val, PxI32 val2) +{ + PxI32 oldVal, newVal; + + do + { + PAUSE(); + oldVal = *val; + + if (val2 > oldVal) + newVal = val2; + else + newVal = oldVal; + + } + while (atomicCompareExchange(val, newVal, oldVal) != oldVal); + + return *val; +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/nx/PsNXCpu.cpp b/PxShared/src/foundation/src/nx/PsNXCpu.cpp new file mode 100644 index 0000000..47c1354 --- /dev/null +++ b/PxShared/src/foundation/src/nx/PsNXCpu.cpp @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#include "PsCpu.h" + +#define cpuid(op, reg) reg[0]=reg[1]=reg[2]=reg[3]=0; + +namespace physx { namespace shdfnd { + + physx::PxU8 Cpu::getCpuId() + { + PxU32 cpuInfo[4]; + cpuid(1, cpuInfo); + return static_cast<physx::PxU8>( cpuInfo[1] >> 24 ); // APIC Physical ID + } +}} diff --git a/PxShared/src/foundation/src/nx/PsNXFPU.cpp b/PxShared/src/foundation/src/nx/PsNXFPU.cpp new file mode 100644 index 0000000..a94354f --- /dev/null +++ b/PxShared/src/foundation/src/nx/PsNXFPU.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+#include "PsFPU.h" + +#include <cfenv> + +physx::shdfnd::FPUGuard::FPUGuard() +{ + PX_COMPILE_TIME_ASSERT(sizeof(fenv_t) <= sizeof(mControlWords)); + + fegetenv(reinterpret_cast<fenv_t*>(mControlWords)); + fesetenv(FE_DFL_ENV); + + // NX does not seem to support fedisableexcept + //fedisableexcept(FE_ALL_EXCEPT); + + fesetround(FE_TONEAREST); // since this does not seem to be the default mode +} + +physx::shdfnd::FPUGuard::~FPUGuard() +{ + fesetenv(reinterpret_cast<fenv_t*>(mControlWords)); +} + +PX_FOUNDATION_API void physx::shdfnd::enableFPExceptions() +{ + // NX does not seem to support feenableexcept + //feclearexcept(FE_ALL_EXCEPT); + //feenableexcept(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW); +} + +PX_FOUNDATION_API void physx::shdfnd::disableFPExceptions() +{ + // NX does not seem to support fedisableexcept + //fedisableexcept(FE_ALL_EXCEPT); +} diff --git a/PxShared/src/foundation/src/nx/PsNXMutex.cpp b/PxShared/src/foundation/src/nx/PsNXMutex.cpp new file mode 100644 index 0000000..6d3334a --- /dev/null +++ b/PxShared/src/foundation/src/nx/PsNXMutex.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include <atomic> +#include "nn/os/os_Mutex.h" +#include "PsFoundation.h" +#include "PsAllocator.h" +#include "PsMutex.h" +#include "PsThread.h" + +namespace physx +{ +namespace shdfnd +{ + +namespace +{ + struct MutexNXImpl + { + nn::os::MutexType lock; + Thread::Id owner; + }; + + MutexNXImpl* getMutex(MutexImpl* impl) + { + return reinterpret_cast<MutexNXImpl*>(impl); + } +} + +MutexImpl::MutexImpl() +{ + nn::os::InitializeMutex(&getMutex(this)->lock, true, 0); +} + +MutexImpl::~MutexImpl() +{ + nn::os::FinalizeMutex(&getMutex(this)->lock); +} + +void MutexImpl::lock() +{ + nn::os::LockMutex(&getMutex(this)->lock); + +#ifdef PX_DEBUG + getMutex(this)->owner = Thread::getId(); +#endif +} + +bool MutexImpl::trylock() +{ + bool success = nn::os::TryLockMutex(&getMutex(this)->lock); +#ifdef PX_DEBUG + if (success) + getMutex(this)->owner = Thread::getId(); +#endif + return success; +} + +void MutexImpl::unlock() +{ +#ifdef PX_DEBUG + // ensure we are already holding the lock + if (getMutex(this)->owner != Thread::getId()) + { + getFoundation().error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, "Mutex must be unlocked only by thread that has already acquired lock"); + return; + } +#endif + + nn::os::UnlockMutex(&getMutex(this)->lock); +} + +static const PxU32 gSize = sizeof(MutexNXImpl); + +const PxU32& MutexImpl::getSize() { return gSize; } + +class ReadWriteLockImpl +{ + PX_NOCOPY(ReadWriteLockImpl) +public: + ReadWriteLockImpl() : readerCount(0) {} + Mutex mutex; + std::atomic<int> readerCount; //handle recursive writer locking +}; + +ReadWriteLock::ReadWriteLock() +{ + mImpl = reinterpret_cast<ReadWriteLockImpl*>(PX_ALLOC(sizeof(ReadWriteLockImpl), PX_DEBUG_EXP("ReadWriteLockImpl"))); + PX_PLACEMENT_NEW(mImpl, ReadWriteLockImpl); +} + +ReadWriteLock::~ReadWriteLock() +{ + mImpl->~ReadWriteLockImpl(); + PX_FREE( mImpl ); +} + +void ReadWriteLock::lockReader() +{ + mImpl->mutex.lock(); + + mImpl->readerCount.fetch_add(1); + + mImpl->mutex.unlock(); +} 
+ +void ReadWriteLock::lockWriter() +{ + mImpl->mutex.lock(); + + // spin lock until no readers + while (mImpl->readerCount); +} + +void ReadWriteLock::unlockReader() +{ + mImpl->readerCount.fetch_sub(1); +} + +void ReadWriteLock::unlockWriter() +{ + mImpl->mutex.unlock(); +} + +} // namespace shdfnd +} // namespace physx + diff --git a/PxShared/src/foundation/src/nx/PsNXPrintString.cpp b/PxShared/src/foundation/src/nx/PsNXPrintString.cpp new file mode 100644 index 0000000..be5f5ec --- /dev/null +++ b/PxShared/src/foundation/src/nx/PsNXPrintString.cpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PsString.h" + +#include <cstdio> +#include <cstdarg> + +#include "nn/nn_Log.h" + +namespace physx +{ +namespace shdfnd +{ + +void printString(const char* str) +{ + NN_LOG(str); + NN_LOG("\n"); +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/nx/PsNXSList.cpp b/PxShared/src/foundation/src/nx/PsNXSList.cpp new file mode 100644 index 0000000..b6258cf --- /dev/null +++ b/PxShared/src/foundation/src/nx/PsNXSList.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. 
Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#include "PsThread.h" // for PxSpinLockPause() +#include "PsSList.h" + +namespace physx +{ +namespace shdfnd +{ + namespace + { + struct ScopedSpinLock + { + private: + PX_NOCOPY(ScopedSpinLock) + + public: + // !!!pthread version need to check + PX_FORCE_INLINE ScopedSpinLock(volatile PxI32& lock): mLock(lock) + { + while (__sync_lock_test_and_set(&mLock, 1)) + { + // spinning without atomics is usually + // causing less bus traffic. -> only one + // CPU is modifying the cache line. + while(lock) + PxSpinLockPause(); + } + } + + PX_FORCE_INLINE ~ScopedSpinLock() + { + __sync_lock_release(&mLock); + } + private: + volatile PxI32& mLock; + }; + + struct SListDetail + { + SListEntry* head; + volatile PxI32 lock; + }; + + template <typename T> + SListDetail* getDetail(T* impl) + { + return reinterpret_cast<SListDetail*>(impl); + } + } + + SListImpl::SListImpl() + { + getDetail(this)->head = NULL; + getDetail(this)->lock = 0; // 0 == unlocked + } + + SListImpl::~SListImpl() + { + } + + void SListImpl::push(SListEntry* entry) + { + ScopedSpinLock lock(getDetail(this)->lock); + entry->mNext = getDetail(this)->head; + getDetail(this)->head = entry; + } + + SListEntry* SListImpl::pop() + { + ScopedSpinLock lock(getDetail(this)->lock); + SListEntry* result = getDetail(this)->head; + if( result != NULL ) + getDetail(this)->head = result->mNext; + return result; + } + + SListEntry* SListImpl::flush() + { + ScopedSpinLock lock(getDetail(this)->lock); + SListEntry* result = getDetail(this)->head; + getDetail(this)->head = NULL; + return result; + } + + static const PxU32 gSize = sizeof(SListDetail); + + const PxU32& SListImpl::getSize() + { + return 
gSize; + } + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/nx/PsNXSocket.cpp b/PxShared/src/foundation/src/nx/PsNXSocket.cpp new file mode 100644 index 0000000..735ab20 --- /dev/null +++ b/PxShared/src/foundation/src/nx/PsNXSocket.cpp @@ -0,0 +1,417 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PsFoundation.h" +#include "PsSocket.h" +#include "PsThread.h" +#include "PsArray.h" +#include "foundation/PxMemory.h" + +#include <nn/socket.h> +#include <nn/nn_Log.h> + +#define SOCKET_ERROR -1 + +namespace physx +{ +namespace shdfnd +{ + +const PxU32 Socket::DEFAULT_BUFFER_SIZE = 32768; + +class SocketImpl +{ +public: + SocketImpl(bool isBlocking); + virtual ~SocketImpl(); + + bool init(); + bool connect(const char* host, PxU16 port, PxU32 timeout); + void disconnect(); + bool listen(PxU16 /*port*/) + { + return false; // not implemented on this platform + } + bool accept(bool /*block*/) + { + return false; // not implemented on this platform + } + + void setBlocking(bool blocking); + + virtual PxU32 write(const PxU8* data, PxU32 length); + virtual bool flush(); + + PxU32 read(PxU8* data, PxU32 length); + + static void* allocate(size_t size) { return PX_ALLOC(size, "Socket"); } + static void deallocate(void* mem, size_t) { PX_FREE(mem); } + + PX_FORCE_INLINE bool isBlocking() const { return mIsBlocking; } + PX_FORCE_INLINE bool isConnected() const { return mIsConnected; } + 
PX_FORCE_INLINE const char* getHost() const { return mHost; } + PX_FORCE_INLINE PxU16 getPort() const { return mPort; } + +protected: + int mSocket; + const char* mHost; + PxU16 mPort; + bool mIsConnected; + bool mIsBlocking; +}; + + +class BufferedSocketImpl: public SocketImpl +{ +public: + BufferedSocketImpl(bool isBlocking); + virtual ~BufferedSocketImpl() {}; + bool flush(); + PxU32 write(const PxU8* data, PxU32 length); + +private: + PxU32 mBufferPos; + PxU8 mBuffer[Socket::DEFAULT_BUFFER_SIZE]; +}; + +BufferedSocketImpl::BufferedSocketImpl(bool isBlocking) + : SocketImpl(isBlocking) + , mBufferPos(0) +{} + + +SocketImpl::SocketImpl(bool isBlocking) + : mSocket(SOCKET_ERROR) + , mHost(NULL) + , mPort(0) + , mIsConnected(false) + , mIsBlocking(isBlocking) +{ +} + + +SocketImpl::~SocketImpl() +{ +} + + +bool SocketImpl::init() +{ + mSocket = nn::socket::Socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + return (mSocket != SOCKET_ERROR); +} + + +void SocketImpl::setBlocking(bool blocking) +{ + if (blocking != mIsBlocking) + { + int mode = nn::socket::Fcntl(mSocket, F_GETFL, 0); + if (!blocking) + mode |= O_NONBLOCK; + else + mode &= ~O_NONBLOCK; + int ret = nn::socket::Fcntl(mSocket, F_SETFL, mode); + if (ret != SOCKET_ERROR) + mIsBlocking = blocking; + } +} + + +bool SocketImpl::flush() +{ + return true; +}; + + +bool SocketImpl::connect(const char* host, PxU16 port, PxU32 timeout) +{ + if (!init()) + return false; + + setBlocking(false); + + sockaddr_in socketAddress; + socketAddress.sin_family = AF_INET; + socketAddress.sin_port = nn::socket::InetHtons(port); + + // get host part + int result = nn::socket::InetPton(AF_INET, host, &socketAddress.sin_addr); + PX_UNUSED(result); + PX_ASSERT(result != SOCKET_ERROR); + + if (nn::socket::Connect(mSocket, (sockaddr*)&socketAddress, sizeof(socketAddress)) < 0) + { + if (nn::socket::GetLastErrno() != EINPROGRESS) + { + disconnect(); + return false; + } + + //Use poll function call to monitor the connect call. 
+ pollfd socketDesc; + socketDesc.fd = mSocket; + socketDesc.events = POLLOUT; + socketDesc.revents = 0; + int pollret = nn::socket::Poll(&socketDesc, 1, static_cast<int>(timeout)); + if (pollret != 1 || (socketDesc.revents & POLLERR) || !(socketDesc.revents & POLLOUT)) + { + disconnect(); + return false; + } + + // check if we are really connected, above code seems to return + // true if host is a unix machine even if the connection was + // not accepted. + char buffer; + if(nn::socket::Recv(mSocket, (void*)&buffer, 0, 0) < 0) + { + if(nn::socket::GetLastErrno() != EWOULDBLOCK) + { + disconnect(); + return false; + } + } + } + + setBlocking(mIsBlocking); + mIsConnected = true; + mPort = port; + mHost = host; + + return true; +} + + +void SocketImpl::disconnect() +{ + if (mSocket != SOCKET_ERROR) + { + int result; + if (mIsConnected) + { + setBlocking(true); + result = nn::socket::Shutdown(mSocket, SHUT_RDWR); + PX_UNUSED(result); + PX_ASSERT(result != SOCKET_ERROR); + } + + result = nn::socket::Close(mSocket); + PX_UNUSED(result); + PX_ASSERT(result != SOCKET_ERROR); + mSocket = SOCKET_ERROR; + } + + mIsConnected = false; + mPort = 0; + mHost = NULL; +} + + +PxU32 SocketImpl::write(const PxU8* data, PxU32 length) +{ + int sent = 0; + while((sent = nn::socket::Send(mSocket, (const void*)data, (size_t)length, 0)) == SOCKET_ERROR) + { + if(nn::socket::GetLastErrno() != EWOULDBLOCK) + { + mIsConnected = false; + return 0; + } + } + + return (PxU32)sent; +} + + +PxU32 SocketImpl::read(PxU8* data, PxU32 length) +{ + int bytesReceived = 0; + + // If out of receive buffer, increase it + while((bytesReceived = nn::socket::Recv(mSocket, (void*)data, (size_t)length, 0)) == SOCKET_ERROR && + nn::socket::GetLastErrno() == ENOBUFS) + { + int iBuffSize = (int)length; + + // terminate the loop if we cannot increase the buffer size + if(nn::socket::SetSockOpt(mSocket, SOL_SOCKET, SO_RCVBUF, (void*)&iBuffSize, sizeof(int)) != 0) + break; + } + + if(bytesReceived <= 0) + { + 
bytesReceived = 0; + mIsConnected = false; + } + + return PxU32(bytesReceived); +} + + +bool BufferedSocketImpl::flush() +{ + PxU32 totalBytesWritten = 0; + PxI32 bytesWritten = 1; + while(totalBytesWritten < mBufferPos && bytesWritten > 0) + { + bytesWritten = SocketImpl::write(mBuffer+totalBytesWritten, mBufferPos-totalBytesWritten); + if(bytesWritten > 0) + totalBytesWritten += bytesWritten; + } + bool ret = (totalBytesWritten == mBufferPos); + mBufferPos = 0; + return ret; +} + + +PxU32 BufferedSocketImpl::write(const PxU8* data, PxU32 length) +{ + PxU32 bytesWritten = 0; + while(length > (Socket::DEFAULT_BUFFER_SIZE - mBufferPos)) + { + PxU32 currentChunk = Socket::DEFAULT_BUFFER_SIZE - mBufferPos; + PxMemCopy(mBuffer+mBufferPos, data+bytesWritten, currentChunk); + mBufferPos = Socket::DEFAULT_BUFFER_SIZE; + if(!flush()) + { + disconnect(); + return bytesWritten; + } + bytesWritten += currentChunk; + length -= currentChunk; + } + if(length > 0) + { + PxMemCopy(mBuffer+mBufferPos, data+bytesWritten, length); + bytesWritten += length; + mBufferPos += length; + } + if(mBufferPos == Socket::DEFAULT_BUFFER_SIZE) + { + if (!flush()) + { + disconnect(); + return bytesWritten; + } + } + return bytesWritten; +} + + +Socket::Socket(bool inEnableBuffering, bool blocking) +{ + if (inEnableBuffering) + { + void* mem = PX_ALLOC(sizeof(BufferedSocketImpl), PX_DEBUG_EXP("BufferedSocketImpl")); + mImpl = PX_PLACEMENT_NEW(mem, BufferedSocketImpl)(blocking); + } + else + { + void* mem = PX_ALLOC(sizeof(SocketImpl), PX_DEBUG_EXP("SocketImpl")); + mImpl = PX_PLACEMENT_NEW(mem, SocketImpl)(blocking); + } +} + + +Socket::~Socket() +{ + mImpl->flush(); + mImpl->disconnect(); + mImpl->~SocketImpl(); + PX_FREE(mImpl); +} + + +bool Socket::connect(const char* host, PxU16 port, PxU32 timeout) +{ + return mImpl->connect(host, port, timeout); +} + + +bool Socket::listen(PxU16 port) +{ + return mImpl->listen(port); +} + + +bool Socket::accept(bool block) +{ + return mImpl->accept(block); +} 
+ + +void Socket::disconnect() +{ + mImpl->disconnect(); +} + + +bool Socket::isConnected() const +{ + return mImpl->isConnected(); +} + + +const char* Socket::getHost() const +{ + return mImpl->getHost(); +} + + +PxU16 Socket::getPort() const +{ + return mImpl->getPort(); +} + + +bool Socket::flush() +{ + if(!mImpl->isConnected()) + return false; + return mImpl->flush(); +} + + +PxU32 Socket::write(const PxU8* data, PxU32 length) +{ + if(!mImpl->isConnected()) + return 0; + return mImpl->write(data, length); +} + + +PxU32 Socket::read(PxU8* data, PxU32 length) +{ + if(!mImpl->isConnected()) + return 0; + return mImpl->read(data, length); +} + + +void Socket::setBlocking(bool blocking) +{ + if(!mImpl->isConnected()) + return; + mImpl->setBlocking(blocking); +} + + +bool Socket::isBlocking() const +{ + if(!mImpl->isConnected()) + return true; + return mImpl->isBlocking(); +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/nx/PsNXSync.cpp b/PxShared/src/foundation/src/nx/PsNXSync.cpp new file mode 100644 index 0000000..0a9566d --- /dev/null +++ b/PxShared/src/foundation/src/nx/PsNXSync.cpp @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "nn/os/os_ConditionVariable.h" +#include "nn/os/os_Tick.h" +#include "nn/nn_TimeSpan.h" +#include "nn/os/os_Mutex.h" +#include "foundation/PxAssert.h" +#include "PsSync.h" + + +namespace physx +{ +namespace shdfnd +{ + namespace + { + class _SyncImpl + { + public: + nn::os::ConditionVariableType syncCondVar; // note: nn::os::EventType is not used because nn::os::ClearEvent() is not atomic + nn::os::MutexType syncMutex; + volatile PxI32 setCounter; + volatile bool is_set; + }; + + _SyncImpl* getSync(SyncImpl* impl) + { + return reinterpret_cast<_SyncImpl*>(impl); + } + } + + static const PxU32 gSize = sizeof(_SyncImpl); + const PxU32& SyncImpl::getSize() { return gSize; } + + struct PxNXScopedLock + { + private: + PX_NOCOPY(PxNXScopedLock) + + public: + PxNXScopedLock(nn::os::MutexType& lock) : mLock(lock) { nn::os::LockMutex(&mLock); } + ~PxNXScopedLock() { nn::os::UnlockMutex(&mLock); } + + private: + nn::os::MutexType& mLock; + }; + + SyncImpl::SyncImpl() + { + _SyncImpl* syncImpl = getSync(this); + nn::os::InitializeMutex(&syncImpl->syncMutex, false, 0); // non-recursive is correct even if it might be slightly confusing the way it is used but nn::os::WaitConditionVariable() unlocks and locks again + nn::os::InitializeConditionVariable(&syncImpl->syncCondVar); + syncImpl->setCounter = 0; + syncImpl->is_set = false; + } + + SyncImpl::~SyncImpl() + { + _SyncImpl* syncImpl = getSync(this); + nn::os::FinalizeConditionVariable(&syncImpl->syncCondVar); + nn::os::FinalizeMutex(&syncImpl->syncMutex); + } + + void SyncImpl::reset() + { + _SyncImpl* syncImpl = getSync(this); + PxNXScopedLock lock(syncImpl->syncMutex); + syncImpl->is_set = false; + } + + void SyncImpl::set() + { + _SyncImpl* syncImpl = getSync(this); + PxNXScopedLock lock(syncImpl->syncMutex); + if(!syncImpl->is_set) + { + syncImpl->setCounter++; + syncImpl->is_set = true; + nn::os::BroadcastConditionVariable(&syncImpl->syncCondVar); + } + } + + bool SyncImpl::wait(PxU32 milliseconds) + { + 
_SyncImpl* syncImpl = getSync(this); + PxNXScopedLock lock(syncImpl->syncMutex); + PxI32 lastSetCounter = syncImpl->setCounter; + if(!getSync(this)->is_set) + { + if(milliseconds == static_cast<PxU32>(-1)) + { + // have to loop here and check is_set since WaitConditionVariable can return even + // if it was not signaled by BroadcastConditionVariable + while((!syncImpl->is_set) && (lastSetCounter == syncImpl->setCounter)) + nn::os::WaitConditionVariable(&syncImpl->syncCondVar, &syncImpl->syncMutex); + PX_ASSERT(syncImpl->is_set || (lastSetCounter != syncImpl->setCounter)); + } + else + { + const int64_t ticksToWait = (static_cast<int64_t>(milliseconds) * nn::os::GetSystemTickFrequency()) / 1000; + const int64_t targetCounter = nn::os::GetSystemTick().GetInt64Value() + ticksToWait; + const int64_t targetCounterWithMargin = targetCounter - (targetCounter >> 4) + 1; // allow for a bit of error in the wait time (around 6%) + + // have to loop here and check is_set since TimedWaitConditionVariable can return even + // if it was not signaled by BroadcastConditionVariable. Note: to keep it simple, the elapsed + // time is not taken into account in such a case and the original wait time will be used again. 
+ bool timeLimitOk = true; + while((!syncImpl->is_set) && (lastSetCounter == syncImpl->setCounter) && timeLimitOk) + { + nn::os::TimedWaitConditionVariable(&syncImpl->syncCondVar, &syncImpl->syncMutex, nn::os::ConvertToTimeSpan(nn::os::Tick(ticksToWait))); + timeLimitOk = nn::os::GetSystemTick().GetInt64Value() <= targetCounterWithMargin; + } + PX_ASSERT(syncImpl->is_set || (lastSetCounter != syncImpl->setCounter) || (!timeLimitOk)); + } + } + + return syncImpl->is_set || (lastSetCounter != syncImpl->setCounter); + } + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/nx/PsNXThread.cpp b/PxShared/src/foundation/src/nx/PsNXThread.cpp new file mode 100644 index 0000000..a792a66 --- /dev/null +++ b/PxShared/src/foundation/src/nx/PsNXThread.cpp @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include <atomic> +#include "PsFoundation.h" +#include "nn/os/os_Thread.h" +#include "nn/nn_TimeSpan.h" + +#include "PsBitUtils.h" +#include "PsThread.h" +#include "foundation/PxAssert.h" + + +namespace physx +{ +namespace shdfnd +{ + +namespace +{ + class _ThreadImpl + { + PX_NOCOPY(_ThreadImpl) + + public: + enum State + { + NotStarted, + Started, + Stopped + }; + + nn::os::ThreadType nativeThread; + + PxU8* stackMemory; + int threadAffinity; + + std::atomic<int> quitNow; + State state; + + ThreadImpl::ExecuteFn fn; + void* arg; + + static const int sInvalidAffinityMask = 0xffffffff; + }; + + _ThreadImpl* getThread(ThreadImpl* impl) + { + return reinterpret_cast<_ThreadImpl*>(impl); + } + + PX_FORCE_INLINE void initThreadImpl(_ThreadImpl* threadImpl) + { + threadImpl->nativeThread._basePriority = nn::os::DefaultThreadPriority; + threadImpl->nativeThread._namePointer = NULL; + threadImpl->state = _ThreadImpl::NotStarted; + threadImpl->quitNow = 0; + threadImpl->threadAffinity = _ThreadImpl::sInvalidAffinityMask; + threadImpl->fn = NULL; + threadImpl->arg = NULL; + threadImpl->stackMemory = NULL; + } + + void ThreadStart(void* ptrArg) + { + // then run either the passed in function or execute from the derived class (Runnable). + _ThreadImpl* impl = getThread(reinterpret_cast<ThreadImpl*>(ptrArg)); + if(impl->fn) + (*impl->fn)(impl->arg); + else if(impl->arg) + (static_cast<Runnable*>(impl->arg))->execute(); + } + + PX_FORCE_INLINE void* allocateStackMemory(size_t size) + { + size_t pad = (nn::os::StackRegionAlignment - 1) + sizeof(size_t); // store offset for delete. 
+ PxU8* base = reinterpret_cast<PxU8*>(::malloc(size + pad)); + if (!base) + return NULL; + + PxU8* ptr = reinterpret_cast<PxU8*>(size_t(base + pad) & ~(nn::os::StackRegionAlignment - 1)); // aligned pointer + (reinterpret_cast<size_t*>(ptr))[-1] = static_cast<size_t>(ptr - base); // store offset + + return ptr; + } + + PX_FORCE_INLINE void freeStackMemory(void* ptr) + { + if (ptr == NULL) + return; + + PxU8* base = reinterpret_cast<PxU8*>(ptr) - (reinterpret_cast<size_t*>(ptr))[-1]; + ::free(base); + } +} + +static const PxU32 gSize = sizeof(_ThreadImpl); +const PxU32& ThreadImpl::getSize() { return gSize; } + + +ThreadImpl::Id ThreadImpl::getId() +{ + return reinterpret_cast<Id>(nn::os::GetCurrentThread()); +} + +ThreadImpl::ThreadImpl() +{ + initThreadImpl(getThread(this)); +} + +ThreadImpl::ThreadImpl(ExecuteFn fn, void* arg) +{ + _ThreadImpl* tImpl = getThread(this); + + initThreadImpl(tImpl); + + tImpl->fn = fn; + tImpl->arg = arg; + + start(0, NULL); +} + +ThreadImpl::~ThreadImpl() +{ + _ThreadImpl* tImpl = getThread(this); + + if ((tImpl->state != _ThreadImpl::NotStarted) && (tImpl->stackMemory != NULL)) + { + nn::os::DestroyThread(&tImpl->nativeThread); + freeStackMemory(reinterpret_cast<void*>(tImpl->stackMemory)); + } +} + +PxU32 ThreadImpl::getDefaultStackSize() +{ + const PxU32 defaultSize = 524288; + PX_COMPILE_TIME_ASSERT((defaultSize % nn::os::StackRegionAlignment) == 0); + return defaultSize; +} + +void ThreadImpl::start(PxU32 stackSize, Runnable* runnable) +{ + _ThreadImpl* tImpl = getThread(this); + if(tImpl->state != _ThreadImpl::NotStarted) + return; + tImpl->state = _ThreadImpl::Started; + + PxU32 newStackSize = getDefaultStackSize(); + if(stackSize != 0) + newStackSize = stackSize; + + PX_ASSERT((newStackSize % nn::os::StackRegionAlignment) == 0); + + // need to provide stack memory as well + // (for other platforms, the system allocates the stack memory, hence it seems ok to not use the user allocator for this) + PxU8* mem = 
reinterpret_cast<PxU8*>(allocateStackMemory(newStackSize)); + + if (mem) + { + tImpl->stackMemory = mem; + + if(runnable && !tImpl->arg && ! tImpl->fn) + tImpl->arg = runnable; + + const int priority = tImpl->nativeThread._basePriority; + PX_ASSERT((priority <= nn::os::LowestThreadPriority) && (priority >= nn::os::HighestThreadPriority)); + + nn::Result result = nn::os::CreateThread(&tImpl->nativeThread, ThreadStart, this, mem, newStackSize, priority); + + if (result.IsSuccess()) + { + if (tImpl->threadAffinity != _ThreadImpl::sInvalidAffinityMask) + nn::os::SetThreadCoreMask(&tImpl->nativeThread, nn::os::IdealCoreDontCare, static_cast<nn::Bit64>(tImpl->threadAffinity)); + + if (tImpl->nativeThread._namePointer) + nn::os::SetThreadNamePointer(&tImpl->nativeThread, tImpl->nativeThread._namePointer); + + nn::os::StartThread(&tImpl->nativeThread); + } + else + { + freeStackMemory(reinterpret_cast<void*>(mem)); + tImpl->stackMemory = NULL; + PX_ALWAYS_ASSERT(); + } + } +} + +void ThreadImpl::signalQuit() +{ + getThread(this)->quitNow.fetch_add(1); +} + +bool ThreadImpl::waitForQuit() +{ + _ThreadImpl* tImpl = getThread(this); + + if(tImpl->state == _ThreadImpl::NotStarted) + return false; + + nn::os::WaitThread(&tImpl->nativeThread); + + return true; +} + +bool ThreadImpl::quitIsSignalled() +{ + _ThreadImpl* tImpl = getThread(this); + int expected = 0; + return !tImpl->quitNow.compare_exchange_strong(expected, 0); +} + +void ThreadImpl::quit() +{ + getThread(this)->state = _ThreadImpl::Stopped; + + // nothing to call. The thread will return and that will trigger all waiting threads to be informed. 
+} + +void ThreadImpl::kill() +{ + PX_ASSERT(!"kill() is not implemented for this platform"); + + // nn::os::DestroyThread() waits for the thread to exit, which does seem the wrong behavior for kill() +} + +void ThreadImpl::sleep(PxU32 ms) +{ + nn::os::SleepThread(nn::TimeSpan::FromMilliSeconds(ms)); +} + +void ThreadImpl::yield() +{ + nn::os::YieldThread(); +} + +PxU32 ThreadImpl::setAffinityMask(PxU32 mask) +{ + PX_ASSERT((mask & (~nn::os::GetThreadAvailableCoreMask())) == 0); + + _ThreadImpl* tImpl = getThread(this); + + if (tImpl->state == _ThreadImpl::NotStarted) + { + const int previousMask = tImpl->threadAffinity; + tImpl->threadAffinity = static_cast<int>(mask); + + if (previousMask == _ThreadImpl::sInvalidAffinityMask) + return 0; + else + return static_cast<PxU32>(previousMask); + } + else + { + nn::Bit64 affMask; + nn::os::GetThreadCoreMask(NULL, &affMask, &tImpl->nativeThread); + + nn::os::SetThreadCoreMask(&tImpl->nativeThread, nn::os::IdealCoreDontCare, static_cast<nn::Bit64>(mask)); + + return static_cast<PxU32>(affMask); + } +} + +void ThreadImpl::setName(const char* name) +{ + // important: The memory has to be allocated and managed by the caller + + _ThreadImpl* tImpl = getThread(this); + if(tImpl->state == _ThreadImpl::Started) + nn::os::SetThreadNamePointer(&tImpl->nativeThread, name); + else + tImpl->nativeThread._namePointer = name; +} + +void ThreadImpl::setPriority(ThreadPriority::Enum prio) +{ + _ThreadImpl* tImpl = getThread(this); + + int convertedPriority = ThreadPriority::eNORMAL; // compiler complains if no default is set (even though all values are covered in the switch statement) + switch(prio) + { + case ThreadPriority::eHIGH: + convertedPriority = nn::os::HighestThreadPriority; + break; + case ThreadPriority::eABOVE_NORMAL: + convertedPriority = (nn::os::HighestThreadPriority + nn::os::DefaultThreadPriority) / 2; + break; + case ThreadPriority::eNORMAL: + convertedPriority = nn::os::DefaultThreadPriority; + break; + case 
ThreadPriority::eBELOW_NORMAL: + convertedPriority = (nn::os::LowestThreadPriority + nn::os::DefaultThreadPriority) / 2; + break; + case ThreadPriority::eLOW: + convertedPriority = nn::os::LowestThreadPriority; + break; + case ThreadPriority::eFORCE_DWORD: + PX_ALWAYS_ASSERT(); + convertedPriority = nn::os::DefaultThreadPriority; + break; + } + + if (tImpl->state == _ThreadImpl::Started) + nn::os::ChangeThreadPriority(&tImpl->nativeThread, convertedPriority); + else + tImpl->nativeThread._basePriority = convertedPriority; +} + +ThreadPriority::Enum ThreadImpl::getPriority(Id tID) +{ + const nn::os::ThreadType* thread = reinterpret_cast<const nn::os::ThreadType*>(tID); + int prio = nn::os::GetThreadCurrentPriority(thread); + ThreadPriority::Enum convertedPriority; + + switch(prio) + { + case nn::os::HighestThreadPriority: + convertedPriority = ThreadPriority::eHIGH; + break; + case ((nn::os::HighestThreadPriority + nn::os::DefaultThreadPriority) / 2): + convertedPriority = ThreadPriority::eABOVE_NORMAL; + break; + case nn::os::DefaultThreadPriority: + convertedPriority = ThreadPriority::eNORMAL; + break; + case ((nn::os::LowestThreadPriority + nn::os::DefaultThreadPriority) / 2): + convertedPriority = ThreadPriority::eBELOW_NORMAL; + break; + case nn::os::LowestThreadPriority: + convertedPriority = ThreadPriority::eLOW; + break; + default: + PX_ALWAYS_ASSERT(); + convertedPriority = ThreadPriority::eNORMAL; + break; + } + + return convertedPriority; +} + +PxU32 ThreadImpl::getNbPhysicalCores() +{ + nn::Bit64 mask = nn::os::GetThreadAvailableCoreMask(); + PX_ASSERT(mask > 0); + + const uint32_t count = bitCount(static_cast<const uint32_t>(mask)); + return count; +} + + +PxU32 TlsAlloc() +{ + // note: only nn::os::TlsSlotCountMax Tls slots are available (which used to be 16) + + nn::os::TlsSlot tlsSlot; + nn::Result result = nn::os::AllocateTlsSlot(&tlsSlot, NULL); + + if (result.IsSuccess()) + return static_cast<PxU32>(tlsSlot._innerValue); + else + { + 
PX_ALWAYS_ASSERT(); + return 0xffffffff; + } +} + +void TlsFree(PxU32 index) +{ + nn::os::TlsSlot tlsSlot; + tlsSlot._innerValue = static_cast<uint32_t>(index); + + nn::os::FreeTlsSlot(tlsSlot); +} + +void* TlsGet(PxU32 index) +{ + nn::os::TlsSlot tlsSlot; + tlsSlot._innerValue = static_cast<uint32_t>(index); + + return reinterpret_cast<void*>(nn::os::GetTlsValue(tlsSlot)); +} + +PxU32 TlsSet(PxU32 index, void* value) +{ + nn::os::TlsSlot tlsSlot; + tlsSlot._innerValue = static_cast<uint32_t>(index); + + nn::os::SetTlsValue(tlsSlot, reinterpret_cast<uintptr_t>(value)); + + return 1; +} + + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/nx/PsNXTime.cpp b/PxShared/src/foundation/src/nx/PsNXTime.cpp new file mode 100644 index 0000000..977e6ce --- /dev/null +++ b/PxShared/src/foundation/src/nx/PsNXTime.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "PsTime.h" +#include "nn/os/os_Tick.h" + + +namespace physx +{ +namespace shdfnd +{ + +namespace +{ + static double sRecipFrequency = 1.0 / nn::os::GetSystemTickFrequency(); +} + +static const CounterFrequencyToTensOfNanos gCounterFreq = Time::getCounterFrequency(); + +const CounterFrequencyToTensOfNanos& Time::getBootCounterFrequency() +{ + return gCounterFreq; +} + +CounterFrequencyToTensOfNanos Time::getCounterFrequency() +{ + return CounterFrequencyToTensOfNanos( Time::sNumTensOfNanoSecondsInASecond, static_cast<PxU64>(nn::os::GetSystemTickFrequency())); +} + + +PxU64 Time::getCurrentCounterValue() +{ + nn::os::Tick tick = nn::os::GetSystemTick(); + return static_cast<PxU64>(tick.GetInt64Value()); +} + +Time::Time(): mTickCount(0) +{ + getElapsedSeconds(); +} + +Time::Second Time::getElapsedSeconds() +{ + PxI64 lastTickCount = mTickCount; + mTickCount = static_cast<PxI64>(nn::os::GetSystemTick().GetInt64Value()); + return (mTickCount - lastTickCount) * sRecipFrequency; +} + +Time::Second Time::peekElapsedSeconds() +{ + return (static_cast<PxI64>(nn::os::GetSystemTick().GetInt64Value()) - mTickCount) * sRecipFrequency; +} + +Time::Second Time::getLastTime() const +{ + return mTickCount * sRecipFrequency; +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/unix/PsUnixAtomic.cpp b/PxShared/src/foundation/src/unix/PsUnixAtomic.cpp new file mode 100644 index 0000000..d4e933b --- /dev/null +++ b/PxShared/src/foundation/src/unix/PsUnixAtomic.cpp @@ -0,0 +1,102 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. +#include "Ps.h" +#include "PsAtomic.h" + +#if ! 
PX_EMSCRIPTEN +#define PAUSE() asm("nop") +#else +#define PAUSE() +#endif + +namespace physx +{ +namespace shdfnd +{ + +void* atomicCompareExchangePointer(volatile void** dest, void* exch, void* comp) +{ + return __sync_val_compare_and_swap(const_cast<void**>(dest), comp, exch); +} + +int32_t atomicCompareExchange(volatile int32_t* dest, int32_t exch, int32_t comp) +{ + return __sync_val_compare_and_swap(dest, comp, exch); +} + +int32_t atomicIncrement(volatile int32_t* val) +{ + return __sync_add_and_fetch(val, 1); +} + +int32_t atomicDecrement(volatile int32_t* val) +{ + return __sync_sub_and_fetch(val, 1); +} + +int32_t atomicAdd(volatile int32_t* val, int32_t delta) +{ + return __sync_add_and_fetch(val, delta); +} + +int32_t atomicMax(volatile int32_t* val, int32_t val2) +{ + int32_t oldVal, newVal; + + do + { + PAUSE(); + oldVal = *val; + + if(val2 > oldVal) + newVal = val2; + else + newVal = oldVal; + + } while(atomicCompareExchange(val, newVal, oldVal) != oldVal); + + return *val; +} + +int32_t atomicExchange(volatile int32_t* val, int32_t val2) +{ + int32_t newVal, oldVal; + + do + { + PAUSE(); + oldVal = *val; + newVal = val2; + } while(atomicCompareExchange(val, newVal, oldVal) != oldVal); + + return oldVal; +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/unix/PsUnixCpu.cpp b/PxShared/src/foundation/src/unix/PsUnixCpu.cpp new file mode 100644 index 0000000..0139fe4 --- /dev/null +++ b/PxShared/src/foundation/src/unix/PsUnixCpu.cpp @@ -0,0 +1,58 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxSimpleTypes.h" +#include "PsCpu.h" + +#if PX_X86 && !defined(__EMSCRIPTEN__) +#define cpuid(op, reg) \ + __asm__ __volatile__("pushl %%ebx \n\t" /* save %ebx */ \ + "cpuid \n\t" \ + "movl %%ebx, %1 \n\t" /* save what cpuid just put in %ebx */ \ + "popl %%ebx \n\t" /* restore the old %ebx */ \ + : "=a"(reg[0]), "=r"(reg[1]), "=c"(reg[2]), "=d"(reg[3]) \ + : "a"(op) \ + : "cc") +#else +#define cpuid(op, reg) reg[0] = reg[1] = reg[2] = reg[3] = 0; +#endif + +namespace physx +{ +namespace shdfnd +{ + +uint8_t Cpu::getCpuId() +{ + uint32_t cpuInfo[4]; + cpuid(1, cpuInfo); + return static_cast<uint8_t>(cpuInfo[1] >> 24); // APIC Physical ID +} +} +} diff --git a/PxShared/src/foundation/src/unix/PsUnixFPU.cpp b/PxShared/src/foundation/src/unix/PsUnixFPU.cpp new file mode 100644 index 0000000..e12fa5f --- /dev/null +++ b/PxShared/src/foundation/src/unix/PsUnixFPU.cpp @@ -0,0 +1,117 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. +#include "PsFPU.h" + +#if !(defined(__CYGWIN__) || PX_ANDROID || PX_PS4) +#include <fenv.h> +PX_COMPILE_TIME_ASSERT(8 * sizeof(uint32_t) >= sizeof(fenv_t)); +#endif + +#if PX_OSX +// osx defines SIMD as standard for floating point operations. +#include <xmmintrin.h> +#endif + +physx::shdfnd::FPUGuard::FPUGuard() +{ +#if defined(__CYGWIN__) +#pragma message "FPUGuard::FPUGuard() is not implemented" +#elif PX_ANDROID +// not supported unless ARM_HARD_FLOAT is enabled. 
+#elif PX_PS4 + // not supported + PX_UNUSED(mControlWords); +#elif PX_OSX + mControlWords[0] = _mm_getcsr(); + // set default (disable exceptions: _MM_MASK_MASK) and FTZ (_MM_FLUSH_ZERO_ON), DAZ (_MM_DENORMALS_ZERO_ON: (1<<6)) + _mm_setcsr(_MM_MASK_MASK | _MM_FLUSH_ZERO_ON | (1 << 6)); +#elif defined(__EMSCRIPTEN__) +// not supported +#else + PX_COMPILE_TIME_ASSERT(sizeof(fenv_t) <= sizeof(mControlWords)); + + fegetenv(reinterpret_cast<fenv_t*>(mControlWords)); + fesetenv(FE_DFL_ENV); + +#if PX_LINUX + // need to explicitly disable exceptions because fesetenv does not modify + // the sse control word on 32bit linux (64bit is fine, but do it here just be sure) + fedisableexcept(FE_ALL_EXCEPT); +#endif + +#endif +} + +physx::shdfnd::FPUGuard::~FPUGuard() +{ +#if defined(__CYGWIN__) +#pragma message "FPUGuard::~FPUGuard() is not implemented" +#elif PX_ANDROID +// not supported unless ARM_HARD_FLOAT is enabled. +#elif PX_PS4 +// not supported +#elif PX_OSX + // restore control word and clear exception flags + // (setting exception state flags cause exceptions on the first following fp operation) + _mm_setcsr(mControlWords[0] & ~_MM_EXCEPT_MASK); +#elif defined(__EMSCRIPTEN__) +// not supported +#else + fesetenv(reinterpret_cast<fenv_t*>(mControlWords)); +#endif +} + +PX_FOUNDATION_API void physx::shdfnd::enableFPExceptions() +{ +#if PX_LINUX && !defined(__EMSCRIPTEN__) + feclearexcept(FE_ALL_EXCEPT); + feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW); +#elif PX_OSX + // clear any pending exceptions + // (setting exception state flags cause exceptions on the first following fp operation) + uint32_t control = _mm_getcsr() & ~_MM_EXCEPT_MASK; + + // enable all fp exceptions except inexact and underflow (common, benign) + // note: denorm has to be disabled as well because underflow can create denorms + _mm_setcsr((control & ~_MM_MASK_MASK) | _MM_MASK_INEXACT | _MM_MASK_UNDERFLOW | _MM_MASK_DENORM); + +#endif +} + +PX_FOUNDATION_API void 
physx::shdfnd::disableFPExceptions() +{ +#if PX_LINUX && !defined(__EMSCRIPTEN__) + fedisableexcept(FE_ALL_EXCEPT); +#elif PX_OSX + // clear any pending exceptions + // (setting exception state flags cause exceptions on the first following fp operation) + uint32_t control = _mm_getcsr() & ~_MM_EXCEPT_MASK; + _mm_setcsr(control | _MM_MASK_MASK); +#endif +} diff --git a/PxShared/src/foundation/src/unix/PsUnixMutex.cpp b/PxShared/src/foundation/src/unix/PsUnixMutex.cpp new file mode 100644 index 0000000..092ec1e --- /dev/null +++ b/PxShared/src/foundation/src/unix/PsUnixMutex.cpp @@ -0,0 +1,171 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxAssert.h" +#include "foundation/PxErrorCallback.h" + +#include "Ps.h" +#include "PsFoundation.h" +#include "PsUserAllocated.h" +#include "PsMutex.h" +#include "PsAtomic.h" +#include "PsThread.h" + +#include <pthread.h> + +namespace physx +{ +namespace shdfnd +{ + +namespace +{ +struct MutexUnixImpl +{ + pthread_mutex_t lock; + Thread::Id owner; +}; + +MutexUnixImpl* getMutex(MutexImpl* impl) +{ + return reinterpret_cast<MutexUnixImpl*>(impl); +} +} + +MutexImpl::MutexImpl() +{ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); +#if !PX_ANDROID + // mimic default windows behavior where applicable + pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT); +#endif + pthread_mutex_init(&getMutex(this)->lock, &attr); + pthread_mutexattr_destroy(&attr); +} + +MutexImpl::~MutexImpl() +{ + pthread_mutex_destroy(&getMutex(this)->lock); +} + +void MutexImpl::lock() +{ + int err = pthread_mutex_lock(&getMutex(this)->lock); + PX_ASSERT(!err); + PX_UNUSED(err); + +#if PX_DEBUG + getMutex(this)->owner = Thread::getId(); +#endif +} + +bool MutexImpl::trylock() +{ + bool success = !pthread_mutex_trylock(&getMutex(this)->lock); +#if PX_DEBUG + if(success) + getMutex(this)->owner = Thread::getId(); +#endif + return success; +} + +void MutexImpl::unlock() +{ +#if PX_DEBUG + if(getMutex(this)->owner != Thread::getId()) + { + shdfnd::getFoundation().error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, + "Mutex must be unlocked only by thread that has already acquired lock"); + return; + } +#endif + + 
int err = pthread_mutex_unlock(&getMutex(this)->lock); + PX_ASSERT(!err); + PX_UNUSED(err); +} + +const uint32_t gSize = sizeof(MutexUnixImpl); +const uint32_t& MutexImpl::getSize() +{ + return gSize; +} + +class ReadWriteLockImpl +{ + public: + Mutex mutex; + volatile int readerCounter; +}; + +ReadWriteLock::ReadWriteLock() +{ + mImpl = reinterpret_cast<ReadWriteLockImpl*>(PX_ALLOC(sizeof(ReadWriteLockImpl), "ReadWriteLockImpl")); + PX_PLACEMENT_NEW(mImpl, ReadWriteLockImpl); + + mImpl->readerCounter = 0; +} + +ReadWriteLock::~ReadWriteLock() +{ + mImpl->~ReadWriteLockImpl(); + PX_FREE(mImpl); +} + +void ReadWriteLock::lockReader() +{ + mImpl->mutex.lock(); + + atomicIncrement(&mImpl->readerCounter); + + mImpl->mutex.unlock(); +} + +void ReadWriteLock::lockWriter() +{ + mImpl->mutex.lock(); + + while(mImpl->readerCounter != 0) + ; +} + +void ReadWriteLock::unlockReader() +{ + atomicDecrement(&mImpl->readerCounter); +} + +void ReadWriteLock::unlockWriter() +{ + mImpl->mutex.unlock(); +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/unix/PsUnixPrintString.cpp b/PxShared/src/foundation/src/unix/PsUnixPrintString.cpp new file mode 100644 index 0000000..3c937a4 --- /dev/null +++ b/PxShared/src/foundation/src/unix/PsUnixPrintString.cpp @@ -0,0 +1,52 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PsString.h" +#include <stdio.h> + +#if PX_ANDROID +#include <android/log.h> +#endif + +namespace physx +{ +namespace shdfnd +{ + +void printString(const char* str) +{ +#if PX_ANDROID + __android_log_print(ANDROID_LOG_INFO, "PsPrintString", "%s", str); +#else + puts(str); +#endif +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/unix/PsUnixSList.cpp b/PxShared/src/foundation/src/unix/PsUnixSList.cpp new file mode 100644 index 0000000..5dd8ac3 --- /dev/null +++ b/PxShared/src/foundation/src/unix/PsUnixSList.cpp @@ -0,0 +1,158 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "PsAllocator.h" +#include "PsAtomic.h" +#include "PsSList.h" +#include "PsThread.h" +#include <pthread.h> + +#if PX_IOS || PX_EMSCRIPTEN +#define USE_MUTEX +#endif + +namespace physx +{ +namespace shdfnd +{ +namespace +{ +#if defined(USE_MUTEX) +class ScopedMutexLock +{ + pthread_mutex_t& mMutex; + + public: + PX_INLINE ScopedMutexLock(pthread_mutex_t& mutex) : mMutex(mutex) + { + pthread_mutex_lock(&mMutex); + } + + PX_INLINE ~ScopedMutexLock() + { + pthread_mutex_unlock(&mMutex); + } +}; + +typedef ScopedMutexLock ScopedLock; +#else +struct ScopedSpinLock +{ + PX_FORCE_INLINE ScopedSpinLock(volatile int32_t& lock) : mLock(lock) + { + while(__sync_lock_test_and_set(&mLock, 1)) + { + // spinning without atomics is usually + // causing less bus traffic. -> only one + // CPU is modifying the cache line. + while(lock) + PxSpinLockPause(); + } + } + + PX_FORCE_INLINE ~ScopedSpinLock() + { + __sync_lock_release(&mLock); + } + + private: + volatile int32_t& mLock; +}; + +typedef ScopedSpinLock ScopedLock; +#endif + +struct SListDetail +{ + SListEntry* head; +#if defined(USE_MUTEX) + pthread_mutex_t lock; +#else + volatile int32_t lock; +#endif +}; + +template <typename T> +SListDetail* getDetail(T* impl) +{ + return reinterpret_cast<SListDetail*>(impl); +} +} + +SListImpl::SListImpl() +{ + getDetail(this)->head = NULL; + +#if defined(USE_MUTEX) + pthread_mutex_init(&getDetail(this)->lock, NULL); +#else + getDetail(this)->lock = 0; // 0 == unlocked +#endif +} + +SListImpl::~SListImpl() +{ +#if defined(USE_MUTEX) + pthread_mutex_destroy(&getDetail(this)->lock); +#endif +} + +void SListImpl::push(SListEntry* entry) +{ + ScopedLock lock(getDetail(this)->lock); + entry->mNext = getDetail(this)->head; + getDetail(this)->head = entry; +} + +SListEntry* SListImpl::pop() +{ + ScopedLock lock(getDetail(this)->lock); + SListEntry* result = getDetail(this)->head; + if(result != NULL) + getDetail(this)->head = result->mNext; + return result; +} + +SListEntry* 
SListImpl::flush() +{ + ScopedLock lock(getDetail(this)->lock); + SListEntry* result = getDetail(this)->head; + getDetail(this)->head = NULL; + return result; +} + +static const uint32_t gSize = sizeof(SListDetail); + +const uint32_t& SListImpl::getSize() +{ + return gSize; +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/unix/PsUnixSocket.cpp b/PxShared/src/foundation/src/unix/PsUnixSocket.cpp new file mode 100644 index 0000000..bc4cb5c --- /dev/null +++ b/PxShared/src/foundation/src/unix/PsUnixSocket.cpp @@ -0,0 +1,483 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxIntrinsics.h" + +#include "PsSocket.h" + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/select.h> +#include <sys/time.h> +#if !PX_PS4 +#include <netdb.h> +#include <arpa/inet.h> +#else +#include <ps4/PsPS4Socket.h> +#endif +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> + +#define INVALID_SOCKET -1 + +#ifndef SOMAXCONN +#define SOMAXCONN 5 +#endif + +namespace physx +{ +namespace shdfnd +{ + +const uint32_t Socket::DEFAULT_BUFFER_SIZE = 32768; + +class SocketImpl +{ + public: + SocketImpl(bool isBlocking); + virtual ~SocketImpl(); + + bool connect(const char* host, uint16_t port, uint32_t timeout); + bool listen(uint16_t port); + bool accept(bool block); + void disconnect(); + + void setBlocking(bool blocking); + + virtual uint32_t write(const uint8_t* data, uint32_t length); + virtual bool flush(); + uint32_t read(uint8_t* data, uint32_t length); + + PX_FORCE_INLINE bool isBlocking() const + { + return mIsBlocking; + } + PX_FORCE_INLINE bool isConnected() const + { + return mIsConnected; + } + PX_FORCE_INLINE const char* getHost() const + { + return mHost; + } + PX_FORCE_INLINE uint16_t getPort() const + { + return mPort; + } + + protected: + bool nonBlockingTimeout() const; + + int32_t mSocket; + int32_t mListenSocket; + const char* mHost; + uint16_t mPort; + bool mIsConnected; + bool mIsBlocking; + bool mListenMode; +}; + +void socketSetBlockingInternal(int32_t socket, bool blocking); + +SocketImpl::SocketImpl(bool isBlocking) +: mSocket(INVALID_SOCKET) +, mListenSocket(INVALID_SOCKET) +, mHost(NULL) +, mPort(0) +, 
mIsConnected(false) +, mIsBlocking(isBlocking) +, mListenMode(false) +{ +} + +SocketImpl::~SocketImpl() +{ +} + +bool SocketImpl::connect(const char* host, uint16_t port, uint32_t timeout) +{ + sockaddr_in socketAddress; + intrinsics::memSet(&socketAddress, 0, sizeof(sockaddr_in)); + socketAddress.sin_family = AF_INET; + socketAddress.sin_port = htons(port); + +#if PX_PS4 + socketAddress.sin_addr.s_addr = resolveName(host, timeout); +#else + // get host + hostent* hp = gethostbyname(host); + if(!hp) + { + in_addr a; + a.s_addr = inet_addr(host); + hp = gethostbyaddr(reinterpret_cast<const char*>(&a), sizeof(in_addr), AF_INET); + if(!hp) + return false; + } + intrinsics::memCopy(&socketAddress.sin_addr, hp->h_addr_list[0], hp->h_length); +#endif + // connect + mSocket = socket(AF_INET, SOCK_STREAM, 0); + if(mSocket == INVALID_SOCKET) + return false; + + socketSetBlockingInternal(mSocket, false); + + int connectRet = ::connect(mSocket, reinterpret_cast<sockaddr*>(&socketAddress), sizeof(socketAddress)); + if(connectRet < 0) + { + if(errno != EINPROGRESS) + { + disconnect(); + return false; + } + + // Setup select function call to monitor the connect call. + fd_set writefs; + fd_set exceptfs; + FD_ZERO(&writefs); + FD_ZERO(&exceptfs); + FD_SET(mSocket, &writefs); + FD_SET(mSocket, &exceptfs); + timeval timeout_; + timeout_.tv_sec = timeout / 1000; + timeout_.tv_usec = (timeout % 1000) * 1000; + int selret = ::select(mSocket + 1, NULL, &writefs, &exceptfs, &timeout_); + int excepted = FD_ISSET(mSocket, &exceptfs); + int canWrite = FD_ISSET(mSocket, &writefs); + if(selret != 1 || excepted || !canWrite) + { + disconnect(); + return false; + } + + // check if we are really connected, above code seems to return + // true if host is a unix machine even if the connection was + // not accepted. 
+ char buffer; + if(recv(mSocket, &buffer, 0, 0) < 0) + { + if(errno != EWOULDBLOCK) + { + disconnect(); + return false; + } + } + } + + socketSetBlockingInternal(mSocket, mIsBlocking); + +#if PX_APPLE_FAMILY + int noSigPipe = 1; + setsockopt(mSocket, SOL_SOCKET, SO_NOSIGPIPE, &noSigPipe, sizeof(int)); +#endif + + mIsConnected = true; + mPort = port; + mHost = host; + return true; +} + +bool SocketImpl::listen(uint16_t port) +{ + mListenSocket = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if(mListenSocket == INVALID_SOCKET) + return false; + + // enable address reuse: "Address already in use" error message + int yes = 1; + if(setsockopt(mListenSocket, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(int)) == -1) + return false; + + mListenMode = true; + + sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + addr.sin_addr.s_addr = INADDR_ANY; + intrinsics::memSet(addr.sin_zero, '\0', sizeof addr.sin_zero); + + return bind(mListenSocket, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) != -1 && + ::listen(mListenSocket, SOMAXCONN) != -1; +} + +bool SocketImpl::accept(bool block) +{ + if(mIsConnected || !mListenMode) + return false; + + // set the listen socket to be non-blocking. 
+ socketSetBlockingInternal(mListenSocket, block); + int32_t clientSocket = ::accept(mListenSocket, 0, 0); + if(clientSocket == INVALID_SOCKET) + return false; + + mSocket = clientSocket; + mIsConnected = true; + socketSetBlockingInternal(mSocket, mIsBlocking); // force the mode to whatever the user set + + return mIsConnected; +} + +void SocketImpl::disconnect() +{ + if(mListenSocket != INVALID_SOCKET) + { + close(mListenSocket); + mListenSocket = INVALID_SOCKET; + } + if(mSocket != INVALID_SOCKET) + { + if(mIsConnected) + { + socketSetBlockingInternal(mSocket, true); + shutdown(mSocket, SHUT_RDWR); + } + close(mSocket); + mSocket = INVALID_SOCKET; + } + mIsConnected = false; + mListenMode = false; + mPort = 0; + mHost = NULL; +} + +bool SocketImpl::nonBlockingTimeout() const +{ + return !mIsBlocking && errno == EWOULDBLOCK; +} + +#if !PX_PS4 +void socketSetBlockingInternal(int32_t socket, bool blocking) +{ + int mode = fcntl(socket, F_GETFL, 0); + if(!blocking) + mode |= O_NONBLOCK; + else + mode &= ~O_NONBLOCK; + fcntl(socket, F_SETFL, mode); +} +#endif + +// should be cross-platform from here down + +void SocketImpl::setBlocking(bool blocking) +{ + if(blocking != mIsBlocking) + { + mIsBlocking = blocking; + if(isConnected()) + socketSetBlockingInternal(mSocket, blocking); + } +} + +bool SocketImpl::flush() +{ + return true; +} + +uint32_t SocketImpl::write(const uint8_t* data, uint32_t length) +{ + if(length == 0) + return 0; + + int sent = send(mSocket, reinterpret_cast<const char*>(data), int32_t(length), 0); + + if(sent <= 0 && !nonBlockingTimeout()) + disconnect(); + + return uint32_t(sent > 0 ? sent : 0); +} + +uint32_t SocketImpl::read(uint8_t* data, uint32_t length) +{ + if(length == 0) + return 0; + + int32_t received = recv(mSocket, reinterpret_cast<char*>(data), int32_t(length), 0); + + if(received <= 0 && !nonBlockingTimeout()) + disconnect(); + + return uint32_t(received > 0 ? 
received : 0); +} + +class BufferedSocketImpl : public SocketImpl +{ + public: + BufferedSocketImpl(bool isBlocking) : SocketImpl(isBlocking), mBufferPos(0) + { + } + virtual ~BufferedSocketImpl() + { + } + bool flush(); + uint32_t write(const uint8_t* data, uint32_t length); + + private: + uint32_t mBufferPos; + uint8_t mBuffer[Socket::DEFAULT_BUFFER_SIZE]; +}; + +bool BufferedSocketImpl::flush() +{ + uint32_t totalBytesWritten = 0; + + while(totalBytesWritten < mBufferPos && mIsConnected) + totalBytesWritten += int32_t(SocketImpl::write(mBuffer + totalBytesWritten, mBufferPos - totalBytesWritten)); + + bool ret = (totalBytesWritten == mBufferPos); + mBufferPos = 0; + return ret; +} + +uint32_t BufferedSocketImpl::write(const uint8_t* data, uint32_t length) +{ + uint32_t bytesWritten = 0; + while(mBufferPos + length >= Socket::DEFAULT_BUFFER_SIZE) + { + uint32_t currentChunk = Socket::DEFAULT_BUFFER_SIZE - mBufferPos; + intrinsics::memCopy(mBuffer + mBufferPos, data + bytesWritten, currentChunk); + bytesWritten += uint32_t(currentChunk); // for the user, this is consumed even if we fail to shove it down a + // non-blocking socket + + uint32_t sent = SocketImpl::write(mBuffer, Socket::DEFAULT_BUFFER_SIZE); + mBufferPos = Socket::DEFAULT_BUFFER_SIZE - sent; + + if(sent < Socket::DEFAULT_BUFFER_SIZE) // non-blocking or error + { + if(sent) // we can reasonably hope this is rare + intrinsics::memMove(mBuffer, mBuffer + sent, mBufferPos); + + return bytesWritten; + } + length -= currentChunk; + } + + if(length > 0) + { + intrinsics::memCopy(mBuffer + mBufferPos, data + bytesWritten, length); + bytesWritten += length; + mBufferPos += length; + } + + return bytesWritten; +} + +Socket::Socket(bool inIsBuffering, bool isBlocking) +{ + if(inIsBuffering) + { + void* mem = PX_ALLOC(sizeof(BufferedSocketImpl), "BufferedSocketImpl"); + mImpl = PX_PLACEMENT_NEW(mem, BufferedSocketImpl)(isBlocking); + } + else + { + void* mem = PX_ALLOC(sizeof(SocketImpl), "SocketImpl"); + mImpl 
= PX_PLACEMENT_NEW(mem, SocketImpl)(isBlocking); + } +} + +Socket::~Socket() +{ + mImpl->flush(); + mImpl->disconnect(); + mImpl->~SocketImpl(); + PX_FREE(mImpl); +} + +bool Socket::connect(const char* host, uint16_t port, uint32_t timeout) +{ + return mImpl->connect(host, port, timeout); +} + +bool Socket::listen(uint16_t port) +{ + return mImpl->listen(port); +} + +bool Socket::accept(bool block) +{ + return mImpl->accept(block); +} + +void Socket::disconnect() +{ + mImpl->disconnect(); +} + +bool Socket::isConnected() const +{ + return mImpl->isConnected(); +} + +const char* Socket::getHost() const +{ + return mImpl->getHost(); +} + +uint16_t Socket::getPort() const +{ + return mImpl->getPort(); +} + +bool Socket::flush() +{ + if(!mImpl->isConnected()) + return false; + return mImpl->flush(); +} + +uint32_t Socket::write(const uint8_t* data, uint32_t length) +{ + if(!mImpl->isConnected()) + return 0; + return mImpl->write(data, length); +} + +uint32_t Socket::read(uint8_t* data, uint32_t length) +{ + if(!mImpl->isConnected()) + return 0; + return mImpl->read(data, length); +} + +void Socket::setBlocking(bool blocking) +{ + mImpl->setBlocking(blocking); +} + +bool Socket::isBlocking() const +{ + return mImpl->isBlocking(); +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/unix/PsUnixSync.cpp b/PxShared/src/foundation/src/unix/PsUnixSync.cpp new file mode 100644 index 0000000..aedbbe0 --- /dev/null +++ b/PxShared/src/foundation/src/unix/PsUnixSync.cpp @@ -0,0 +1,165 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxAssert.h" + +#include "Ps.h" +#include "PsUserAllocated.h" +#include "PsSync.h" + +#include <errno.h> +#include <stdio.h> +#include <pthread.h> +#include <time.h> +#include <sys/time.h> + +namespace physx +{ +namespace shdfnd +{ + +namespace +{ +class _SyncImpl +{ + public: + pthread_mutex_t mutex; + pthread_cond_t cond; + volatile int setCounter; + volatile bool is_set; +}; + +_SyncImpl* getSync(SyncImpl* impl) +{ + return reinterpret_cast<_SyncImpl*>(impl); +} +} + +static const uint32_t gSize = sizeof(_SyncImpl); +const uint32_t& SyncImpl::getSize() +{ + return gSize; +} + +struct PxUnixScopeLock +{ + PxUnixScopeLock(pthread_mutex_t& m) : mMutex(m) + { + pthread_mutex_lock(&mMutex); + } + + ~PxUnixScopeLock() + { + pthread_mutex_unlock(&mMutex); + } + + private: + pthread_mutex_t& mMutex; +}; + +SyncImpl::SyncImpl() +{ + int status = pthread_mutex_init(&getSync(this)->mutex, 0); + PX_ASSERT(!status); + status = pthread_cond_init(&getSync(this)->cond, 0); + PX_ASSERT(!status); + PX_UNUSED(status); + getSync(this)->is_set = false; + getSync(this)->setCounter = 0; +} + +SyncImpl::~SyncImpl() +{ + pthread_cond_destroy(&getSync(this)->cond); + pthread_mutex_destroy(&getSync(this)->mutex); +} + +void SyncImpl::reset() +{ + PxUnixScopeLock lock(getSync(this)->mutex); + getSync(this)->is_set = false; +} + +void SyncImpl::set() +{ + PxUnixScopeLock lock(getSync(this)->mutex); + if(!getSync(this)->is_set) + { + getSync(this)->is_set = true; + getSync(this)->setCounter++; + pthread_cond_broadcast(&getSync(this)->cond); + } +} + +bool SyncImpl::wait(uint32_t ms) +{ + PxUnixScopeLock lock(getSync(this)->mutex); + int lastSetCounter = getSync(this)->setCounter; + if(!getSync(this)->is_set) + { + if(ms == uint32_t(-1)) + { + // have to loop here and check is_set since pthread_cond_wait can return successfully + // even if it was not signaled by pthread_cond_broadcast (OS efficiency design decision) + int status = 0; + while(!status && 
!getSync(this)->is_set && (lastSetCounter == getSync(this)->setCounter)) + status = pthread_cond_wait(&getSync(this)->cond, &getSync(this)->mutex); + PX_ASSERT((!status && getSync(this)->is_set) || (lastSetCounter != getSync(this)->setCounter)); + } + else + { + timespec ts; + timeval tp; + gettimeofday(&tp, NULL); + uint32_t sec = ms / 1000; + uint32_t usec = (ms - 1000 * sec) * 1000; + + // sschirm: taking into account that us might accumulate to a second + // otherwise the pthread_cond_timedwait complains on osx. + usec = tp.tv_usec + usec; + uint32_t div_sec = usec / 1000000; + uint32_t rem_usec = usec - div_sec * 1000000; + + ts.tv_sec = tp.tv_sec + sec + div_sec; + ts.tv_nsec = rem_usec * 1000; + + // have to loop here and check is_set since pthread_cond_timedwait can return successfully + // even if it was not signaled by pthread_cond_broadcast (OS efficiency design decision) + int status = 0; + while(!status && !getSync(this)->is_set && (lastSetCounter == getSync(this)->setCounter)) + status = pthread_cond_timedwait(&getSync(this)->cond, &getSync(this)->mutex, &ts); + PX_ASSERT((!status && getSync(this)->is_set) || (status == ETIMEDOUT) || + (lastSetCounter != getSync(this)->setCounter)); + } + } + return getSync(this)->is_set || (lastSetCounter != getSync(this)->setCounter); +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/unix/PsUnixThread.cpp b/PxShared/src/foundation/src/unix/PsUnixThread.cpp new file mode 100644 index 0000000..cb369e0 --- /dev/null +++ b/PxShared/src/foundation/src/unix/PsUnixThread.cpp @@ -0,0 +1,472 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxAssert.h" +#include "foundation/PxErrorCallback.h" + +#include "Ps.h" +#include "PsFoundation.h" +#include "PsAtomic.h" +#include "PsThread.h" + +#include <math.h> +#if !PX_APPLE_FAMILY && !defined(ANDROID) && !defined(__CYGWIN__) && !PX_PS4 && !PX_EMSCRIPTEN +#include <bits/local_lim.h> // PTHREAD_STACK_MIN +#endif +#include <stdio.h> +#include <pthread.h> +#include <unistd.h> +#if !PX_PS4 +#include <sys/syscall.h> +#if !PX_APPLE_FAMILY && !PX_EMSCRIPTEN +#include <asm/unistd.h> +#include <sys/resource.h> +#endif +#endif + +#if PX_APPLE_FAMILY +#include <sys/types.h> +#include <sys/sysctl.h> +#include <TargetConditionals.h> +#include <pthread.h> +#endif + +// fwd +#if defined(ANDROID) +extern "C" { +int android_getCpuCount(void); +} +#endif + +#define PxSpinLockPause() asm("nop") + +namespace physx +{ +namespace shdfnd +{ + +namespace +{ + +typedef enum +{ + _PxThreadNotStarted, + _PxThreadStarted, + _PxThreadStopped +} PxThreadState; + +class _ThreadImpl +{ + public: + ThreadImpl::ExecuteFn fn; + void* arg; + volatile int32_t quitNow; + volatile int32_t threadStarted; + volatile int32_t state; + + pthread_t thread; + pid_t tid; + + uint32_t affinityMask; +}; + +_ThreadImpl* getThread(ThreadImpl* impl) +{ + return reinterpret_cast<_ThreadImpl*>(impl); +} + +static void setTid(_ThreadImpl& threadImpl) +{ +// query TID +#if PX_PS4 || (defined (TARGET_OS_TV) && TARGET_OS_TV) +// AM: TODO: neither of the below are implemented +#elif PX_APPLE_FAMILY + threadImpl.tid = syscall(SYS_gettid); +#elif PX_EMSCRIPTEN + threadImpl.tid = pthread_self(); +#else + threadImpl.tid = syscall(__NR_gettid); +#endif + + // notify/unblock parent thread + atomicCompareExchange(&(threadImpl.threadStarted), 1, 0); +} + +void* PxThreadStart(void* arg) +{ + _ThreadImpl* impl = getThread(reinterpret_cast<ThreadImpl*>(arg)); + impl->state = _PxThreadStarted; + + // run setTid in thread's context + setTid(*impl); + + // then run either the passed in function or execute 
from the derived class (Runnable). + if(impl->fn) + (*impl->fn)(impl->arg); + else if(impl->arg) + (reinterpret_cast<Runnable*>(impl->arg))->execute(); + return 0; +} +} + +static const uint32_t gSize = sizeof(_ThreadImpl); +const uint32_t& ThreadImpl::getSize() +{ + return gSize; +} + +ThreadImpl::Id ThreadImpl::getId() +{ + return Id(pthread_self()); +} + +ThreadImpl::ThreadImpl() +{ + getThread(this)->thread = 0; + getThread(this)->tid = 0; + getThread(this)->state = _PxThreadNotStarted; + getThread(this)->quitNow = 0; + getThread(this)->threadStarted = 0; + getThread(this)->fn = NULL; + getThread(this)->arg = NULL; + getThread(this)->affinityMask = 0; +} + +ThreadImpl::ThreadImpl(ThreadImpl::ExecuteFn fn, void* arg) +{ + getThread(this)->thread = 0; + getThread(this)->tid = 0; + getThread(this)->state = _PxThreadNotStarted; + getThread(this)->quitNow = 0; + getThread(this)->threadStarted = 0; + getThread(this)->fn = fn; + getThread(this)->arg = arg; + getThread(this)->affinityMask = 0; + + start(0, NULL); +} + +ThreadImpl::~ThreadImpl() +{ + if(getThread(this)->state == _PxThreadStarted) + kill(); +} + +void ThreadImpl::start(uint32_t stackSize, Runnable* runnable) +{ + if(getThread(this)->state != _PxThreadNotStarted) + return; + + if(stackSize == 0) + stackSize = getDefaultStackSize(); + +#if defined(PTHREAD_STACK_MIN) && !defined(ANDROID) + if(stackSize < PTHREAD_STACK_MIN) + { + shdfnd::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, + "ThreadImpl::start(): stack size was set below PTHREAD_STACK_MIN"); + stackSize = PTHREAD_STACK_MIN; + } +#endif + + if(runnable && !getThread(this)->arg && !getThread(this)->fn) + getThread(this)->arg = runnable; + + pthread_attr_t attr; + int status = pthread_attr_init(&attr); + PX_ASSERT(!status); + PX_UNUSED(status); + + status = pthread_attr_setstacksize(&attr, stackSize); + PX_ASSERT(!status); + status = pthread_create(&getThread(this)->thread, &attr, PxThreadStart, this); + PX_ASSERT(!status); + 
+ // wait for thread to startup and write out TID + // otherwise TID dependent calls like setAffinity will fail. + while(atomicCompareExchange(&(getThread(this)->threadStarted), 1, 1) == 0) + yield(); + + // here we are sure that getThread(this)->state >= _PxThreadStarted + + status = pthread_attr_destroy(&attr); + PX_ASSERT(!status); + + // apply stored affinity mask + if(getThread(this)->affinityMask) + setAffinityMask(getThread(this)->affinityMask); +} + +void ThreadImpl::signalQuit() +{ + atomicIncrement(&(getThread(this)->quitNow)); +} + +bool ThreadImpl::waitForQuit() +{ + if(getThread(this)->state == _PxThreadNotStarted) + return false; + + // works also with a stopped/exited thread if the handle is still valid + pthread_join(getThread(this)->thread, NULL); + return true; +} + +bool ThreadImpl::quitIsSignalled() +{ + return atomicCompareExchange(&(getThread(this)->quitNow), 0, 0) != 0; +} + +#if defined(PX_GCC_FAMILY) +__attribute__((noreturn)) +#endif + void ThreadImpl::quit() +{ + getThread(this)->state = _PxThreadStopped; + pthread_exit(0); +} + +void ThreadImpl::kill() +{ +#ifndef ANDROID + if(getThread(this)->state == _PxThreadStarted) + pthread_cancel(getThread(this)->thread); + getThread(this)->state = _PxThreadStopped; +#else + shdfnd::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, + "ThreadImpl::kill() called, but is not implemented"); +#endif +} + +void ThreadImpl::sleep(uint32_t ms) +{ + timespec sleepTime; + uint32_t remainder = ms % 1000; + sleepTime.tv_sec = ms - remainder; + sleepTime.tv_nsec = remainder * 1000000L; + + while(nanosleep(&sleepTime, &sleepTime) == -1) + continue; +} + +void ThreadImpl::yield() +{ + sched_yield(); +} + +#if PX_PS4 +uint32_t setAffinityMaskPS4(pthread_t, uint32_t); +#endif + +uint32_t ThreadImpl::setAffinityMask(uint32_t mask) +{ + // Same as windows impl if mask is zero + if(!mask) + return 0; + + getThread(this)->affinityMask = mask; + + uint64_t prevMask = 0; + + 
if(getThread(this)->state == _PxThreadStarted) + { +#if PX_PS4 + prevMask = setAffinityMaskPS4(getThread(this)->thread, mask); +#elif PX_EMSCRIPTEN + // not supported +#elif !PX_APPLE_FAMILY // Apple doesn't support syscall with getaffinity and setaffinity + int32_t errGet = syscall(__NR_sched_getaffinity, getThread(this)->tid, sizeof(prevMask), &prevMask); + if(errGet < 0) + return 0; + + int32_t errSet = syscall(__NR_sched_setaffinity, getThread(this)->tid, sizeof(mask), &mask); + if(errSet != 0) + return 0; +#endif + } + + return uint32_t(prevMask); +} + +void ThreadImpl::setName(const char* name) +{ +#if(defined(ANDROID) && (__ANDROID_API__ > 8)) + pthread_setname_np(getThread(this)->thread, name); +#else + // not implemented because most unix APIs expect setName() + // to be called from the thread's context. Example see next comment: + + // this works only with the current thread and can rename + // the main process if used in the wrong context: + // prctl(PR_SET_NAME, reinterpret_cast<unsigned long>(name) ,0,0,0); + PX_UNUSED(name); +#endif +} + +#if !PX_APPLE_FAMILY +static ThreadPriority::Enum convertPriorityFromLinux(uint32_t inPrio, int policy) +{ + PX_COMPILE_TIME_ASSERT(ThreadPriority::eLOW > ThreadPriority::eHIGH); + PX_COMPILE_TIME_ASSERT(ThreadPriority::eHIGH == 0); + + int maxL = sched_get_priority_max(policy); + int minL = sched_get_priority_min(policy); + int rangeL = maxL - minL; + int rangeNv = ThreadPriority::eLOW - ThreadPriority::eHIGH; + + // case for default scheduler policy + if(rangeL == 0) + return ThreadPriority::eNORMAL; + + float floatPrio = (float(maxL - inPrio) * float(rangeNv)) / float(rangeL); + + return ThreadPriority::Enum(int(roundf(floatPrio))); +} + +static int convertPriorityToLinux(ThreadPriority::Enum inPrio, int policy) +{ + int maxL = sched_get_priority_max(policy); + int minL = sched_get_priority_min(policy); + int rangeL = maxL - minL; + int rangeNv = ThreadPriority::eLOW - ThreadPriority::eHIGH; + + // case for 
default scheduler policy + if(rangeL == 0) + return 0; + + float floatPrio = (float(ThreadPriority::eLOW - inPrio) * float(rangeL)) / float(rangeNv); + + return minL + int(roundf(floatPrio)); +} +#endif + +void ThreadImpl::setPriority(ThreadPriority::Enum val) +{ + PX_UNUSED(val); +#if !PX_APPLE_FAMILY + int policy; + sched_param s_param; + pthread_getschedparam(getThread(this)->thread, &policy, &s_param); + s_param.sched_priority = convertPriorityToLinux(val, policy); + pthread_setschedparam(getThread(this)->thread, policy, &s_param); +#endif +} + +ThreadPriority::Enum ThreadImpl::getPriority(Id pthread) +{ + PX_UNUSED(pthread); +#if !PX_APPLE_FAMILY + int policy; + sched_param s_param; + int ret = pthread_getschedparam(pthread_t(pthread), &policy, &s_param); + if(ret == 0) + return convertPriorityFromLinux(s_param.sched_priority, policy); + else + return ThreadPriority::eNORMAL; +#else + return ThreadPriority::eNORMAL; +#endif +} + +uint32_t ThreadImpl::getNbPhysicalCores() +{ +#if PX_APPLE_FAMILY + int count; + size_t size = sizeof(count); + return sysctlbyname("hw.physicalcpu", &count, &size, NULL, 0) ? 0 : count; +#elif defined(ANDROID) + return android_getCpuCount(); +#else + // Linux exposes CPU topology using /sys/devices/system/cpu + // https://www.kernel.org/doc/Documentation/cputopology.txt + if(FILE* f = fopen("/sys/devices/system/cpu/possible", "r")) + { + int minIndex, maxIndex; + int n = fscanf(f, "%d-%d", &minIndex, &maxIndex); + fclose(f); + + if(n == 2) + return (maxIndex - minIndex) + 1; + else if(n == 1) + return minIndex + 1; + } + +#if PX_PS4 + // Reducing to 6 to take into account that the OS appears to use 2 cores at peak currently. 
+ return 6; +#else + // For non-Linux kernels this fallback is possibly the best we can do + // but will report logical (hyper-threaded) counts + int n = sysconf(_SC_NPROCESSORS_CONF); + if(n < 0) + return 0; + else + return n; +#endif +#endif +} + +uint32_t TlsAlloc() +{ + pthread_key_t key; + int status = pthread_key_create(&key, NULL); + PX_ASSERT(!status); + PX_UNUSED(status); + return uint32_t(key); +} + +void TlsFree(uint32_t index) +{ + int status = pthread_key_delete(pthread_key_t(index)); + PX_ASSERT(!status); + PX_UNUSED(status); +} + +void* TlsGet(uint32_t index) +{ + return reinterpret_cast<void*>(pthread_getspecific(pthread_key_t(index))); +} + +uint32_t TlsSet(uint32_t index, void* value) +{ + int status = pthread_setspecific(pthread_key_t(index), value); + PX_ASSERT(!status); + return !status; +} + +// DM: On Linux x86-32, without implementation-specific restrictions +// the default stack size for a new thread should be 2 megabytes (kernel.org). +// NOTE: take care of this value on other architecutres! +uint32_t ThreadImpl::getDefaultStackSize() +{ + return 1 << 21; +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/unix/PsUnixTime.cpp b/PxShared/src/foundation/src/unix/PsUnixTime.cpp new file mode 100644 index 0000000..43b94f5 --- /dev/null +++ b/PxShared/src/foundation/src/unix/PsUnixTime.cpp @@ -0,0 +1,120 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "Ps.h" +#include "PsTime.h" + +#include <time.h> +#include <sys/time.h> + +#if PX_APPLE_FAMILY +#include <mach/mach_time.h> +#endif + +// Use real-time high-precision timer. 
+#if !PX_APPLE_FAMILY +#define CLOCKID CLOCK_REALTIME +#endif + +namespace physx +{ +namespace shdfnd +{ + +static const CounterFrequencyToTensOfNanos gCounterFreq = Time::getCounterFrequency(); + +const CounterFrequencyToTensOfNanos& Time::getBootCounterFrequency() +{ + return gCounterFreq; +} + +static Time::Second getTimeSeconds() +{ + static struct timeval _tv; + gettimeofday(&_tv, NULL); + return double(_tv.tv_sec) + double(_tv.tv_usec) * 0.000001; +} + +Time::Time() +{ + mLastTime = getTimeSeconds(); +} + +Time::Second Time::getElapsedSeconds() +{ + Time::Second curTime = getTimeSeconds(); + Time::Second diff = curTime - mLastTime; + mLastTime = curTime; + return diff; +} + +Time::Second Time::peekElapsedSeconds() +{ + Time::Second curTime = getTimeSeconds(); + Time::Second diff = curTime - mLastTime; + return diff; +} + +Time::Second Time::getLastTime() const +{ + return mLastTime; +} + +#if PX_APPLE_FAMILY +CounterFrequencyToTensOfNanos Time::getCounterFrequency() +{ + mach_timebase_info_data_t info; + mach_timebase_info(&info); + // mach_absolute_time * (info.numer/info.denom) is in units of nano seconds + return CounterFrequencyToTensOfNanos(info.numer, info.denom * 10); +} + +uint64_t Time::getCurrentCounterValue() +{ + return mach_absolute_time(); +} + +#else + +CounterFrequencyToTensOfNanos Time::getCounterFrequency() +{ + return CounterFrequencyToTensOfNanos(1, 10); +} + +uint64_t Time::getCurrentCounterValue() +{ + struct timespec mCurrTimeInt; + clock_gettime(CLOCKID, &mCurrTimeInt); + // Convert to nanos as this doesn't cause a large divide here + return (static_cast<uint64_t>(mCurrTimeInt.tv_sec) * 1000000000) + (static_cast<uint64_t>(mCurrTimeInt.tv_nsec)); +} +#endif + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/windows/PsWindowsAtomic.cpp b/PxShared/src/foundation/src/windows/PsWindowsAtomic.cpp new file mode 100644 index 0000000..97cdba2 --- /dev/null +++ 
b/PxShared/src/foundation/src/windows/PsWindowsAtomic.cpp @@ -0,0 +1,96 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "windows/PsWindowsInclude.h" +#include "PsAtomic.h" + +namespace physx +{ +namespace shdfnd +{ + +int32_t atomicExchange(volatile int32_t* val, int32_t val2) +{ + return (int32_t)InterlockedExchange((volatile LONG*)val, (LONG)val2); +} + +int32_t atomicCompareExchange(volatile int32_t* dest, int32_t exch, int32_t comp) +{ + return (int32_t)InterlockedCompareExchange((volatile LONG*)dest, exch, comp); +} + +void* atomicCompareExchangePointer(volatile void** dest, void* exch, void* comp) +{ + return InterlockedCompareExchangePointer((volatile PVOID*)dest, exch, comp); +} + +int32_t atomicIncrement(volatile int32_t* val) +{ + return (int32_t)InterlockedIncrement((volatile LONG*)val); +} + +int32_t atomicDecrement(volatile int32_t* val) +{ + return (int32_t)InterlockedDecrement((volatile LONG*)val); +} + +int32_t atomicAdd(volatile int32_t* val, int32_t delta) +{ + LONG newValue, oldValue; + do + { + oldValue = *val; + newValue = oldValue + delta; + } while(InterlockedCompareExchange((volatile LONG*)val, newValue, oldValue) != oldValue); + + return newValue; +} + +int32_t atomicMax(volatile int32_t* val, int32_t val2) +{ + // Could do this more efficiently in asm... + + LONG newValue, oldValue; + + do + { + oldValue = *val; + + if(val2 > oldValue) + newValue = val2; + else + newValue = oldValue; + + } while(InterlockedCompareExchange((volatile LONG*)val, newValue, oldValue) != oldValue); + + return newValue; +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/windows/PsWindowsCpu.cpp b/PxShared/src/foundation/src/windows/PsWindowsCpu.cpp new file mode 100644 index 0000000..14c78fb --- /dev/null +++ b/PxShared/src/foundation/src/windows/PsWindowsCpu.cpp @@ -0,0 +1,64 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "PsCpu.h" +#pragma warning(push) +//'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives' +#pragma warning(disable : 4668) +#if PX_VC == 10 +#pragma warning(disable : 4987) // nonstandard extension used: 'throw (...)' +#endif +#include <intrin.h> +#pragma warning(pop) + +namespace physx +{ +namespace shdfnd +{ + +#if PX_ARM +#define cpuid(reg) reg[0] = reg[1] = reg[2] = reg[3] = 0; + +uint8_t Cpu::getCpuId() +{ + uint32_t cpuInfo[4]; + cpuid(cpuInfo); + return static_cast<uint8_t>(cpuInfo[1] >> 24); // APIC Physical ID +} +#else +uint8_t Cpu::getCpuId() +{ + int CPUInfo[4]; + int InfoType = 1; + __cpuid(CPUInfo, InfoType); + return static_cast<uint8_t>(CPUInfo[1] >> 24); // APIC Physical ID +} +#endif +} +} diff --git a/PxShared/src/foundation/src/windows/PsWindowsFPU.cpp b/PxShared/src/foundation/src/windows/PsWindowsFPU.cpp new file mode 100644 index 0000000..e83cccf --- /dev/null +++ b/PxShared/src/foundation/src/windows/PsWindowsFPU.cpp @@ -0,0 +1,88 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. +#include "PsFPU.h" +#include "float.h" +#include "PsIntrinsics.h" + +#if PX_X64 +#define _MCW_ALL _MCW_DN | _MCW_EM | _MCW_RC +#else +#define _MCW_ALL _MCW_DN | _MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC +#endif + +physx::shdfnd::FPUGuard::FPUGuard() +{ +// default plus FTZ and DAZ +#if PX_X64 + // query current control word state + _controlfp_s(mControlWords, 0, 0); + + // set both x87 and sse units to default + DAZ + unsigned int cw; + _controlfp_s(&cw, _CW_DEFAULT | _DN_FLUSH, _MCW_ALL); +#else + // query current control word state + __control87_2(0, 0, mControlWords, mControlWords + 1); + + // set both x87 and sse units to default + DAZ + unsigned int x87, sse; + __control87_2(_CW_DEFAULT | _DN_FLUSH, _MCW_ALL, &x87, &sse); +#endif +} + +physx::shdfnd::FPUGuard::~FPUGuard() +{ + _clearfp(); + +#if PX_X64 + // reset FP state + unsigned int cw; + _controlfp_s(&cw, *mControlWords, _MCW_ALL); +#else + + // reset FP state + unsigned int x87, sse; + __control87_2(mControlWords[0], _MCW_ALL, &x87, 0); + __control87_2(mControlWords[1], _MCW_ALL, 0, &sse); +#endif +} + +void physx::shdfnd::enableFPExceptions() +{ + // clear any pending exceptions + 
_clearfp(); + + // enable all fp exceptions except inexact and underflow (common, benign) + _controlfp_s(NULL, uint32_t(~_MCW_EM) | _EM_INEXACT | _EM_UNDERFLOW, _MCW_EM); +} + +void physx::shdfnd::disableFPExceptions() +{ + _controlfp_s(NULL, _MCW_EM, _MCW_EM); +} diff --git a/PxShared/src/foundation/src/windows/PsWindowsMutex.cpp b/PxShared/src/foundation/src/windows/PsWindowsMutex.cpp new file mode 100644 index 0000000..6174b96 --- /dev/null +++ b/PxShared/src/foundation/src/windows/PsWindowsMutex.cpp @@ -0,0 +1,163 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "windows/PsWindowsInclude.h" +#include "PsFoundation.h" +#include "PsUserAllocated.h" +#include "PsMutex.h" +#include "PsThread.h" +#include "foundation/PxErrorCallback.h" + +namespace physx +{ +namespace shdfnd +{ + +namespace +{ +struct MutexWinImpl +{ + CRITICAL_SECTION mLock; + Thread::Id mOwner; +}; + +MutexWinImpl* getMutex(MutexImpl* impl) +{ + return reinterpret_cast<MutexWinImpl*>(impl); +} +} + +MutexImpl::MutexImpl() +{ + InitializeCriticalSection(&getMutex(this)->mLock); + getMutex(this)->mOwner = 0; +} + +MutexImpl::~MutexImpl() +{ + DeleteCriticalSection(&getMutex(this)->mLock); +} + +void MutexImpl::lock() +{ + EnterCriticalSection(&getMutex(this)->mLock); + +#if PX_DEBUG + getMutex(this)->mOwner = Thread::getId(); +#endif +} + +bool MutexImpl::trylock() +{ + bool success = TryEnterCriticalSection(&getMutex(this)->mLock) != 0; +#if PX_DEBUG + if(success) + getMutex(this)->mOwner = Thread::getId(); +#endif + return success; +} + +void MutexImpl::unlock() +{ +#if PX_DEBUG + // ensure we are already holding the lock + if(getMutex(this)->mOwner != Thread::getId()) + { + shdfnd::getFoundation().error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, + "Mutex must be unlocked only by thread that has already acquired lock"); + return; + } + +#endif + + LeaveCriticalSection(&getMutex(this)->mLock); +} + +static const uint32_t gSize = sizeof(MutexWinImpl); + +const uint32_t& MutexImpl::getSize() +{ + return gSize; +} + +class ReadWriteLockImpl +{ + PX_NOCOPY(ReadWriteLockImpl) + public: + ReadWriteLockImpl() + { + } + Mutex mutex; + volatile LONG readerCount; // 
handle recursive writer locking +}; + +ReadWriteLock::ReadWriteLock() +{ + mImpl = reinterpret_cast<ReadWriteLockImpl*>(PX_ALLOC(sizeof(ReadWriteLockImpl), "ReadWriteLockImpl")); + PX_PLACEMENT_NEW(mImpl, ReadWriteLockImpl); + + mImpl->readerCount = 0; +} + +ReadWriteLock::~ReadWriteLock() +{ + mImpl->~ReadWriteLockImpl(); + PX_FREE(mImpl); +} + +void ReadWriteLock::lockReader() +{ + mImpl->mutex.lock(); + + InterlockedIncrement(&mImpl->readerCount); + + mImpl->mutex.unlock(); +} + +void ReadWriteLock::lockWriter() +{ + mImpl->mutex.lock(); + + // spin lock until no readers + while(mImpl->readerCount) + ; +} + +void ReadWriteLock::unlockReader() +{ + InterlockedDecrement(&mImpl->readerCount); +} + +void ReadWriteLock::unlockWriter() +{ + mImpl->mutex.unlock(); +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/windows/PsWindowsPrintString.cpp b/PxShared/src/foundation/src/windows/PsWindowsPrintString.cpp new file mode 100644 index 0000000..ee72037 --- /dev/null +++ b/PxShared/src/foundation/src/windows/PsWindowsPrintString.cpp @@ -0,0 +1,54 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. 
+// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PsString.h" +#include <stdio.h> +#pragma warning(push) +#pragma warning(disable : 4668) //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives' +#include <windows.h> +#pragma warning(pop) + +#include <stdio.h> +#include <string.h> +#include <stdarg.h> + +namespace physx +{ +namespace shdfnd +{ + +void printString(const char* str) +{ + puts(str); // do not use printf here, since str can contain multiple % signs that will not be printed + OutputDebugStringA(str); + OutputDebugStringA("\n"); +} +} + +} // namespace physx diff --git a/PxShared/src/foundation/src/windows/PsWindowsSList.cpp b/PxShared/src/foundation/src/windows/PsWindowsSList.cpp new file mode 100644 index 0000000..146785e --- /dev/null +++ b/PxShared/src/foundation/src/windows/PsWindowsSList.cpp @@ -0,0 +1,79 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "windows/PsWindowsInclude.h" +#include "PsAllocator.h" +#include "PsSList.h" + +namespace physx +{ +namespace shdfnd +{ +namespace +{ +template <typename T> +SLIST_HEADER* getDetail(T* impl) +{ + return reinterpret_cast<SLIST_HEADER*>(impl); +} +} + +SListImpl::SListImpl() +{ + InitializeSListHead(getDetail(this)); +} + +SListImpl::~SListImpl() +{ +} + +void SListImpl::push(SListEntry* entry) +{ + InterlockedPushEntrySList(getDetail(this), reinterpret_cast<SLIST_ENTRY*>(entry)); +} + +SListEntry* SListImpl::pop() +{ + return reinterpret_cast<SListEntry*>(InterlockedPopEntrySList(getDetail(this))); +} + +SListEntry* SListImpl::flush() +{ + return reinterpret_cast<SListEntry*>(InterlockedFlushSList(getDetail(this))); +} + +static const uint32_t gSize = sizeof(SLIST_HEADER); + +const uint32_t& SListImpl::getSize() +{ + return gSize; +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/windows/PsWindowsSocket.cpp b/PxShared/src/foundation/src/windows/PsWindowsSocket.cpp new file mode 100644 index 0000000..bd253b9 --- /dev/null +++ b/PxShared/src/foundation/src/windows/PsWindowsSocket.cpp @@ -0,0 +1,446 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxIntrinsics.h" + +#include "windows/PsWindowsInclude.h" +#include "PsSocket.h" +#include "PsThread.h" +#include "PsArray.h" + +#include <Winsock2.h> +#pragma comment(lib, "Ws2_32") + +namespace physx +{ +namespace shdfnd +{ + +const uint32_t Socket::DEFAULT_BUFFER_SIZE = 32768; + +class SocketImpl +{ + public: + SocketImpl(bool isBlocking); + virtual ~SocketImpl(); + + bool connect(const char* host, uint16_t port, uint32_t timeout); + bool listen(uint16_t port); + bool accept(bool block); + void disconnect(); + + void setBlocking(bool blocking); + + virtual uint32_t write(const uint8_t* data, uint32_t length); + virtual bool flush(); + uint32_t read(uint8_t* data, uint32_t length); + + PX_FORCE_INLINE bool isBlocking() const + { + return mIsBlocking; + } + PX_FORCE_INLINE bool isConnected() const + { + return mIsConnected; + } + PX_FORCE_INLINE const char* getHost() const + { + return mHost; + } + PX_FORCE_INLINE uint16_t getPort() const + { + return mPort; + } + + protected: + bool nonBlockingTimeout() const; + void setBlockingInternal(SOCKET socket, bool blocking); + + mutable SOCKET mSocket; + SOCKET mListenSocket; + const char* mHost; + uint16_t mPort; + mutable bool mIsConnected; + bool mIsBlocking; + bool mListenMode; + bool mSocketLayerIntialized; +}; + +SocketImpl::SocketImpl(bool isBlocking) +: mSocket(INVALID_SOCKET) +, mListenSocket(INVALID_SOCKET) +, mPort(0) +, mHost(NULL) +, mIsConnected(false) +, mIsBlocking(isBlocking) +, mListenMode(false) +, mSocketLayerIntialized(false) +{ + WORD vreq; + WSADATA wsaData; + vreq = MAKEWORD(2, 2); + mSocketLayerIntialized = (WSAStartup(vreq, &wsaData) == 0); +} + +SocketImpl::~SocketImpl() +{ + if(mSocketLayerIntialized) + WSACleanup(); +} + +void SocketImpl::setBlockingInternal(SOCKET socket, bool blocking) +{ + uint32_t mode = uint32_t(blocking ? 
0 : 1); + ioctlsocket(socket, FIONBIO, (u_long*)&mode); +} + +#ifdef PX_VC11 +#pragma warning(push) +#pragma warning(disable : 4548) // for FD_SET on vc11 only +#endif +bool SocketImpl::connect(const char* host, uint16_t port, uint32_t timeout) +{ + if(!mSocketLayerIntialized) + return false; + + sockaddr_in socketAddress; + hostent* hp; + + intrinsics::memSet(&socketAddress, 0, sizeof(sockaddr_in)); + socketAddress.sin_family = AF_INET; + socketAddress.sin_port = htons(port); + + // get host + hp = gethostbyname(host); + if(!hp) + { + in_addr a; + a.s_addr = inet_addr(host); + hp = gethostbyaddr((const char*)&a, sizeof(in_addr), AF_INET); + if(!hp) + return false; + } + intrinsics::memCopy(&socketAddress.sin_addr, hp->h_addr_list[0], (uint32_t)hp->h_length); + + // connect + mSocket = socket(PF_INET, SOCK_STREAM, 0); + if(mSocket == INVALID_SOCKET) + return false; + + setBlockingInternal(mSocket, false); + + ::connect(mSocket, (sockaddr*)&socketAddress, sizeof(socketAddress)); + // Setup select function call to monitor the connect call. 
+ fd_set writefs; + fd_set exceptfs; + FD_ZERO(&writefs); + FD_ZERO(&exceptfs); +#pragma warning(push) +#pragma warning(disable : 4127 4548) + FD_SET(mSocket, &writefs); + FD_SET(mSocket, &exceptfs); +#pragma warning(pop) + timeval timeout_; + timeout_.tv_sec = long(timeout / 1000); + timeout_.tv_usec = long(((timeout % 1000) * 1000)); + int selret = ::select(1, NULL, &writefs, &exceptfs, &timeout_); + int excepted = FD_ISSET(mSocket, &exceptfs); + int canWrite = FD_ISSET(mSocket, &writefs); + if(selret != 1 || excepted || !canWrite) + { + disconnect(); + return false; + } + + setBlockingInternal(mSocket, mIsBlocking); + + mIsConnected = true; + mPort = port; + mHost = host; + return true; +} +#ifdef PX_VC11 +#pragma warning(pop) +#endif + +bool SocketImpl::listen(uint16_t port) +{ + if(!mSocketLayerIntialized) + return false; + + mListenSocket = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if(mListenSocket == INVALID_SOCKET) + return false; + + mListenMode = true; + + sockaddr_in addr = { 0 }; + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + addr.sin_addr.s_addr = htonl(INADDR_ANY); + return bind(mListenSocket, (sockaddr*)&addr, sizeof(addr)) == 0 && ::listen(mListenSocket, SOMAXCONN) == 0; +} + +bool SocketImpl::accept(bool block) +{ + if(mIsConnected || !mListenMode) + return false; + + // set the listen socket to be non-blocking. 
+ setBlockingInternal(mListenSocket, block); + SOCKET clientSocket = ::accept(mListenSocket, 0, 0); + if(clientSocket == INVALID_SOCKET) + return false; + + mSocket = clientSocket; + mIsConnected = true; + setBlockingInternal(mSocket, mIsBlocking); // force the mode to whatever the user set + + return mIsConnected; +} + +void SocketImpl::disconnect() +{ + if(mListenSocket != INVALID_SOCKET) + { + closesocket(mListenSocket); + mListenSocket = INVALID_SOCKET; + } + if(mSocket != INVALID_SOCKET) + { + WSASendDisconnect(mSocket, NULL); + closesocket(mSocket); + mSocket = INVALID_SOCKET; + } + mIsConnected = false; + mListenMode = false; + mPort = 0; + mHost = NULL; +} + +bool SocketImpl::nonBlockingTimeout() const +{ + return !mIsBlocking && WSAGetLastError() == WSAEWOULDBLOCK; +} + +// should be cross-platform from here down + +void SocketImpl::setBlocking(bool blocking) +{ + if(blocking != mIsBlocking) + { + mIsBlocking = blocking; + if(isConnected()) + setBlockingInternal(mSocket, blocking); + } +} + +bool SocketImpl::flush() +{ + return true; +} + +uint32_t SocketImpl::write(const uint8_t* data, uint32_t length) +{ + if(length == 0) + return 0; + + int sent = send(mSocket, (const char*)data, (int32_t)length, 0); + + if(sent <= 0 && !nonBlockingTimeout()) + disconnect(); + + return uint32_t(sent > 0 ? sent : 0); +} + +uint32_t SocketImpl::read(uint8_t* data, uint32_t length) +{ + if(length == 0) + return 0; + + int32_t received = recv(mSocket, (char*)data, (int32_t)length, 0); + + if(received <= 0 && !nonBlockingTimeout()) + disconnect(); + + return uint32_t(received > 0 ? 
received : 0); +} + +class BufferedSocketImpl : public SocketImpl +{ + public: + BufferedSocketImpl(bool isBlocking) : SocketImpl(isBlocking), mBufferPos(0) + { + } + virtual ~BufferedSocketImpl() + { + } + bool flush(); + uint32_t write(const uint8_t* data, uint32_t length); + + private: + uint32_t mBufferPos; + uint8_t mBuffer[Socket::DEFAULT_BUFFER_SIZE]; +}; + +bool BufferedSocketImpl::flush() +{ + uint32_t totalBytesWritten = 0; + + while(totalBytesWritten < mBufferPos && mIsConnected) + totalBytesWritten += (int32_t)SocketImpl::write(mBuffer + totalBytesWritten, mBufferPos - totalBytesWritten); + + bool ret = (totalBytesWritten == mBufferPos); + mBufferPos = 0; + return ret; +} + +uint32_t BufferedSocketImpl::write(const uint8_t* data, uint32_t length) +{ + uint32_t bytesWritten = 0; + while(mBufferPos + length >= Socket::DEFAULT_BUFFER_SIZE) + { + uint32_t currentChunk = Socket::DEFAULT_BUFFER_SIZE - mBufferPos; + intrinsics::memCopy(mBuffer + mBufferPos, data + bytesWritten, currentChunk); + bytesWritten += (uint32_t)currentChunk; // for the user, this is consumed even if we fail to shove it down a + // non-blocking socket + + uint32_t sent = SocketImpl::write(mBuffer, Socket::DEFAULT_BUFFER_SIZE); + mBufferPos = Socket::DEFAULT_BUFFER_SIZE - sent; + + if(sent < Socket::DEFAULT_BUFFER_SIZE) // non-blocking or error + { + if(sent) // we can reasonably hope this is rare + intrinsics::memMove(mBuffer, mBuffer + sent, mBufferPos); + + return bytesWritten; + } + length -= currentChunk; + } + + if(length > 0) + { + intrinsics::memCopy(mBuffer + mBufferPos, data + bytesWritten, length); + bytesWritten += length; + mBufferPos += length; + } + + return bytesWritten; +} + +Socket::Socket(bool inIsBuffering, bool isBlocking) +{ + if(inIsBuffering) + { + void* mem = PX_ALLOC(sizeof(BufferedSocketImpl), "BufferedSocketImpl"); + mImpl = PX_PLACEMENT_NEW(mem, BufferedSocketImpl)(isBlocking); + } + else + { + void* mem = PX_ALLOC(sizeof(SocketImpl), "SocketImpl"); + mImpl 
= PX_PLACEMENT_NEW(mem, SocketImpl)(isBlocking); + } +} + +Socket::~Socket() +{ + mImpl->flush(); + mImpl->disconnect(); + mImpl->~SocketImpl(); + PX_FREE(mImpl); +} + +bool Socket::connect(const char* host, uint16_t port, uint32_t timeout) +{ + return mImpl->connect(host, port, timeout); +} + +bool Socket::listen(uint16_t port) +{ + return mImpl->listen(port); +} + +bool Socket::accept(bool block) +{ + return mImpl->accept(block); +} + +void Socket::disconnect() +{ + mImpl->disconnect(); +} + +bool Socket::isConnected() const +{ + return mImpl->isConnected(); +} + +const char* Socket::getHost() const +{ + return mImpl->getHost(); +} + +uint16_t Socket::getPort() const +{ + return mImpl->getPort(); +} + +bool Socket::flush() +{ + if(!mImpl->isConnected()) + return false; + return mImpl->flush(); +} + +uint32_t Socket::write(const uint8_t* data, uint32_t length) +{ + if(!mImpl->isConnected()) + return 0; + return mImpl->write(data, length); +} + +uint32_t Socket::read(uint8_t* data, uint32_t length) +{ + if(!mImpl->isConnected()) + return 0; + return mImpl->read(data, length); +} + +void Socket::setBlocking(bool blocking) +{ + mImpl->setBlocking(blocking); +} + +bool Socket::isBlocking() const +{ + return mImpl->isBlocking(); +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/windows/PsWindowsSync.cpp b/PxShared/src/foundation/src/windows/PsWindowsSync.cpp new file mode 100644 index 0000000..5ce8122 --- /dev/null +++ b/PxShared/src/foundation/src/windows/PsWindowsSync.cpp @@ -0,0 +1,82 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "windows/PsWindowsInclude.h" +#include "PsUserAllocated.h" +#include "PsSync.h" + +namespace physx +{ +namespace shdfnd +{ + +namespace +{ +HANDLE& getSync(SyncImpl* impl) +{ + return *reinterpret_cast<HANDLE*>(impl); +} +} + +static const uint32_t gSize = sizeof(HANDLE); +const uint32_t& SyncImpl::getSize() +{ + return gSize; +} + +SyncImpl::SyncImpl() +{ + getSync(this) = CreateEvent(0, true, false, 0); +} + +SyncImpl::~SyncImpl() +{ + CloseHandle(getSync(this)); +} + +void SyncImpl::reset() +{ + ResetEvent(getSync(this)); +} + +void SyncImpl::set() +{ + SetEvent(getSync(this)); +} + +bool SyncImpl::wait(uint32_t milliseconds) +{ + if(milliseconds == -1) + milliseconds = INFINITE; + + return WaitForSingleObject(getSync(this), milliseconds) == WAIT_OBJECT_0 ? true : false; +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/windows/PsWindowsThread.cpp b/PxShared/src/foundation/src/windows/PsWindowsThread.cpp new file mode 100644 index 0000000..18ad5ee --- /dev/null +++ b/PxShared/src/foundation/src/windows/PsWindowsThread.cpp @@ -0,0 +1,405 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. 
+// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "windows/PsWindowsInclude.h" +#include "PsFoundation.h" +#include "PsThread.h" +#include "foundation/PxErrorCallback.h" +#include "foundation/PxAssert.h" + +// an exception for setting the thread name in Microsoft debuggers +#define NS_MS_VC_EXCEPTION 0x406D1388 + +namespace physx +{ +namespace shdfnd +{ + +namespace +{ + +#if PX_VC +#pragma warning(disable : 4061) // enumerator 'identifier' in switch of enum 'enumeration' is not handled +#pragma warning(disable : 4191) //'operator/operation' : unsafe conversion from 'type of expression' to 'type required' +#endif + +// struct for naming a thread in the debugger +#pragma pack(push, 8) + +typedef struct tagTHREADNAME_INFO +{ + DWORD dwType; // Must be 0x1000. + LPCSTR szName; // Pointer to name (in user addr space). + DWORD dwThreadID; // Thread ID (-1=caller thread). + DWORD dwFlags; // Reserved for future use, must be zero. 
+} THREADNAME_INFO; + +#pragma pack(pop) + +class _ThreadImpl +{ + public: + enum State + { + NotStarted, + Started, + Stopped + }; + + HANDLE thread; + LONG quitNow; // Should be 32bit aligned on SMP systems. + State state; + DWORD threadID; + + ThreadImpl::ExecuteFn fn; + void* arg; + + uint32_t affinityMask; +}; + +_ThreadImpl* getThread(ThreadImpl* impl) +{ + return reinterpret_cast<_ThreadImpl*>(impl); +} + +DWORD WINAPI PxThreadStart(LPVOID arg) +{ + _ThreadImpl* impl = getThread((ThreadImpl*)arg); + + // run either the passed in function or execute from the derived class (Runnable). + if(impl->fn) + (*impl->fn)(impl->arg); + else if(impl->arg) + ((Runnable*)impl->arg)->execute(); + return 0; +} + +// cache physical thread count +uint32_t gPhysicalCoreCount = 0; +} + +static const uint32_t gSize = sizeof(_ThreadImpl); +const uint32_t& ThreadImpl::getSize() +{ + return gSize; +} + +ThreadImpl::Id ThreadImpl::getId() +{ + return static_cast<Id>(GetCurrentThreadId()); +} + +// fwd GetLogicalProcessorInformation() +typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); + +uint32_t ThreadImpl::getNbPhysicalCores() +{ + if(!gPhysicalCoreCount) + { + // modified example code from: http://msdn.microsoft.com/en-us/library/ms683194 + LPFN_GLPI glpi; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = NULL; + DWORD returnLength = 0; + DWORD processorCoreCount = 0; + DWORD byteOffset = 0; + + glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); + + if(NULL == glpi) + { + // GetLogicalProcessorInformation not supported on OS < XP Service Pack 3 + return 0; + } + + DWORD rc = (DWORD)glpi(NULL, &returnLength); + PX_ASSERT(rc == FALSE); + PX_UNUSED(rc); + + // first query reports required buffer space + if(GetLastError() == ERROR_INSUFFICIENT_BUFFER) + { + buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)PxAlloca(returnLength); + } + else + { + 
physx::shdfnd::getFoundation().error(PxErrorCode::eINTERNAL_ERROR, __FILE__, __LINE__, + "Error querying buffer size for number of physical processors"); + return 0; + } + + // retrieve data + rc = (DWORD)glpi(buffer, &returnLength); + if(rc != TRUE) + { + physx::shdfnd::getFoundation().error(PxErrorCode::eINTERNAL_ERROR, __FILE__, __LINE__, + "Error querying number of physical processors"); + return 0; + } + + ptr = buffer; + + while(byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) + { + switch(ptr->Relationship) + { + case RelationProcessorCore: + processorCoreCount++; + break; + default: + break; + } + + byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + ptr++; + } + + gPhysicalCoreCount = processorCoreCount; + } + + return gPhysicalCoreCount; +} + +ThreadImpl::ThreadImpl() +{ + getThread(this)->thread = NULL; + getThread(this)->state = _ThreadImpl::NotStarted; + getThread(this)->quitNow = 0; + getThread(this)->fn = NULL; + getThread(this)->arg = NULL; + getThread(this)->affinityMask = 0; +} + +ThreadImpl::ThreadImpl(ExecuteFn fn, void* arg) +{ + getThread(this)->thread = NULL; + getThread(this)->state = _ThreadImpl::NotStarted; + getThread(this)->quitNow = 0; + getThread(this)->fn = fn; + getThread(this)->arg = arg; + getThread(this)->affinityMask = 0; + + start(0, NULL); +} + +ThreadImpl::~ThreadImpl() +{ + if(getThread(this)->state == _ThreadImpl::Started) + kill(); + CloseHandle(getThread(this)->thread); +} + +void ThreadImpl::start(uint32_t stackSize, Runnable* runnable) +{ + if(getThread(this)->state != _ThreadImpl::NotStarted) + return; + getThread(this)->state = _ThreadImpl::Started; + + if(runnable && !getThread(this)->arg && !getThread(this)->fn) + getThread(this)->arg = runnable; + + getThread(this)->thread = + CreateThread(NULL, stackSize, PxThreadStart, (LPVOID) this, CREATE_SUSPENDED, &getThread(this)->threadID); + if(!getThread(this)->thread) + { + 
physx::shdfnd::getFoundation().error(PxErrorCode::eINTERNAL_ERROR, __FILE__, __LINE__, + "PsWindowsThread::start: Failed to create thread."); + getThread(this)->state = _ThreadImpl::NotStarted; + return; + } + + // set affinity and resume + if(getThread(this)->affinityMask) + setAffinityMask(getThread(this)->affinityMask); + + DWORD rc = ResumeThread(getThread(this)->thread); + if(rc == DWORD(-1)) + { + physx::shdfnd::getFoundation().error(PxErrorCode::eINTERNAL_ERROR, __FILE__, __LINE__, + "PsWindowsThread::start: Failed to resume thread."); + getThread(this)->state = _ThreadImpl::NotStarted; + return; + } +} + +void ThreadImpl::signalQuit() +{ + InterlockedIncrement(&(getThread(this)->quitNow)); +} + +bool ThreadImpl::waitForQuit() +{ + if(getThread(this)->state == _ThreadImpl::NotStarted) + return false; + + WaitForSingleObject(getThread(this)->thread, INFINITE); + return true; +} + +bool ThreadImpl::quitIsSignalled() +{ + return InterlockedCompareExchange(&(getThread(this)->quitNow), 0, 0) != 0; +} + +void ThreadImpl::quit() +{ + getThread(this)->state = _ThreadImpl::Stopped; + ExitThread(0); +} + +void ThreadImpl::kill() +{ + if(getThread(this)->state == _ThreadImpl::Started) + TerminateThread(getThread(this)->thread, 0); + getThread(this)->state = _ThreadImpl::Stopped; +} + +void ThreadImpl::sleep(uint32_t ms) +{ + Sleep(ms); +} + +void ThreadImpl::yield() +{ + SwitchToThread(); +} + +uint32_t ThreadImpl::setAffinityMask(uint32_t mask) +{ + if(mask) + { + // store affinity + getThread(this)->affinityMask = mask; + + // if thread already started apply immediately + if(getThread(this)->state == _ThreadImpl::Started) + { + uint32_t err = uint32_t(SetThreadAffinityMask(getThread(this)->thread, mask)); + return err; + } + } + + return 0; +} + +void ThreadImpl::setName(const char* name) +{ + THREADNAME_INFO info; + info.dwType = 0x1000; + info.szName = name; + info.dwThreadID = getThread(this)->threadID; + info.dwFlags = 0; + + // C++ Exceptions are disabled for 
this project, but SEH is not (and cannot be) + // http://stackoverflow.com/questions/943087/what-exactly-will-happen-if-i-disable-c-exceptions-in-a-project + __try + { + RaiseException(NS_MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), (ULONG_PTR*)&info); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + // this runs if not attached to a debugger (thus not really naming the thread) + } +} + +void ThreadImpl::setPriority(ThreadPriority::Enum prio) +{ + BOOL rc = false; + switch(prio) + { + case ThreadPriority::eHIGH: + rc = SetThreadPriority(getThread(this)->thread, THREAD_PRIORITY_HIGHEST); + break; + case ThreadPriority::eABOVE_NORMAL: + rc = SetThreadPriority(getThread(this)->thread, THREAD_PRIORITY_ABOVE_NORMAL); + break; + case ThreadPriority::eNORMAL: + rc = SetThreadPriority(getThread(this)->thread, THREAD_PRIORITY_NORMAL); + break; + case ThreadPriority::eBELOW_NORMAL: + rc = SetThreadPriority(getThread(this)->thread, THREAD_PRIORITY_BELOW_NORMAL); + break; + case ThreadPriority::eLOW: + rc = SetThreadPriority(getThread(this)->thread, THREAD_PRIORITY_LOWEST); + break; + default: + break; + } + if(!rc) + { + physx::shdfnd::getFoundation().error(PxErrorCode::eINTERNAL_ERROR, __FILE__, __LINE__, + "PsWindowsThread::setPriority: Failed to set thread priority."); + } +} + +ThreadPriority::Enum ThreadImpl::getPriority(Id threadId) +{ + ThreadPriority::Enum retval = ThreadPriority::eLOW; + int priority = GetThreadPriority((HANDLE)threadId); + PX_COMPILE_TIME_ASSERT(THREAD_PRIORITY_HIGHEST > THREAD_PRIORITY_ABOVE_NORMAL); + if(priority >= THREAD_PRIORITY_HIGHEST) + retval = ThreadPriority::eHIGH; + else if(priority >= THREAD_PRIORITY_ABOVE_NORMAL) + retval = ThreadPriority::eABOVE_NORMAL; + else if(priority >= THREAD_PRIORITY_NORMAL) + retval = ThreadPriority::eNORMAL; + else if(priority >= THREAD_PRIORITY_BELOW_NORMAL) + retval = ThreadPriority::eBELOW_NORMAL; + return retval; +} + +uint32_t TlsAlloc() +{ + DWORD rv = ::TlsAlloc(); + PX_ASSERT(rv != 
TLS_OUT_OF_INDEXES); + return (uint32_t)rv; +} + +void TlsFree(uint32_t index) +{ + ::TlsFree(index); +} + +void* TlsGet(uint32_t index) +{ + return ::TlsGetValue(index); +} + +uint32_t TlsSet(uint32_t index, void* value) +{ + return (uint32_t)::TlsSetValue(index, value); +} + +uint32_t ThreadImpl::getDefaultStackSize() +{ + return 1048576; +}; + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/foundation/src/windows/PsWindowsTime.cpp b/PxShared/src/foundation/src/windows/PsWindowsTime.cpp new file mode 100644 index 0000000..2c1e435 --- /dev/null +++ b/PxShared/src/foundation/src/windows/PsWindowsTime.cpp @@ -0,0 +1,101 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PsTime.h" +#include "windows/PsWindowsInclude.h" + +namespace +{ +int64_t getTimeTicks() +{ + LARGE_INTEGER a; + QueryPerformanceCounter(&a); + return a.QuadPart; +} + +double getTickDuration() +{ + LARGE_INTEGER a; + QueryPerformanceFrequency(&a); + return 1.0f / double(a.QuadPart); +} + +double sTickDuration = getTickDuration(); +} // namespace + +namespace physx +{ +namespace shdfnd +{ + +static const CounterFrequencyToTensOfNanos gCounterFreq = Time::getCounterFrequency(); + +const CounterFrequencyToTensOfNanos& Time::getBootCounterFrequency() +{ + return gCounterFreq; +} + +CounterFrequencyToTensOfNanos Time::getCounterFrequency() +{ + LARGE_INTEGER freq; + QueryPerformanceFrequency(&freq); + return CounterFrequencyToTensOfNanos(Time::sNumTensOfNanoSecondsInASecond, (uint64_t)freq.QuadPart); +} + +uint64_t Time::getCurrentCounterValue() +{ + LARGE_INTEGER ticks; + QueryPerformanceCounter(&ticks); + return (uint64_t)ticks.QuadPart; +} + +Time::Time() : mTickCount(0) +{ + getElapsedSeconds(); +} + +Time::Second Time::getElapsedSeconds() +{ + int64_t lastTickCount = mTickCount; + mTickCount = getTimeTicks(); + return (mTickCount - lastTickCount) * sTickDuration; +} + +Time::Second Time::peekElapsedSeconds() +{ + return (getTimeTicks() - mTickCount) * sTickDuration; +} + +Time::Second Time::getLastTime() const +{ + return mTickCount * sTickDuration; +} + +} // namespace shdfnd +} // namespace physx diff --git a/PxShared/src/pvd/include/PsPvd.h b/PxShared/src/pvd/include/PsPvd.h new file mode 
100644 index 0000000..1f0ae79 --- /dev/null +++ b/PxShared/src/pvd/include/PsPvd.h @@ -0,0 +1,85 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef PXPVDSDK_PSPVD_H
#define PXPVDSDK_PSPVD_H

/** \addtogroup pvd
@{
*/
#include "pvd/PxPvd.h"
#include "PsBroadcast.h"

#if !PX_DOXYGEN
namespace physx
{
#endif

class PxPvdTransport;

#if !PX_DOXYGEN
namespace pvdsdk
{
#endif

class PvdDataStream;
class PvdClient;
class PvdOMMetaDataProvider;

// PsPvd is intended for advanced users: it extends PxPvd with support for custom
// PVD clients. It also derives from shdfnd::AllocationListener and redeclares that
// listener's callbacks below as pure virtual.
class PsPvd : public physx::PxPvd, public shdfnd::AllocationListener
{
  public:
    // Client registration (presumably clients receive PVD connection events; confirm against PvdClient).
    virtual void addClient(PvdClient* client) = 0;
    virtual void removeClient(PvdClient* client) = 0;

    // Object registration; the meaning of the bool result is not visible in this header.
    virtual bool registerObject(const void* inItem) = 0;
    virtual bool unRegisterObject(const void* inItem) = 0;

    // AllocationListener callbacks
    void onAllocation(size_t size, const char* typeName, const char* filename, int line, void* allocatedMemory) = 0;
    void onDeallocation(void* addr) = 0;

    virtual PvdOMMetaDataProvider& getMetaDataProvider() = 0;

    virtual uint64_t getNextStreamId() = 0;
    // Call to flush events to PVD
    virtual void flush() = 0;

};

// Sets the allocator callback used by the PVD SDK (declaration only; behavior is defined elsewhere).
PX_PVDSDK_API void PX_CALL_CONV SetPvdAllocatorCallback(PxAllocatorCallback* inAllocatorCallback);

#if !PX_DOXYGEN
} // namespace pvdsdk
} // namespace physx
#endif

/** @} */
#endif // PXPVDSDK_PSPVD_H
diff --git a/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h b/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h new file mode 100644 index 0000000..22903ec --- /dev/null +++ b/PxShared/src/pvd/include/PxProfileAllocatorWrapper.h @@ -0,0 +1,231 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto.
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef PXPVDSDK_PXPROFILEALLOCATORWRAPPER_H +#define PXPVDSDK_PXPROFILEALLOCATORWRAPPER_H + +#include "foundation/PxPreprocessor.h" +#include "foundation/PxAllocatorCallback.h" +#include "foundation/PxErrorCallback.h" +#include "foundation/PxAssert.h" + +#include "PsArray.h" +#include "PsHashMap.h" + +namespace physx { namespace profile { + + /** + \brief Helper struct to encapsulate the user allocator callback + Useful for array and hash templates + */ + struct PxProfileAllocatorWrapper + { + PxAllocatorCallback* mUserAllocator; + + PxProfileAllocatorWrapper( PxAllocatorCallback& inUserAllocator ) + : mUserAllocator( &inUserAllocator ) + { + } + + PxProfileAllocatorWrapper( PxAllocatorCallback* inUserAllocator ) + : mUserAllocator( inUserAllocator ) + { + } + + PxAllocatorCallback& getAllocator() const + { + PX_ASSERT( NULL != mUserAllocator ); + return *mUserAllocator; + } + }; + + /** + \brief Helper class to encapsulate the reflection allocator + */ + template <typename T> + class PxProfileWrapperReflectionAllocator + { + static const char* getName() + { +#if PX_LINUX || PX_ANDROID || PX_PS4 || PX_IOS || PX_OSX || PX_EMSCRIPTEN || PX_NX + return __PRETTY_FUNCTION__; +#else + return typeid(T).name(); +#endif + } + PxProfileAllocatorWrapper* mWrapper; + + public: + PxProfileWrapperReflectionAllocator(PxProfileAllocatorWrapper& inWrapper) : mWrapper( &inWrapper ) {} + PxProfileWrapperReflectionAllocator( const PxProfileWrapperReflectionAllocator& inOther ) + : mWrapper( inOther.mWrapper ) + { + } + PxProfileWrapperReflectionAllocator& operator=( const PxProfileWrapperReflectionAllocator& inOther ) + { + mWrapper = inOther.mWrapper; + return *this; + } + PxAllocatorCallback& getAllocator() { return mWrapper->getAllocator(); } + void* allocate(size_t size, const char* filename, int line) + { +#if PX_CHECKED // checked and debug builds + if(!size) + return 0; + return getAllocator().allocate(size, getName(), filename, line); +#else + return 
getAllocator().allocate(size, "<no allocation names in this config>", filename, line); +#endif + } + void deallocate(void* ptr) + { + if(ptr) + getAllocator().deallocate(ptr); + } + }; + + /** + \brief Helper class to encapsulate the named allocator + */ + struct PxProfileWrapperNamedAllocator + { + PxProfileAllocatorWrapper* mWrapper; + const char* mAllocationName; + PxProfileWrapperNamedAllocator(PxProfileAllocatorWrapper& inWrapper, const char* inAllocationName) + : mWrapper( &inWrapper ) + , mAllocationName( inAllocationName ) + {} + PxProfileWrapperNamedAllocator( const PxProfileWrapperNamedAllocator& inOther ) + : mWrapper( inOther.mWrapper ) + , mAllocationName( inOther.mAllocationName ) + { + } + PxProfileWrapperNamedAllocator& operator=( const PxProfileWrapperNamedAllocator& inOther ) + { + mWrapper = inOther.mWrapper; + mAllocationName = inOther.mAllocationName; + return *this; + } + PxAllocatorCallback& getAllocator() { return mWrapper->getAllocator(); } + void* allocate(size_t size, const char* filename, int line) + { + if(!size) + return 0; + return getAllocator().allocate(size, mAllocationName, filename, line); + } + void deallocate(void* ptr) + { + if(ptr) + getAllocator().deallocate(ptr); + } + }; + + /** + \brief Helper struct to encapsulate the array + */ + template<class T> + struct PxProfileArray : public shdfnd::Array<T, PxProfileWrapperReflectionAllocator<T> > + { + typedef PxProfileWrapperReflectionAllocator<T> TAllocatorType; + + PxProfileArray( PxProfileAllocatorWrapper& inWrapper ) + : shdfnd::Array<T, TAllocatorType >( TAllocatorType( inWrapper ) ) + { + } + + PxProfileArray( const PxProfileArray< T >& inOther ) + : shdfnd::Array<T, TAllocatorType >( inOther, inOther ) + { + } + }; + + /** + \brief Helper struct to encapsulate the array + */ + template<typename TKeyType, typename TValueType, typename THashType=shdfnd::Hash<TKeyType> > + struct PxProfileHashMap : public shdfnd::HashMap<TKeyType, TValueType, THashType, 
PxProfileWrapperReflectionAllocator< TValueType > > + { + typedef shdfnd::HashMap<TKeyType, TValueType, THashType, PxProfileWrapperReflectionAllocator< TValueType > > THashMapType; + typedef PxProfileWrapperReflectionAllocator<TValueType> TAllocatorType; + PxProfileHashMap( PxProfileAllocatorWrapper& inWrapper ) + : THashMapType( TAllocatorType( inWrapper ) ) + { + } + }; + + /** + \brief Helper function to encapsulate the profile allocation + */ + template<typename TDataType> + inline TDataType* PxProfileAllocate( PxAllocatorCallback* inAllocator, const char* file, int inLine ) + { + PxProfileAllocatorWrapper wrapper( inAllocator ); + typedef PxProfileWrapperReflectionAllocator< TDataType > TAllocator; + TAllocator theAllocator( wrapper ); + return reinterpret_cast<TDataType*>( theAllocator.allocate( sizeof( TDataType ), file, inLine ) ); + } + + /** + \brief Helper function to encapsulate the profile allocation + */ + template<typename TDataType> + inline TDataType* PxProfileAllocate( PxAllocatorCallback& inAllocator, const char* file, int inLine ) + { + return PxProfileAllocate<TDataType>( &inAllocator, file, inLine ); + } + + /** + \brief Helper function to encapsulate the profile deallocation + */ + template<typename TDataType> + inline void PxProfileDeleteAndDeallocate( PxProfileAllocatorWrapper& inAllocator, TDataType* inDType ) + { + PX_ASSERT(inDType); + PxAllocatorCallback& allocator( inAllocator.getAllocator() ); + inDType->~TDataType(); + allocator.deallocate( inDType ); + } + + /** + \brief Helper function to encapsulate the profile deallocation + */ + template<typename TDataType> + inline void PxProfileDeleteAndDeallocate( PxAllocatorCallback& inAllocator, TDataType* inDType ) + { + PxProfileAllocatorWrapper wrapper( &inAllocator ); + PxProfileDeleteAndDeallocate( wrapper, inDType ); + } + +} } + +#define PX_PROFILE_NEW( allocator, dtype ) new (physx::profile::PxProfileAllocate<dtype>( allocator, __FILE__, __LINE__ )) dtype +#define PX_PROFILE_DELETE( 
allocator, obj ) physx::profile::PxProfileDeleteAndDeallocate( allocator, obj ); + +#endif // PXPVDSDK_PXPROFILEALLOCATORWRAPPER_H diff --git a/PxShared/src/pvd/include/PxPvdClient.h b/PxShared/src/pvd/include/PxPvdClient.h new file mode 100644 index 0000000..fccf76d --- /dev/null +++ b/PxShared/src/pvd/include/PxPvdClient.h @@ -0,0 +1,77 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PXPVDSDK_PXPVDCLIENT_H +#define PXPVDSDK_PXPVDCLIENT_H + +/** \addtogroup pvd +@{ +*/ +#include "foundation/PxFlags.h" +#include "foundation/PxVec3.h" + +#if !PX_DOXYGEN +namespace physx +{ +namespace pvdsdk +{ +#endif + +class PvdDataStream; +class PvdUserRenderer; + +/** +\brief PvdClient is the per-client connection to PVD. +It provides callback when PVD is connected/disconnted. +It provides access to the internal object so that advanced users can create extension client. +*/ +class PvdClient +{ + public: + virtual PvdDataStream* getDataStream() = 0; + virtual PvdUserRenderer* getUserRender() = 0; + + virtual bool isConnected() const = 0; + virtual void onPvdConnected() = 0; + virtual void onPvdDisconnected() = 0; + virtual void flush() = 0; + + protected: + virtual ~PvdClient() + { + } +}; + +#if !PX_DOXYGEN +} // namespace pvdsdk +} // namespace physx +#endif + +/** @} */ +#endif // PXPVDSDK_PXPVDCLIENT_H diff --git a/PxShared/src/pvd/include/PxPvdDataStream.h b/PxShared/src/pvd/include/PxPvdDataStream.h new file mode 100644 index 0000000..c47ef12 --- /dev/null +++ b/PxShared/src/pvd/include/PxPvdDataStream.h @@ -0,0 +1,272 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+#ifndef PXPVDSDK_PXPVDDATASTREAM_H +#define PXPVDSDK_PXPVDDATASTREAM_H + +/** \addtogroup pvd +@{ +*/ +#include "pvd/PxPvd.h" +#include "PxPvdErrorCodes.h" +#include "PxPvdObjectModelBaseTypes.h" + +#if !PX_DOXYGEN +namespace physx +{ +namespace pvdsdk +{ +#endif + +class PvdPropertyDefinitionHelper; + +class PvdMetaDataStream +{ + protected: + virtual ~PvdMetaDataStream() + { + } + + public: + virtual PvdError createClass(const NamespacedName& nm) = 0; + template <typename TDataType> + PvdError createClass() + { + return createClass(getPvdNamespacedNameForType<TDataType>()); + } + + virtual PvdError deriveClass(const NamespacedName& parent, const NamespacedName& child) = 0; + template <typename TParentType, typename TChildType> + PvdError deriveClass() + { + return deriveClass(getPvdNamespacedNameForType<TParentType>(), getPvdNamespacedNameForType<TChildType>()); + } + + virtual bool isClassExist(const NamespacedName& nm) = 0; + template <typename TDataType> + bool isClassExist() + { + return isClassExist(getPvdNamespacedNameForType<TDataType>()); + } + + virtual PvdError createProperty(const NamespacedName& clsName, const char* name, const char* semantic, + const NamespacedName& dtypeName, PropertyType::Enum propertyType, + DataRef<NamedValue> values = DataRef<NamedValue>()) = 0; + template <typename TClsType, typename TDataType> + PvdError createProperty(String name, String semantic = "", PropertyType::Enum propertyType = PropertyType::Scalar, + DataRef<NamedValue> values = DataRef<NamedValue>()) + { + return createProperty(getPvdNamespacedNameForType<TClsType>(), name, semantic, + getPvdNamespacedNameForType<TDataType>(), propertyType, values); + } + + virtual PvdError createPropertyMessage(const NamespacedName& cls, const NamespacedName& msgName, + DataRef<PropertyMessageArg> entries, uint32_t messageSizeInBytes) = 0; + + template <typename TClsType, typename TMsgType> + PvdError createPropertyMessage(DataRef<PropertyMessageArg> entries) + { + return 
createPropertyMessage(getPvdNamespacedNameForType<TClsType>(), getPvdNamespacedNameForType<TMsgType>(), + entries, sizeof(TMsgType)); + } +}; + +class PvdInstanceDataStream +{ + protected: + virtual ~PvdInstanceDataStream() + { + } + + public: + virtual PvdError createInstance(const NamespacedName& cls, const void* instance) = 0; + + template <typename TDataType> + PvdError createInstance(const TDataType* inst) + { + return createInstance(getPvdNamespacedNameForType<TDataType>(), inst); + } + virtual bool isInstanceValid(const void* instance) = 0; + + // If the property will fit or is already completely in memory + virtual PvdError setPropertyValue(const void* instance, String name, DataRef<const uint8_t> data, + const NamespacedName& incomingTypeName) = 0; + template <typename TDataType> + PvdError setPropertyValue(const void* instance, String name, const TDataType& value) + { + const uint8_t* dataStart = reinterpret_cast<const uint8_t*>(&value); + return setPropertyValue(instance, name, DataRef<const uint8_t>(dataStart, dataStart + sizeof(TDataType)), + getPvdNamespacedNameForType<TDataType>()); + } + + template <typename TDataType> + PvdError setPropertyValue(const void* instance, String name, const TDataType* value, uint32_t numItems) + { + const uint8_t* dataStart = reinterpret_cast<const uint8_t*>(value); + return setPropertyValue(instance, name, + DataRef<const uint8_t>(dataStart, dataStart + sizeof(TDataType) * numItems), + getPvdNamespacedNameForType<TDataType>()); + } + + // Else if the property is very large (contact reports) you can send it in chunks. 
+ virtual PvdError beginSetPropertyValue(const void* instance, String name, const NamespacedName& incomingTypeName) = 0; + + template <typename TDataType> + PvdError beginSetPropertyValue(const void* instance, String name) + { + return beginSetPropertyValue(instance, name, getPvdNamespacedNameForType<TDataType>()); + } + virtual PvdError appendPropertyValueData(DataRef<const uint8_t> data) = 0; + + template <typename TDataType> + PvdError appendPropertyValueData(const TDataType* value, uint32_t numItems) + { + const uint8_t* dataStart = reinterpret_cast<const uint8_t*>(value); + return appendPropertyValueData(DataRef<const uint8_t>(dataStart, dataStart + numItems * sizeof(TDataType))); + } + + virtual PvdError endSetPropertyValue() = 0; + + // Set a set of properties to various values on an object. + + virtual PvdError setPropertyMessage(const void* instance, const NamespacedName& msgName, + DataRef<const uint8_t> data) = 0; + + template <typename TDataType> + PvdError setPropertyMessage(const void* instance, const TDataType& value) + { + const uint8_t* dataStart = reinterpret_cast<const uint8_t*>(&value); + return setPropertyMessage(instance, getPvdNamespacedNameForType<TDataType>(), + DataRef<const uint8_t>(dataStart, sizeof(TDataType))); + } + // If you need to send of lot of identical messages, this avoids a hashtable lookup per message. 
+ virtual PvdError beginPropertyMessageGroup(const NamespacedName& msgName) = 0; + + template <typename TDataType> + PvdError beginPropertyMessageGroup() + { + return beginPropertyMessageGroup(getPvdNamespacedNameForType<TDataType>()); + } + virtual PvdError sendPropertyMessageFromGroup(const void* instance, DataRef<const uint8_t> data) = 0; + + template <typename TDataType> + PvdError sendPropertyMessageFromGroup(const void* instance, const TDataType& value) + { + const uint8_t* dataStart = reinterpret_cast<const uint8_t*>(&value); + return sendPropertyMessageFromGroup(instance, DataRef<const uint8_t>(dataStart, sizeof(TDataType))); + } + + virtual PvdError endPropertyMessageGroup() = 0; + + // These functions ensure the target array doesn't contain duplicates + virtual PvdError pushBackObjectRef(const void* instId, String propName, const void* objRef) = 0; + virtual PvdError removeObjectRef(const void* instId, String propName, const void* objRef) = 0; + + // Instance elimination. + virtual PvdError destroyInstance(const void* key) = 0; + + // Profiling hooks + virtual PvdError beginSection(const void* instance, String name) = 0; + virtual PvdError endSection(const void* instance, String name) = 0; + + // Origin Shift + virtual PvdError originShift(const void* scene, PxVec3 shift) = 0; + + public: + /*For some cases, pvd command cannot be run immediately. For example, when create joints, while the actors may still + *pending for insert, the joints update commands can be run deffered. 
+ */ + class PvdCommand + { + public: + // Assigned is needed for copying + PvdCommand(const PvdCommand&) + { + } + PvdCommand& operator=(const PvdCommand&) + { + return *this; + } + + public: + PvdCommand() + { + } + virtual ~PvdCommand() + { + } + + // Not pure virtual so can have default PvdCommand obj + virtual bool canRun(PvdInstanceDataStream&) + { + return false; + } + virtual void run(PvdInstanceDataStream&) + { + } + }; + + // PVD SDK provide this helper function to allocate cmd's memory and release them at after flush the command queue + virtual void* allocateMemForCmd(uint32_t length) = 0; + + // PVD will call the destructor of PvdCommand object at the end fo flushPvdCommand + virtual void pushPvdCommand(PvdCommand& cmd) = 0; + virtual void flushPvdCommand() = 0; +}; + +class PvdDataStream : public PvdInstanceDataStream, public PvdMetaDataStream +{ + protected: + virtual ~PvdDataStream() + { + } + + public: + virtual void release() = 0; + virtual bool isConnected() = 0; + + virtual void addProfileZone(void* zone, const char* name) = 0; + virtual void addProfileZoneEvent(void* zone, const char* name, uint16_t eventId, bool compileTimeEnabled) = 0; + + virtual PvdPropertyDefinitionHelper& getPropertyDefinitionHelper() = 0; + + virtual void setIsTopLevelUIElement(const void* instance, bool topLevel) = 0; + virtual void sendErrorMessage(uint32_t code, const char* message, const char* file, uint32_t line) = 0; + virtual void updateCamera(const char* name, const PxVec3& origin, const PxVec3& up, const PxVec3& target) = 0; + +/** + \brief Create a new PvdDataStream. + \param pvd A pointer to a valid PxPvd instance. This must be non-null. 
+*/ + PX_PVDSDK_API static PvdDataStream* create(PxPvd* pvd); +}; +#if !PX_DOXYGEN +} // pvdsdk +} // physx +#endif + +/** @} */ +#endif // PXPVDSDK_PXPVDDATASTREAM_H diff --git a/PxShared/src/pvd/include/PxPvdDataStreamHelpers.h b/PxShared/src/pvd/include/PxPvdDataStreamHelpers.h new file mode 100644 index 0000000..7b47db5 --- /dev/null +++ b/PxShared/src/pvd/include/PxPvdDataStreamHelpers.h @@ -0,0 +1,120 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. 
+// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +#ifndef PXPVDSDK_PXPVDDATASTREAMHELPERS_H +#define PXPVDSDK_PXPVDDATASTREAMHELPERS_H + +/** \addtogroup pvd +@{ +*/ +#include "PxPvdObjectModelBaseTypes.h" + +#if !PX_DOXYGEN +namespace physx +{ +namespace pvdsdk +{ +#endif + +class PvdPropertyDefinitionHelper +{ + protected: + virtual ~PvdPropertyDefinitionHelper() + { + } + + public: + /** + Push a name c such that it appends such as a.b.c. + */ + virtual void pushName(const char* inName, const char* inAppendStr = ".") = 0; + /** + Push a name c such that it appends like a.b[c] + */ + virtual void pushBracketedName(const char* inName, const char* leftBracket = "[", const char* rightBracket = "]") = 0; + /** + * Pop the current name + */ + virtual void popName() = 0; + + virtual void clearNameStack() = 0; + /** + * Get the current name at the top of the name stack. + * Would return "a.b.c" or "a.b[c]" in the above examples. + */ + virtual const char* getTopName() = 0; + + virtual void addNamedValue(const char* name, uint32_t value) = 0; + virtual void clearNamedValues() = 0; + virtual DataRef<NamedValue> getNamedValues() = 0; + + /** + * Define a property using the top of the name stack and the passed-in semantic + */ + virtual void createProperty(const NamespacedName& clsName, const char* inSemantic, const NamespacedName& dtypeName, + PropertyType::Enum propType = PropertyType::Scalar) = 0; + + template <typename TClsType, typename TDataType> + void createProperty(const char* inSemantic = "", PropertyType::Enum propType = PropertyType::Scalar) + { + createProperty(getPvdNamespacedNameForType<TClsType>(), inSemantic, getPvdNamespacedNameForType<TDataType>(), + propType); + } + + // The datatype used for instances needs to be pointer unless you actually have pvdsdk::InstanceId members on your + // value structs. 
+ virtual void addPropertyMessageArg(const NamespacedName& inDatatype, uint32_t inOffset, uint32_t inSize) = 0; + + template <typename TDataType> + void addPropertyMessageArg(uint32_t offset) + { + addPropertyMessageArg(getPvdNamespacedNameForType<TDataType>(), offset, static_cast<uint32_t>(sizeof(TDataType))); + } + virtual void addPropertyMessage(const NamespacedName& clsName, const NamespacedName& msgName, + uint32_t inStructSizeInBytes) = 0; + template <typename TClsType, typename TMsgType> + void addPropertyMessage() + { + addPropertyMessage(getPvdNamespacedNameForType<TClsType>(), getPvdNamespacedNameForType<TMsgType>(), + static_cast<uint32_t>(sizeof(TMsgType))); + } + virtual void clearPropertyMessageArgs() = 0; + + void clearBufferedData() + { + clearNameStack(); + clearPropertyMessageArgs(); + clearNamedValues(); + } +}; + +#if !PX_DOXYGEN +} // pvdsdk +} // physx +#endif + +/** @} */ +#endif // PXPVDSDK_PXPVDDATASTREAMHELPERS_H diff --git a/PxShared/src/pvd/include/PxPvdErrorCodes.h b/PxShared/src/pvd/include/PxPvdErrorCodes.h new file mode 100644 index 0000000..ae17a6f --- /dev/null +++ b/PxShared/src/pvd/include/PxPvdErrorCodes.h @@ -0,0 +1,62 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +#ifndef PXPVDSDK_PXPVDERRORCODES_H +#define PXPVDSDK_PXPVDERRORCODES_H + +/** \addtogroup pvd +@{ +*/ + +#include "foundation/Px.h" + +#if !PX_DOXYGEN +namespace physx +{ +namespace pvdsdk +{ +#endif + +struct PvdErrorType +{ + enum Enum + { + Success = 0, + NetworkError, + ArgumentError, + Disconnect, + InternalProblem + }; +}; + +typedef PvdErrorType::Enum PvdError; + +#if !PX_DOXYGEN +} +} +#endif +/** @} */ +#endif // PXPVDSDK_PXPVDERRORCODES_H diff --git a/PxShared/src/pvd/include/PxPvdObjectModelBaseTypes.h b/PxShared/src/pvd/include/PxPvdObjectModelBaseTypes.h new file mode 100644 index 0000000..f65e581 --- /dev/null +++ b/PxShared/src/pvd/include/PxPvdObjectModelBaseTypes.h @@ -0,0 +1,428 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXPVDSDK_PXPVDOBJECTMODELBASETYPES_H +#define PXPVDSDK_PXPVDOBJECTMODELBASETYPES_H + +/** \addtogroup pvd +@{ +*/ +#include "foundation/PxAssert.h" + +#if !PX_DOXYGEN +namespace physx +{ +namespace pvdsdk +{ +#endif + +using namespace physx; + +inline const char* nonNull(const char* str) +{ + return str ? str : ""; +} +// strcmp will crash if passed a null string, however, +// so we need to make sure that doesn't happen. We do that +// by equating NULL and the empty string, "". 
inline bool safeStrEq(const char* lhs, const char* rhs)
{
    // NULL and "" are treated as the same string; substitute before calling
    // strcmp, which must never be handed a null pointer.
    const char* const left = lhs ? lhs : "";
    const char* const right = rhs ? rhs : "";
    return ::strcmp(left, right) == 0;
}

// Does this string have useful information in it, i.e. is it neither NULL
// nor empty.
inline bool isMeaningful(const char* str)
{
    // Compare against the terminator rather than testing "> 0": where plain
    // char is signed, a first byte >= 0x80 (UTF-8, Latin-1, ...) is negative
    // and a non-empty string would be reported as not meaningful.
    return str != NULL && *str != '\0';
}

// Length of str in bytes, counting NULL as the empty string. The result is
// narrowed to 32 bits.
inline uint32_t safeStrLen(const char* str)
{
    return str ? static_cast<uint32_t>(::strlen(str)) : 0u;
}

// Reference to an object by 32-bit instance id. Default-constructed refs hold
// -1; hasValue() is true only for ids strictly greater than zero.
struct ObjectRef
{
    int32_t mInstanceId;

    ObjectRef(int32_t iid = -1) : mInstanceId(iid)
    {
    }
    operator int32_t() const
    {
        return mInstanceId;
    }
    bool hasValue() const
    {
        return mInstanceId > 0;
    }
};

// Four unsigned 32-bit values stored as one datatype; zero-initialised by
// default.
struct U32Array4
{
    uint32_t mD0;
    uint32_t mD1;
    uint32_t mD2;
    uint32_t mD3;
    U32Array4(uint32_t d0, uint32_t d1, uint32_t d2, uint32_t d3) : mD0(d0), mD1(d1), mD2(d2), mD3(d3)
    {
    }
    U32Array4() : mD0(0), mD1(0), mD2(0), mD3(0)
    {
    }
};

// Names for the primitive datatypes listed in DECLARE_TYPES below.
typedef bool PvdBool;
typedef const char* String;
typedef void* VoidPtr;
typedef double PvdF64;
typedef float PvdF32;
typedef int64_t PvdI64;
typedef uint64_t PvdU64;
typedef int32_t PvdI32;
typedef uint32_t PvdU32;
typedef int16_t PvdI16;
typedef uint16_t PvdU16;
typedef int8_t PvdI8;
typedef uint8_t PvdU8;

// 8-bit-per-channel RGBA colour. Default is opaque black.
struct PvdColor
{
    uint8_t r;
    uint8_t g;
    uint8_t b;
    uint8_t a;
    PvdColor(uint8_t _r, uint8_t _g, uint8_t _b, uint8_t _a = 255) : r(_r), g(_g), b(_b), a(_a)
    {
    }
    PvdColor() : r(0), g(0), b(0), a(255)
    {
    }
    // Unpacks a colour packed as 0xAABBGGRR. Shifts are used instead of
    // reading the integer byte-wise through a pointer cast, so the decoded
    // channels follow the "abgr" packing regardless of host byte order; on
    // little-endian hosts the result equals the byte-wise read.
    PvdColor(uint32_t abgr)
        : r(static_cast<uint8_t>(abgr & 0xffu))
        , g(static_cast<uint8_t>((abgr >> 8) & 0xffu))
        , b(static_cast<uint8_t>((abgr >> 16) & 0xffu))
        , a(static_cast<uint8_t>((abgr >> 24) & 0xffu))
    {
    }
};

// 32-bit handle standing in for a string; 0 by default.
struct StringHandle
{
    uint32_t mHandle;
    StringHandle(uint32_t val = 0) : mHandle(val)
    {
    }
    operator uint32_t() const
    {
        return mHandle;
    }
};

// X-macro listing every base PVD datatype. Each user defines
// DECLARE_BASE_PVD_TYPE(type) before expanding DECLARE_TYPES. The order of
// the entries fixes the numeric values of PvdBaseType::Enum, so reordering
// or inserting entries changes those values.
#define DECLARE_TYPES \
DECLARE_BASE_PVD_TYPE(PvdI8) \
DECLARE_BASE_PVD_TYPE(PvdU8) \
DECLARE_BASE_PVD_TYPE(PvdI16) \
DECLARE_BASE_PVD_TYPE(PvdU16) \
DECLARE_BASE_PVD_TYPE(PvdI32) \
DECLARE_BASE_PVD_TYPE(PvdU32) \
DECLARE_BASE_PVD_TYPE(PvdI64) \
DECLARE_BASE_PVD_TYPE(PvdU64) \
DECLARE_BASE_PVD_TYPE(PvdF32) \
DECLARE_BASE_PVD_TYPE(PvdF64) \
DECLARE_BASE_PVD_TYPE(PvdBool) \
DECLARE_BASE_PVD_TYPE(PvdColor) \
DECLARE_BASE_PVD_TYPE(String) \
DECLARE_BASE_PVD_TYPE(StringHandle) \
DECLARE_BASE_PVD_TYPE(ObjectRef) \
DECLARE_BASE_PVD_TYPE(VoidPtr) \
DECLARE_BASE_PVD_TYPE(PxVec2) \
DECLARE_BASE_PVD_TYPE(PxVec3) \
DECLARE_BASE_PVD_TYPE(PxVec4) \
DECLARE_BASE_PVD_TYPE(PxBounds3) \
DECLARE_BASE_PVD_TYPE(PxQuat) \
DECLARE_BASE_PVD_TYPE(PxTransform) \
DECLARE_BASE_PVD_TYPE(PxMat33) \
DECLARE_BASE_PVD_TYPE(PxMat44) \
DECLARE_BASE_PVD_TYPE(U32Array4)

// Numeric ids of the base datatypes. Ids 1..64 are the InternalStart /
// InternalStop range; the DECLARE_TYPES entries follow directly after
// InternalStop, and Last is one past the final entry.
struct PvdBaseType
{
    enum Enum
    {
        None = 0,
        InternalStart = 1,
        InternalStop = 64,
#define DECLARE_BASE_PVD_TYPE(type) type,
        DECLARE_TYPES
        Last
#undef DECLARE_BASE_PVD_TYPE
    };
};

// A (namespace, name) pair of C strings. The strings are stored as pointers
// and not copied. Equality compares contents and treats NULL like "".
struct NamespacedName
{
    String mNamespace;
    String mName;
    NamespacedName(String ns, String nm) : mNamespace(ns), mName(nm)
    {
    }
    NamespacedName(String nm = "") : mNamespace(""), mName(nm)
    {
    }
    bool operator==(const NamespacedName& other) const
    {
        return safeStrEq(mNamespace, other.mNamespace) && safeStrEq(mName, other.mName);
    }
};

// A name paired with an unsigned 32-bit value. The name is stored as a
// pointer and not copied.
struct NamedValue
{
    String mName;
    uint32_t mValue;
    NamedValue(String nm = "", uint32_t val = 0) : mName(nm), mValue(val)
    {
    }
};

// Primary templates for the C++ type <-> PvdBaseType::Enum mappings. They
// carry only a placeholder member; presumably the intent is that using a
// type or enum value without a specialisation fails to compile.
template <typename T>
struct BaseDataTypeToTypeMap
{
    bool compile_error;
};
template <PvdBaseType::Enum>
struct BaseTypeToDataTypeMap
{
    bool compile_error;
};

// Users can extend this mapping with new datatypes.
+// Primary template: specializations replace the bool with a NamespacedName member called Name.
+template <typename T>
+struct PvdDataTypeToNamespacedNameMap
+{
+    bool Name;
+};
+// This mapping tells you what class id to use for the base datatypes.
+//
+// For every base type the macro specializes, for both `type` and `const type&`:
+//  - BaseDataTypeToTypeMap          (type -> PvdBaseType id, as BaseTypeEnum)
+//  - PvdDataTypeToNamespacedNameMap (type -> NamespacedName("physx3", "<type>"))
+// and, for `type` only, BaseTypeToDataTypeMap (id -> type, as TDataType).
+//
+// Constructors are declared with the injected class name. Spelling the template-id
+// (Foo<type>()) as the constructor name is ill-formed in C++20 (CWG 2237) and is
+// diagnosed by current compilers.
+#define DECLARE_BASE_PVD_TYPE(type) \
+    template <> \
+    struct BaseDataTypeToTypeMap<type> \
+    { \
+        enum Enum \
+        { \
+            BaseTypeEnum = PvdBaseType::type \
+        }; \
+    }; \
+    template <> \
+    struct BaseDataTypeToTypeMap<const type&> \
+    { \
+        enum Enum \
+        { \
+            BaseTypeEnum = PvdBaseType::type \
+        }; \
+    }; \
+    template <> \
+    struct BaseTypeToDataTypeMap<PvdBaseType::type> \
+    { \
+        typedef type TDataType; \
+    }; \
+    template <> \
+    struct PvdDataTypeToNamespacedNameMap<type> \
+    { \
+        NamespacedName Name; \
+        PvdDataTypeToNamespacedNameMap() : Name("physx3", #type) \
+        { \
+        } \
+    }; \
+    template <> \
+    struct PvdDataTypeToNamespacedNameMap<const type&> \
+    { \
+        NamespacedName Name; \
+        PvdDataTypeToNamespacedNameMap() : Name("physx3", #type) \
+        { \
+        } \
+    };
+
+DECLARE_TYPES
+#undef DECLARE_BASE_PVD_TYPE
+
+/**
+\brief PvdBaseType id registered for TDataType, returned as int32_t.
+
+Only compiles for types that have a BaseDataTypeToTypeMap specialization.
+*/
+template <typename TDataType>
+inline int32_t getPvdTypeForType()
+{
+    return static_cast<PvdBaseType::Enum>(BaseDataTypeToTypeMap<TDataType>::BaseTypeEnum);
+}
+/**
+\brief NamespacedName registered for TDataType.
+
+A temporary PvdDataTypeToNamespacedNameMap<TDataType> is constructed and its Name is returned
+by value.
+*/
+template <typename TDataType>
+inline NamespacedName getPvdNamespacedNameForType()
+{
+    return PvdDataTypeToNamespacedNameMap<TDataType>().Name;
+}
+
+// Registers the NamespacedName (ns, name) for `type`.
+#define DEFINE_PVD_TYPE_NAME_MAP(type, ns, name) \
+    template <> \
+    struct PvdDataTypeToNamespacedNameMap<type> \
+    { \
+        NamespacedName Name; \
+        PvdDataTypeToNamespacedNameMap() : Name(ns, name) \
+        { \
+        } \
+    };
+
+// Registers `newType` under the same NamespacedName as the already mapped `oldType`.
+#define DEFINE_PVD_TYPE_ALIAS(newType, oldType) \
+    template <> \
+    struct PvdDataTypeToNamespacedNameMap<newType> \
+    { \
+        NamespacedName Name; \
+        PvdDataTypeToNamespacedNameMap() : Name(PvdDataTypeToNamespacedNameMap<oldType>().Name) \
+        { \
+        } \
+    };
+
+DEFINE_PVD_TYPE_ALIAS(const void*, void*)
+
+struct ArrayData
+{
+    uint8_t* mBegin;
+    uint8_t* mEnd;
+    uint8_t* mCapacity; //>= stop
+    ArrayData(uint8_t* beg = NULL, uint8_t* end = NULL,
uint8_t* cap = NULL) : mBegin(beg), mEnd(end), mCapacity(cap)
+    {
+    }
+    uint8_t* begin()
+    {
+        return mBegin;
+    }
+    uint8_t* end()
+    {
+        return mEnd;
+    }
+    // Bytes between mBegin and mCapacity. Const like byteSize(), so it can be queried
+    // through a const ArrayData.
+    uint32_t byteCapacity() const
+    {
+        return static_cast<uint32_t>(mCapacity - mBegin);
+    }
+    // Bytes between mBegin and mEnd.
+    uint32_t byteSize() const
+    {
+        return static_cast<uint32_t>(mEnd - mBegin);
+    } // in bytes
+    // Number of whole objects of objectByteSize bytes in [mBegin, mEnd); 0 when
+    // objectByteSize is 0, which avoids a division by zero.
+    uint32_t numberOfItems(uint32_t objectByteSize) const
+    {
+        if(objectByteSize)
+            return byteSize() / objectByteSize;
+        return 0;
+    }
+
+    // Resets all three pointers without releasing anything.
+    void forgetData()
+    {
+        mBegin = mEnd = mCapacity = NULL;
+    }
+};
+
+/**
+\brief Read-only view of the range [mBegin, mEnd) of T. Stores two pointers and copies nothing.
+*/
+template <typename T>
+class DataRef
+{
+    const T* mBegin;
+    const T* mEnd;
+
+  public:
+    DataRef(const T* b, uint32_t count) : mBegin(b), mEnd(b + count)
+    {
+    }
+    DataRef(const T* b = NULL, const T* e = NULL) : mBegin(b), mEnd(e)
+    {
+    }
+    DataRef(const DataRef& o) : mBegin(o.mBegin), mEnd(o.mEnd)
+    {
+    }
+    DataRef& operator=(const DataRef& o)
+    {
+        mBegin = o.mBegin;
+        mEnd = o.mEnd;
+        return *this;
+    }
+    // Number of elements, converted to uint32_t.
+    uint32_t size() const
+    {
+        return static_cast<uint32_t>(mEnd - mBegin);
+    }
+    const T* begin() const
+    {
+        return mBegin;
+    }
+    const T* end() const
+    {
+        return mEnd;
+    }
+    // The index is checked with PX_ASSERT only.
+    const T& operator[](uint32_t idx) const
+    {
+        PX_ASSERT(idx < size());
+        return mBegin[idx];
+    }
+    // Last element; non-emptiness is checked with PX_ASSERT only.
+    const T& back() const
+    {
+        PX_ASSERT(mEnd > mBegin);
+        return *(mEnd - 1);
+    }
+};
+
+struct PropertyType
+{
+    enum Enum
+    {
+        Unknown = 0,
+        Scalar,
+        Array
+    };
+};
+
+// argument to the create property message function
+struct PropertyMessageArg
+{
+    String mPropertyName;
+    NamespacedName mDatatypeName;
+    // where in the message this property starts.
+    uint32_t mMessageOffset;
+    // size of this entry object
+    uint32_t mByteSize;
+
+    PropertyMessageArg(String propName, NamespacedName dtype, uint32_t msgOffset, uint32_t byteSize)
+    : mPropertyName(propName), mDatatypeName(dtype), mMessageOffset(msgOffset), mByteSize(byteSize)
+    {
+    }
+    // Empty argument: empty property name, default-constructed datatype name, offset and size 0.
+    PropertyMessageArg() : mPropertyName(""), mMessageOffset(0), mByteSize(0)
+    {
+    }
+};
+
+class PvdUserRenderer;
+DEFINE_PVD_TYPE_NAME_MAP(PvdUserRenderer, "_debugger_", "PvdUserRenderer")
+
+#if !PX_DOXYGEN
+}
+}
+#endif
+
+/** @} */
+#endif // PXPVDSDK_PXPVDOBJECTMODELBASETYPES_H
diff --git a/PxShared/src/pvd/include/PxPvdRenderBuffer.h b/PxShared/src/pvd/include/PxPvdRenderBuffer.h
new file mode 100644
index 0000000..58c8d51
--- /dev/null
+++ b/PxShared/src/pvd/include/PxPvdRenderBuffer.h
@@ -0,0 +1,140 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use.
No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PXPVDSDK_PXPVDRENDERBUFFER_H
+#define PXPVDSDK_PXPVDRENDERBUFFER_H
+
+/** \addtogroup pvd
+@{
+*/
+
+#include "foundation/PxVec3.h"
+
+#if !PX_DOXYGEN
+namespace physx
+{
+namespace pvdsdk
+{
+#endif
+
+/**
+\brief Default color values used for debug rendering.
+
+Each constant is a 32-bit value laid out as 0xAARRGGBB. The primary colours, white and grey
+use alpha 0xff; the three DARK* entries use 0x88 for both alpha and their colour channel.
+*/
+struct PvdDebugColor
+{
+    enum Enum
+    {
+        eARGB_BLACK = 0xff000000,
+        eARGB_RED = 0xffff0000,
+        eARGB_GREEN = 0xff00ff00,
+        eARGB_BLUE = 0xff0000ff,
+        eARGB_YELLOW = 0xffffff00,
+        eARGB_MAGENTA = 0xffff00ff,
+        eARGB_CYAN = 0xff00ffff,
+        eARGB_WHITE = 0xffffffff,
+        eARGB_GREY = 0xff808080,
+        eARGB_DARKRED = 0x88880000,
+        eARGB_DARKGREEN = 0x88008800,
+        eARGB_DARKBLUE = 0x88000088
+    };
+};
+
+/**
+\brief Used to store a single point and colour for debug rendering.
+
+\param p position, copied into pos
+\param c colour, copied into color (presumably one of the PvdDebugColor values -- confirm with callers)
+*/
+struct PvdDebugPoint
+{
+    PvdDebugPoint(const PxVec3& p, const uint32_t& c) : pos(p), color(c)
+    {
+    }
+
+    PxVec3 pos;
+    uint32_t color;
+};
+
+/**
+\brief Used to store a single line and colour for debug rendering.
+
+The one colour passed to the constructor is copied into both color0 and color1; positions and
+colours are interleaved in memory as pos0, color0, pos1, color1.
+*/
+struct PvdDebugLine
+{
+    PvdDebugLine(const PxVec3& p0, const PxVec3& p1, const uint32_t& c) : pos0(p0), color0(c), pos1(p1), color1(c)
+    {
+    }
+
+    PxVec3 pos0;
+    uint32_t color0;
+    PxVec3 pos1;
+    uint32_t color1;
+};
+
+/**
+\brief Used to store a single triangle and colour for debug rendering.
+*/
+// The one colour passed to the constructor is copied into color0, color1 and color2; positions
+// and colours are interleaved in memory (pos0, color0, pos1, color1, pos2, color2).
+struct PvdDebugTriangle
+{
+    PvdDebugTriangle(const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, const uint32_t& c)
+    : pos0(p0), color0(c), pos1(p1), color1(c), pos2(p2), color2(c)
+    {
+    }
+
+    PxVec3 pos0;
+    uint32_t color0;
+    PxVec3 pos1;
+    uint32_t color1;
+    PxVec3 pos2;
+    uint32_t color2;
+};
+
+/**
+\brief Used to store a text for debug rendering. Doesn't own 'string' array.
+
+The caller must keep the character array alive for as long as this object is used.
+*/
+struct PvdDebugText
+{
+    // NOTE(review): only 'string' is initialised here. 'size' and 'color' are left
+    // uninitialised, and 'position' gets whatever PxVec3's default constructor provides.
+    PvdDebugText() : string(0)
+    {
+    }
+
+    PvdDebugText(const PxVec3& p, const float& s, const uint32_t& c, const char* str)
+    : position(p), size(s), color(c), string(str)
+    {
+    }
+
+    PxVec3 position;
+    float size;
+    uint32_t color;
+    const char* string;
+};
+
+#if !PX_DOXYGEN
+}
+} // namespace physx
+#endif
+
+/** @} */
+#endif // PXPVDSDK_PXPVDRENDERBUFFER_H
diff --git a/PxShared/src/pvd/include/PxPvdUserRenderer.h b/PxShared/src/pvd/include/PxPvdUserRenderer.h
new file mode 100644
index 0000000..ac6f26d
--- /dev/null
+++ b/PxShared/src/pvd/include/PxPvdUserRenderer.h
@@ -0,0 +1,107 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+#ifndef PXPVDSDK_PXPVDUSERRENDERER_H
+#define PXPVDSDK_PXPVDUSERRENDERER_H
+
+/** \addtogroup pvd
+@{
+*/
+#include "foundation/PxVec3.h"
+#include "foundation/PxTransform.h"
+#include "pvd/PxPvd.h"
+
+#include "PxPvdDataStream.h"
+#include "PxPvdRenderBuffer.h"
+#include "PsUserAllocated.h"
+
+#if !PX_DOXYGEN
+namespace physx
+{
+#endif
+
+class PxPvd;
+
+#if !PX_DOXYGEN
+namespace pvdsdk
+{
+#endif
+
+class RendererEventClient;
+
+/**
+\brief Abstract interface for sending debug-render primitives and constraint visualizations.
+
+Instances come from the static create() function. The destructor is protected, so outside
+code cannot delete through this interface; release() is presumably the way to dispose of an
+instance -- confirm against the implementation.
+*/
+class PvdUserRenderer : public shdfnd::UserAllocated
+{
+  protected:
+    virtual ~PvdUserRenderer()
+    {
+    }
+
+  public:
+    virtual void release() = 0;
+    // Sets the client (see RendererEventClient below).
+    virtual void setClient(RendererEventClient* client) = 0;
+
+    // Instance to associate the further rendering with.
+    virtual void setInstanceId(const void* instanceId) = 0;
+    // Draw these points associated with this instance
+    virtual void drawPoints(const PvdDebugPoint* points, uint32_t count) = 0;
+    // Draw these lines associated with this instance
+    virtual void drawLines(const PvdDebugLine* lines, uint32_t count) = 0;
+    // Draw these triangles associated with this instance
+    virtual void drawTriangles(const PvdDebugTriangle* triangles, uint32_t count) = 0;
+    // Draw this text associated with this instance
+    virtual void drawText(const PvdDebugText& text) = 0;
+
+    // Draw SDK debug render: points, lines and triangles are passed as three separate
+    // (pointer, count) pairs.
+    virtual void drawRenderbuffer(const PvdDebugPoint* pointData, uint32_t pointCount, const PvdDebugLine* lineData,
+                                  uint32_t lineCount, const PvdDebugTriangle* triangleData, uint32_t triangleCount) = 0;
+
+    // Constraint visualization routines
+    virtual void visualizeJointFrames(const PxTransform& parent, const PxTransform& child) = 0;
+    virtual void visualizeLinearLimit(const PxTransform& t0, const PxTransform& t1, float value, bool active) = 0;
+    virtual void visualizeAngularLimit(const PxTransform& t0, float lower, float upper, bool active) = 0;
+    virtual void visualizeLimitCone(const PxTransform& t, float ySwing, float zSwing, bool active) = 0;
+    virtual void visualizeDoubleCone(const PxTransform& t, float angle, bool active) = 0;
+
+    // Clear the immediate buffer.
+    virtual void flushRenderEvents() = 0;
+
+    // Factory function. bufferSize defaults to 0x2000 (8192); the unit is presumably bytes --
+    // confirm against the implementation.
+    PX_PVDSDK_API static PvdUserRenderer* create(uint32_t bufferSize = 0x2000);
+};
+
+/**
+\brief Callback interface that receives a flushed block as a pointer and a length.
+*/
+class RendererEventClient
+{
+  public:
+    virtual ~RendererEventClient(){}
+
+    virtual void handleBufferFlush(const uint8_t* inData, uint32_t inLength) = 0;
+};
+
+#if !PX_DOXYGEN
+}
+}
+#endif
+/** @} */
+#endif // PXPVDSDK_PXPVDUSERRENDERER_H
diff --git a/PxShared/src/pvd/src/PxProfileBase.h b/PxShared/src/pvd/src/PxProfileBase.h
new file mode 100644
index 0000000..52918a1
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileBase.h
@@ -0,0 +1,35 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXPVDSDK_PXPROFILEBASE_H
+#define PXPVDSDK_PXPROFILEBASE_H
+
+#include "foundation/PxSimpleTypes.h"
+
+// Converts a pointer to a uint64_t: the pointer is first reinterpreted as a size_t and that
+// integer is then widened, so the result has the same width on 32-bit and 64-bit builds.
+#define PX_PROFILE_POINTER_TO_U64( pointer ) static_cast<uint64_t>(reinterpret_cast<size_t>(pointer))
+
+#endif // PXPVDSDK_PXPROFILEBASE_H
diff --git a/PxShared/src/pvd/src/PxProfileCompileTimeEventFilter.h b/PxShared/src/pvd/src/PxProfileCompileTimeEventFilter.h
new file mode 100644
index 0000000..428374e
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileCompileTimeEventFilter.h
@@ -0,0 +1,75 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use.
No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXPVDSDK_PXPROFILECOMPILETIMEEVENTFILTER_H
+#define PXPVDSDK_PXPROFILECOMPILETIMEEVENTFILTER_H
+
+#include "PxProfileBase.h"
+#include "PxProfileEventId.h"
+
+//Define before including header in order to enable a different
+//compile time event profile threshold.
+//The default threshold is EventPriorities::Medium.
+#ifndef PX_PROFILE_EVENT_PROFILE_THRESHOLD
+#define PX_PROFILE_EVENT_PROFILE_THRESHOLD EventPriorities::Medium
+#endif
+
+namespace physx { namespace profile {
+
+    /**
+    \brief Profile event priorities. Used to filter out events.
+
+    The enumerators have the increasing values None = 0 through Never = 4, and that numeric
+    order is what the threshold comparison relies on.
+    */
+    struct EventPriorities
+    {
+        enum Enum
+        {
+            None, // the filter setting to kill all events
+            Coarse,
+            Medium,
+            Detail,
+            Never // the priority to set for an event if it should never fire.
+        };
+    };
+
+    /**
+    \brief Gets the priority for a given event.
+    Specialize this object in order to get the priorities setup correctly.
+
+    Events without a specialization have priority Medium.
+    */
+    template<uint16_t TEventId>
+    struct EventPriority { static const uint32_t val = EventPriorities::Medium; };
+
+    /**
+    \brief Filter events by given event priority and set threshold.
+    */
+    // val is true when the event's priority is less than or equal to
+    // PX_PROFILE_EVENT_PROFILE_THRESHOLD, and false otherwise.
+    template<uint16_t TEventId>
+    struct EventFilter
+    {
+        static const bool val = EventPriority<TEventId>::val <= PX_PROFILE_EVENT_PROFILE_THRESHOLD;
+    };
+
+}}
+
+#endif // PXPVDSDK_PXPROFILECOMPILETIMEEVENTFILTER_H
diff --git a/PxShared/src/pvd/src/PxProfileContextProvider.h b/PxShared/src/pvd/src/PxProfileContextProvider.h
new file mode 100644
index 0000000..44f6b94
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileContextProvider.h
@@ -0,0 +1,98 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXPVDSDK_PXPROFILECONTEXTPROVIDER_H
+#define PXPVDSDK_PXPROFILECONTEXTPROVIDER_H
+
+#include "PxProfileBase.h"
+
+namespace physx { namespace profile {
+
+    //Thread id, cpu id and thread priority recorded for a profile event.
+    struct PxProfileEventExecutionContext
+    {
+        uint32_t mThreadId;
+        uint8_t mCpuId;
+        uint8_t mThreadPriority;
+
+        //Argument order is (thread id, priority, cpu id), which differs from the member order.
+        PxProfileEventExecutionContext( uint32_t inThreadId = 0, uint8_t inThreadPriority = 2 /*eThreadPriorityNormal*/, uint8_t inCpuId = 0 )
+            : mThreadId( inThreadId )
+            , mCpuId( inCpuId )
+            , mThreadPriority( inThreadPriority )
+        {
+        }
+
+        //Two contexts are equal when all three fields match.
+        bool operator==( const PxProfileEventExecutionContext& inOther ) const
+        {
+            if ( mThreadId != inOther.mThreadId )
+                return false;
+            if ( mCpuId != inOther.mCpuId )
+                return false;
+            return mThreadPriority == inOther.mThreadPriority;
+        }
+    };
+
+    //Abstract source of the execution context in which an event is happening.
+    //The destructor is protected, so instances cannot be deleted through this interface.
+    class PxProfileContextProvider
+    {
+    protected:
+        virtual ~PxProfileContextProvider()
+        {
+        }
+    public:
+        virtual PxProfileEventExecutionContext getExecutionContext() = 0;
+        virtual uint32_t getThreadId() = 0;
+    };
+    //Provider that hands back a context captured at construction time.
+    struct PxProfileTrivialContextProvider
+    {
+        PxProfileEventExecutionContext mContext;
+
+        PxProfileTrivialContextProvider( PxProfileEventExecutionContext inContext = PxProfileEventExecutionContext() )
+            : mContext( inContext )
+        {
+        }
+
+        //Returns a copy of the stored context.
+        PxProfileEventExecutionContext getExecutionContext()
+        {
+            return mContext;
+        }
+
+        //Returns the thread id of the stored context.
+        uint32_t getThreadId()
+        {
+            return mContext.mThreadId;
+        }
+    };
+
+    //Forwards the get context calls to another (perhaps shared) context.
+    template<typename TProviderType>
+    struct PxProfileContextProviderForward
+    {
+        //Provider that receives every call. The pointer is stored as given and never deleted here.
+        TProviderType* mProvider;
+
+        PxProfileContextProviderForward( TProviderType* inProvider )
+            : mProvider( inProvider )
+        {
+        }
+
+        PxProfileEventExecutionContext getExecutionContext()
+        {
+            return mProvider->getExecutionContext();
+        }
+
+        uint32_t getThreadId()
+        {
+            return mProvider->getThreadId();
+        }
+    };
+
+    //Exposes any type that has getExecutionContext() and getThreadId() through the
+    //PxProfileContextProvider interface by delegating to a PxProfileContextProviderForward.
+    template<typename TProviderType>
+    struct PxProfileContextProviderImpl : public PxProfileContextProvider
+    {
+        PxProfileContextProviderForward<TProviderType> mContext;
+
+        PxProfileContextProviderImpl( TProviderType* inP )
+            : mContext( inP )
+        {
+        }
+
+        virtual PxProfileEventExecutionContext getExecutionContext()
+        {
+            return mContext.getExecutionContext();
+        }
+
+        virtual uint32_t getThreadId()
+        {
+            return mContext.getThreadId();
+        }
+    };
+
+} }
+
+#endif // PXPVDSDK_PXPROFILECONTEXTPROVIDER_H
diff --git a/PxShared/src/pvd/src/PxProfileContextProviderImpl.h b/PxShared/src/pvd/src/PxProfileContextProviderImpl.h
new file mode 100644
index 0000000..bc5f09f
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileContextProviderImpl.h
@@ -0,0 +1,52 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXPVDSDK_PXPROFILECONTEXTPROVIDERIMPL_H
+#define PXPVDSDK_PXPROFILECONTEXTPROVIDERIMPL_H
+
+#include "PxProfileContextProvider.h"
+
+#include "PsThread.h"
+
+namespace physx { namespace profile {
+
+    //Context provider that reads the thread id from shdfnd::Thread::getId().
+    struct PxDefaultContextProvider
+    {
+        //Builds a context from the thread id (converted to 32 bits), the fixed priority
+        //shdfnd::ThreadPriority::eNORMAL and cpu id 0.
+        PxProfileEventExecutionContext getExecutionContext()
+        {
+            const shdfnd::Thread::Id currentId = shdfnd::Thread::getId();
+            const uint32_t threadId = static_cast<uint32_t>( currentId );
+            const uint8_t threadPriority = static_cast<uint8_t>( shdfnd::ThreadPriority::eNORMAL );
+            return PxProfileEventExecutionContext( threadId, threadPriority, 0 );
+        }
+
+        //Thread id from shdfnd::Thread::getId(), converted to 32 bits.
+        uint32_t getThreadId()
+        {
+            const uint32_t threadId = static_cast<uint32_t>( shdfnd::Thread::getId() );
+            return threadId;
+        }
+    };
+} }
+
+#endif // PXPVDSDK_PXPROFILECONTEXTPROVIDERIMPL_H
diff --git a/PxShared/src/pvd/src/PxProfileDataBuffer.h b/PxShared/src/pvd/src/PxProfileDataBuffer.h
new file mode 100644
index 0000000..2191519
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileDataBuffer.h
@@ -0,0 +1,167 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+
+
+#ifndef PXPVDSDK_PXPROFILEDATABUFFER_H
+#define PXPVDSDK_PXPROFILEDATABUFFER_H
+
+#include "PxProfileAllocatorWrapper.h"
+#include "PxProfileMemoryBuffer.h"
+#include "PxProfileEventBufferClient.h"
+
+namespace physx { namespace profile {
+
+    //Base class for buffers that cache data and then dump the data to clients.
+    //
+    //Bytes accumulate in mDataArray. flushEvents() hands the accumulated block to every
+    //registered PxProfileEventBufferClient and then empties the array. Each operation that
+    //touches the client list or the data constructs a TScopedLock from mBufferMutex first.
+    template<typename TMutex, typename TScopedLock>
+    class DataBuffer
+    {
+    public:
+        typedef TMutex TMutexType;
+        typedef TScopedLock TScopedLockType;
+        typedef PxProfileWrapperNamedAllocator TU8AllocatorType;
+
+        typedef MemoryBuffer<TU8AllocatorType > TMemoryBufferType;
+        typedef PxProfileArray<PxProfileEventBufferClient*> TBufferClientArray;
+
+    protected:
+
+        PxProfileAllocatorWrapper mWrapper;
+        TMemoryBufferType mDataArray;
+        TBufferClientArray mClients;
+        uint32_t mBufferFullAmount;
+        EventContextInformation mEventContextInformation;
+        TMutexType* mBufferMutex;
+        volatile bool mHasClients;
+        EventSerializer<TMemoryBufferType > mSerializer;
+
+    public:
+
+        //inBufferFullAmount is the fill level at which handleBufferFlush() flushes.
+        //inBufferMutex is stored as a pointer and is not owned by this object.
+        DataBuffer( PxAllocatorCallback* inFoundation
+            , uint32_t inBufferFullAmount
+            , TMutexType* inBufferMutex
+            , const char* inAllocationName )
+            : mWrapper( inFoundation )
+            , mDataArray( TU8AllocatorType( mWrapper, inAllocationName ) )
+            , mClients( mWrapper )
+            , mBufferFullAmount( inBufferFullAmount )
+            , mBufferMutex( inBufferMutex )
+            , mHasClients( false )
+            , mSerializer( &mDataArray )
+        {
+            //Reserve 68 bytes beyond the flush threshold up front, so the array is
+            //larger than it should ever need to be and is never really resized.
+            mDataArray.reserve( inBufferFullAmount + 68 );
+        }
+
+        //Detaches every remaining client, always taking the first entry.
+        virtual ~DataBuffer()
+        {
+            while ( mClients.size() != 0 )
+                removeClient( *mClients[0] );
+        }
+
+        PxProfileAllocatorWrapper& getWrapper()
+        {
+            return mWrapper;
+        }
+
+        TMutexType* getBufferMutex()
+        {
+            return mBufferMutex;
+        }
+
+        void setBufferMutex(TMutexType* mutex)
+        {
+            mBufferMutex = mutex;
+        }
+
+        //Registers a client under the lock.
+        void addClient( PxProfileEventBufferClient& inClient )
+        {
+            TScopedLockType lock( mBufferMutex );
+            mClients.pushBack( &inClient );
+            mHasClients = true;
+        }
+
+        //Removes the first entry that points at inClient, if there is one. The client is
+        //notified through handleClientRemoved() before it is dropped from the array.
+        void removeClient( PxProfileEventBufferClient& inClient )
+        {
+            TScopedLockType lock( mBufferMutex );
+            uint32_t found = 0;
+            while ( found < mClients.size() && mClients[found] != &inClient )
+                ++found;
+            if ( found < mClients.size() )
+            {
+                inClient.handleClientRemoved();
+                mClients.replaceWithLast( found );
+            }
+            mHasClients = mClients.size() != 0;
+        }
+
+
+        //Reads the cached flag without taking the lock.
+        bool hasClients() const
+        {
+            return mHasClients;
+        }
+
+        //Sends everything buffered so far to the clients, then clears the buffer and
+        //calls the clearCachedData() hook.
+        virtual void flushEvents()
+        {
+            TScopedLockType lock(mBufferMutex);
+            const uint8_t* bufferStart = mDataArray.begin();
+            uint32_t bufferLength = mDataArray.size();
+            sendDataToClients(bufferStart, bufferLength);
+            mDataArray.clear();
+            clearCachedData();
+        }
+
+        //Used for chaining together event buffers.
+        //A null or empty block is ignored. Otherwise the buffer is flushed first when the
+        //block would fill it, and a block at least as large as the threshold is forwarded
+        //directly instead of being buffered.
+        //NOTE(review): flushEvents() takes mBufferMutex again while this function still
+        //holds it, so TScopedLock presumably has to allow re-entrant locking -- confirm.
+        virtual void handleBufferFlush( const uint8_t* inData, uint32_t inDataSize )
+        {
+            TScopedLockType lock( mBufferMutex );
+            if ( !inData || !inDataSize )
+                return;
+
+            clearCachedData();
+            if ( mDataArray.size() + inDataSize >= mBufferFullAmount )
+                flushEvents();
+            if ( inDataSize < mBufferFullAmount )
+                mDataArray.write( inData, inDataSize );
+            else
+                sendDataToClients( inData, inDataSize );
+        }
+
+    protected:
+        //Hook for derived classes; does nothing here.
+        virtual void clearCachedData()
+        {
+        }
+
+    private:
+
+        //Passes the block to handleBufferFlush() of every registered client, in array order.
+        void sendDataToClients( const uint8_t* inData, uint32_t inDataSize )
+        {
+            const uint32_t total = mClients.size();
+            uint32_t current = 0;
+            while ( current < total )
+            {
+                mClients[current]->handleBufferFlush( inData, inDataSize );
+                ++current;
+            }
+        }
+
+    };
+
+}}
+
+
+#endif // PXPVDSDK_PXPROFILEDATABUFFER_H
diff --git a/PxShared/src/pvd/src/PxProfileDataParsing.h b/PxShared/src/pvd/src/PxProfileDataParsing.h
new file mode 100644
index 0000000..e7fa0df
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileDataParsing.h
@@ -0,0 +1,218 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef PXPVDSDK_PXPROFILEDATAPARSING_H +#define PXPVDSDK_PXPROFILEDATAPARSING_H + +#include "PxProfileBase.h" + +namespace physx { namespace profile { + + //Converts datatypes without using type punning. 
+ struct BlockParserDataConverter + { + union + { + uint8_t mU8[8]; + uint16_t mU16[4]; + uint32_t mU32[2]; + uint64_t mU64[1]; + + int8_t mI8[8]; + int16_t mI16[4]; + int32_t mI32[2]; + int64_t mI64[1]; + + + float mF32[2]; + double mF64[1]; + }; + + template<typename TDataType> inline TDataType convert() { PX_ASSERT( false ); return TDataType(); } + + template<typename TDataType> + inline void convert( const TDataType& ) {} + }; + + template<> inline uint8_t BlockParserDataConverter::convert<uint8_t>() { return mU8[0]; } + template<> inline uint16_t BlockParserDataConverter::convert<uint16_t>() { return mU16[0]; } + template<> inline uint32_t BlockParserDataConverter::convert<uint32_t>() { return mU32[0]; } + template<> inline uint64_t BlockParserDataConverter::convert<uint64_t>() { return mU64[0]; } + template<> inline int8_t BlockParserDataConverter::convert<int8_t>() { return mI8[0]; } + template<> inline int16_t BlockParserDataConverter::convert<int16_t>() { return mI16[0]; } + template<> inline int32_t BlockParserDataConverter::convert<int32_t>() { return mI32[0]; } + template<> inline int64_t BlockParserDataConverter::convert<int64_t>() { return mI64[0]; } + template<> inline float BlockParserDataConverter::convert<float>() { return mF32[0]; } + template<> inline double BlockParserDataConverter::convert<double>() { return mF64[0]; } + + template<> inline void BlockParserDataConverter::convert<uint8_t>( const uint8_t& inData ) { mU8[0] = inData; } + template<> inline void BlockParserDataConverter::convert<uint16_t>( const uint16_t& inData ) { mU16[0] = inData; } + template<> inline void BlockParserDataConverter::convert<uint32_t>( const uint32_t& inData ) { mU32[0] = inData; } + template<> inline void BlockParserDataConverter::convert<uint64_t>( const uint64_t& inData ) { mU64[0] = inData; } + template<> inline void BlockParserDataConverter::convert<int8_t>( const int8_t& inData ) { mI8[0] = inData; } + template<> inline void 
BlockParserDataConverter::convert<int16_t>( const int16_t& inData ) { mI16[0] = inData; } + template<> inline void BlockParserDataConverter::convert<int32_t>( const int32_t& inData ) { mI32[0] = inData; } + template<> inline void BlockParserDataConverter::convert<int64_t>( const int64_t& inData ) { mI64[0] = inData; } + template<> inline void BlockParserDataConverter::convert<float>( const float& inData ) { mF32[0] = inData; } + template<> inline void BlockParserDataConverter::convert<double>( const double& inData ) { mF64[0] = inData; } + + + //Handles various details around parsing blocks of uint8_t data. + struct BlockParseFunctions + { + template<uint8_t ByteCount> + static inline void swapBytes( uint8_t* inData ) + { + for ( uint32_t idx = 0; idx < ByteCount/2; ++idx ) + { + uint32_t endIdx = ByteCount-idx-1; + uint8_t theTemp = inData[idx]; + inData[idx] = inData[endIdx]; + inData[endIdx] = theTemp; + } + } + + static inline bool checkLength( const uint8_t* inStart, const uint8_t* inStop, uint32_t inLength ) + { + return static_cast<uint32_t>(inStop - inStart) >= inLength; + } + //warning work-around + template<typename T> + static inline T val(T v) {return v;} + + template<bool DoSwapBytes, typename TDataType> + static inline bool parse( const uint8_t*& inStart, const uint8_t* inStop, TDataType& outData ) + { + if ( checkLength( inStart, inStop, sizeof( TDataType ) ) ) + { + BlockParserDataConverter theConverter; + for ( uint32_t idx =0; idx < sizeof( TDataType ); ++idx ) + theConverter.mU8[idx] = inStart[idx]; + if ( val(DoSwapBytes)) + swapBytes<sizeof(TDataType)>( theConverter.mU8 ); + outData = theConverter.convert<TDataType>(); + inStart += sizeof( TDataType ); + return true; + } + return false; + } + + template<bool DoSwapBytes, typename TDataType> + static inline bool parseBlock( const uint8_t*& inStart, const uint8_t* inStop, TDataType* outData, uint32_t inNumItems ) + { + uint32_t desired = sizeof(TDataType)*inNumItems; + if ( checkLength( inStart, 
inStop, desired ) ) + { + if ( val(DoSwapBytes) ) + { + for ( uint32_t item = 0; item < inNumItems; ++item ) + { + BlockParserDataConverter theConverter; + for ( uint32_t idx =0; idx < sizeof( TDataType ); ++idx ) + theConverter.mU8[idx] = inStart[idx]; + swapBytes<sizeof(TDataType)>( theConverter.mU8 ); + outData[item] = theConverter.convert<TDataType>(); + inStart += sizeof(TDataType); + } + } + else + { + uint8_t* target = reinterpret_cast<uint8_t*>(outData); + memmove( target, inStart, desired ); + inStart += desired; + } + return true; + } + return false; + } + + //In-place byte swapping block + template<bool DoSwapBytes, typename TDataType> + static inline bool parseBlock( uint8_t*& inStart, const uint8_t* inStop, uint32_t inNumItems ) + { + uint32_t desired = sizeof(TDataType)*inNumItems; + if ( checkLength( inStart, inStop, desired ) ) + { + if ( val(DoSwapBytes) ) + { + for ( uint32_t item = 0; item < inNumItems; ++item, inStart += sizeof( TDataType ) ) + swapBytes<sizeof(TDataType)>( inStart ); //In-place swap. + } + else + inStart += sizeof( TDataType ) * inNumItems; + return true; + } + return false; + } + }; + + //Wraps the begin/end keeping track of them. 
+ template<bool DoSwapBytes> + struct BlockParser + { + const uint8_t* mBegin; + const uint8_t* mEnd; + BlockParser( const uint8_t* inBegin=NULL, const uint8_t* inEnd=NULL ) + : mBegin( inBegin ) + , mEnd( inEnd ) + { + } + inline bool hasMoreData() const { return mBegin != mEnd; } + inline bool checkLength( uint32_t inLength ) { return BlockParseFunctions::checkLength( mBegin, mEnd, inLength ); } + + template<typename TDataType> + inline bool read( TDataType& outDatatype ) { return BlockParseFunctions::parse<DoSwapBytes>( mBegin, mEnd, outDatatype ); } + + template<typename TDataType> + inline bool readBlock( TDataType* outDataPtr, uint32_t inNumItems ) { return BlockParseFunctions::parseBlock<DoSwapBytes>( mBegin, mEnd, outDataPtr, inNumItems ); } + + template<typename TDataType> + inline bool readBlock( uint32_t inNumItems ) + { + uint8_t* theTempPtr = const_cast<uint8_t*>(mBegin); + bool retval = BlockParseFunctions::parseBlock<DoSwapBytes, TDataType>( theTempPtr, mEnd, inNumItems ); + mBegin = theTempPtr; + return retval; + } + + uint32_t amountLeft() const { return static_cast<uint32_t>( mEnd - mBegin ); } + }; + + //Reads the data without checking for error conditions + template<typename TDataType, typename TBlockParserType> + inline TDataType blockParserRead( TBlockParserType& inType ) + { + TDataType retval; + inType.read( retval ); + return retval; + } +}} + +#endif // PXPVDSDK_PXPROFILEDATAPARSING_H diff --git a/PxShared/src/pvd/src/PxProfileEventBuffer.h b/PxShared/src/pvd/src/PxProfileEventBuffer.h new file mode 100644 index 0000000..213d980 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileEventBuffer.h @@ -0,0 +1,270 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef PXPVDSDK_PXPROFILEEVENTBUFFER_H +#define PXPVDSDK_PXPROFILEEVENTBUFFER_H + +#include "PxProfileEvents.h" +#include "PxProfileEventSerialization.h" +#include "PxProfileEventSystem.h" +#include "PxProfileDataBuffer.h" +#include "PxProfileContextProvider.h" + +#include "PsArray.h" +#include "PsTime.h" +#include "PsCpu.h" + +namespace physx { namespace profile { + + /** + * An event buffer maintains an in-memory buffer of events. When this buffer is full + * it sends the buffer to all handlers registered and resets the buffer. + * + * It is parameterized in four ways. The first is a context provider that provides + * both thread id and context id. + * + * The second is the mutex (which may be null) and a scoped locking mechanism. Thus the buffer + * may be used in a multithreaded context but clients of the buffer don't pay for this if they + * don't intend to use it this way. + * + * Finally the buffer may use an event filtering mechanism. This mechanism needs one function, + * namely isEventEnabled( eventId ), which is called with the 16 bit event id. + * + * All of these systems can be parameterized at compile time leading to an event buffer + * that should be as fast as possible given the constraints. + * + * Buffers may be chained together as this buffer has a handleBufferFlush method that + * will grab the mutex and add the data to this event buffer. + * + * Overall, let's look at the PhysX SDK and how all the pieces fit together. + * The SDK should have a mutex-protected event buffer where actual devs or users of PhysX + * can register handlers. This buffer has slow but correct implementations of the + * context provider interface. + * + * The SDK object should also have a concrete event filter which was used in the + * construction of the event buffer and which it exposes through opaque interfaces. 
+ * + * The SDK should protect its event buffer and its event filter from multithreaded + * access and thus this provides the safest and slowest way to log events and to + * enable/disable events. + * + * Each scene should also have a concrete event filter. This filter is updated from + * the SDK event filter (in a mutex protected way) every frame. Thus scenes can change + * their event filtering on a frame-by-frame basis. It means that tasks running + * under the scene don't need a mutex when accessing the filter. + * + * Furthermore the scene should have an event buffer that always sets the context id + * on each event to the scene. This allows PVD and other systems to correlate events + * to scenes. Scenes should provide access only to a relative event sending system + * that looks up thread id upon each event but uses the scene id. + * + * The SDK's event buffer should be setup as an EventBufferClient for each scene's + * event buffer. Thus the SDK should expose an EventBufferClient interface that + * any client can use. + * + * For extremely *extremely* performance sensitive areas we should create a specialized + * per-scene, per-thread event buffer that is set on the task for these occasions. This buffer + * uses a trivial event context setup with the scene's context id and the thread id. It should + * share the scene's concrete event filter and it should have absolutely no locking. It should + * empty into the scene's event buffer which in some cases should empty into the SDK's event buffer + * which when full will push events all the way out of the system. The task should *always* flush + * the event buffer (if it has one) when it is finished; nothing else will work reliably. + * + * If the per-scene,per-thread event buffer is correctly parameterized and fully defined adding + * a new event should be an inline operation requiring no mutex grabs in the common case. 
I don't + * believe you can get faster event production than this; the events are as small as possible (all + * relative events) and they are all produced inline resulting in one 4 byte header and one + * 8 byte timestamp per event. Reducing the memory pressure in this way reduces the communication + * overhead, the mutex grabs, basically everything that makes profiling expensive at the cost + * of a per-scene,per-thread event buffer (which could easily be reduced to a per-thread event + * buffer). + */ + template<typename TContextProvider, + typename TMutex, + typename TScopedLock, + typename TEventFilter> + class EventBuffer : public DataBuffer<TMutex, TScopedLock> + { + public: + typedef DataBuffer<TMutex, TScopedLock> TBaseType; + typedef TContextProvider TContextProviderType; + typedef TEventFilter TEventFilterType; + typedef typename TBaseType::TMutexType TMutexType; + typedef typename TBaseType::TScopedLockType TScopedLockType; + typedef typename TBaseType::TU8AllocatorType TU8AllocatorType; + typedef typename TBaseType::TMemoryBufferType TMemoryBufferType; + typedef typename TBaseType::TBufferClientArray TBufferClientArray; + + private: + /* Context of the last event written in full form; doAddProfileEvent() compares against it to decide whether a relative event is enough. */ EventContextInformation mEventContextInformation; + /* Timestamp of the previously added start/stop event; handed to setupHeader() for the next one. */ uint64_t mLastTimestamp; + TContextProvider mContextProvider; + TEventFilterType mEventFilter; + + public: + /* Forwards the allocator, flush threshold and mutex to the DataBuffer base, copies the context provider and the event filter, and zeroes the cached context and last timestamp. */ EventBuffer(PxAllocatorCallback* inFoundation + , uint32_t inBufferFullAmount + , const TContextProvider& inProvider + , TMutexType* inBufferMutex + , const TEventFilterType& inEventFilter ) + : TBaseType( inFoundation, inBufferFullAmount, inBufferMutex, "struct physx::profile::ProfileEvent" ) + , mLastTimestamp( 0 ) + , mContextProvider( inProvider ) + , mEventFilter( inEventFilter ) + { + memset(&mEventContextInformation,0,sizeof(EventContextInformation)); + } + + TContextProvider& getContextProvider() { return mContextProvider; } + + /* Adds a start event for inId with the given context and timestamp, provided the filter enables inId. mBufferMutex is held for the whole call. */ PX_FORCE_INLINE void startEvent(uint16_t inId, uint32_t threadId, uint64_t contextId, uint8_t cpuId, uint8_t threadPriority, 
uint64_t inTimestamp) + { + TScopedLockType lock(TBaseType::mBufferMutex); + if ( mEventFilter.isEventEnabled( inId ) ) + { + StartEvent theEvent; + theEvent.init( threadId, contextId, cpuId, threadPriority, inTimestamp ); + doAddProfileEvent( inId, theEvent ); + } + } + + /* Convenience overload: thread id, cpu id and thread priority come from the context provider, the timestamp from shdfnd::Time::getCurrentCounterValue(). */ PX_FORCE_INLINE void startEvent(uint16_t inId, uint64_t contextId) + { + PxProfileEventExecutionContext ctx( mContextProvider.getExecutionContext() ); + startEvent( inId, ctx.mThreadId, contextId, ctx.mCpuId, static_cast<uint8_t>(ctx.mThreadPriority), shdfnd::Time::getCurrentCounterValue() ); + } + + /* Convenience overload for a caller-supplied thread id: cpu id and thread priority are recorded as 0 and the timestamp is taken now. */ PX_FORCE_INLINE void startEvent(uint16_t inId, uint64_t contextId, uint32_t threadId) + { + startEvent( inId, threadId, contextId, 0, 0, shdfnd::Time::getCurrentCounterValue() ); + } + + /* Adds a stop event for inId; mirrors the full startEvent() overload (filter check under mBufferMutex). */ PX_FORCE_INLINE void stopEvent(uint16_t inId, uint32_t threadId, uint64_t contextId, uint8_t cpuId, uint8_t threadPriority, uint64_t inTimestamp) + { + TScopedLockType lock(TBaseType::mBufferMutex); + if ( mEventFilter.isEventEnabled( inId ) ) + { + StopEvent theEvent; + theEvent.init( threadId, contextId, cpuId, threadPriority, inTimestamp ); + doAddProfileEvent( inId, theEvent ); + } + } + + /* Convenience overload: execution context from the context provider, timestamp taken now. */ PX_FORCE_INLINE void stopEvent(uint16_t inId, uint64_t contextId) + { + PxProfileEventExecutionContext ctx( mContextProvider.getExecutionContext() ); + stopEvent( inId, ctx.mThreadId, contextId, ctx.mCpuId, static_cast<uint8_t>(ctx.mThreadPriority), shdfnd::Time::getCurrentCounterValue() ); + } + + /* Convenience overload for a caller-supplied thread id: cpu id and thread priority are recorded as 0 and the timestamp is taken now. */ PX_FORCE_INLINE void stopEvent(uint16_t inId, uint64_t contextId, uint32_t threadId) + { + stopEvent( inId, threadId, contextId, 0, 0, shdfnd::Time::getCurrentCounterValue() ); + } + + /* Records inValue for inId using the thread id reported by the context provider. */ inline void eventValue( uint16_t inId, uint64_t contextId, int64_t inValue ) + { + eventValue( inId, mContextProvider.getThreadId(), contextId, inValue ); + } + + /* Writes an EventValue record under mBufferMutex. Unlike start/stop events this path does not consult mEventFilter and does not touch the cached context or mLastTimestamp. */ inline void eventValue( uint16_t inId, uint32_t threadId, uint64_t contextId, int64_t inValue ) + { + TScopedLockType lock( TBaseType::mBufferMutex ); + EventValue theEvent; + theEvent.init( 
inValue, contextId, threadId ); + EventHeader theHeader( static_cast<uint8_t>( getEventType<EventValue>() ), inId ); + //set the header relative timestamp; + EventValue& theType( theEvent ); + theType.setupHeader( theHeader ); + sendEvent( theHeader, theType ); + } + + /* Flushes the buffered events through DataBuffer::flushEvents(). */ void flushProfileEvents() + { + TBaseType::flushEvents(); + } + + /* Destroys this object through PX_PROFILE_DELETE using the wrapped user foundation; the object must not be used afterwards. */ void release() + { + PX_PROFILE_DELETE( TBaseType::mWrapper.mUserFoundation, this ); + } + protected: + //Clears the cache meaning event compression + //starts over again. + //only called when the buffer mutex is held + void clearCachedData() + { + mEventContextInformation.setToDefault(); + mLastTimestamp = 0; + } + + /* Writes inType as a relative event when its context equals the cached one; otherwise caches the new context and writes the full event. NOTE(review): takes mBufferMutex again although startEvent()/stopEvent() already hold it - presumably the mutex is recursive or null; confirm. */ template<typename TProfileEventType> + PX_FORCE_INLINE void doAddProfileEvent(uint16_t eventId, const TProfileEventType& inType) + { + TScopedLockType lock(TBaseType::mBufferMutex); + if (mEventContextInformation == inType.mContextInformation) + doAddEvent(static_cast<uint8_t>(inType.getRelativeEventType()), eventId, inType.getRelativeEvent()); + else + { + mEventContextInformation = inType.mContextInformation; + doAddEvent( static_cast<uint8_t>( getEventType<TProfileEventType>() ), eventId, inType ); + } + } + + /* Builds the header for the event, lets the event set it up against mLastTimestamp, remembers the event's own timestamp and serializes both. The const_cast is needed because setupHeader() is called on the event, which arrives by const reference. */ template<typename TDataType> + PX_FORCE_INLINE void doAddEvent(uint8_t inEventType, uint16_t eventId, const TDataType& inType) + { + EventHeader theHeader( inEventType, eventId ); + //set the header relative timestamp; + TDataType& theType( const_cast<TDataType&>( inType ) ); + uint64_t currentTs = inType.getTimestamp(); + theType.setupHeader(theHeader, mLastTimestamp); + mLastTimestamp = currentTs; + sendEvent( theHeader, theType ); + } + + /* Serializes header and event into the base class serializer and flushes once mDataArray has reached mBufferFullAmount. sizeToWrite is only used by the assert, hence PX_UNUSED. */ template<typename TDataType> + PX_FORCE_INLINE void sendEvent( EventHeader& inHeader, TDataType& inType ) + { + uint32_t sizeToWrite = sizeof(inHeader) + inType.getEventSize(inHeader); + PX_UNUSED(sizeToWrite); + + uint32_t writtenSize = inHeader.streamify( TBaseType::mSerializer ); + writtenSize += inType.streamify(TBaseType::mSerializer, inHeader); + + PX_ASSERT(writtenSize 
== sizeToWrite); + + if ( TBaseType::mDataArray.size() >= TBaseType::mBufferFullAmount ) + flushProfileEvents(); + + } + + }; +}} +#endif // PXPVDSDK_PXPROFILEEVENTBUFFER_H diff --git a/PxShared/src/pvd/src/PxProfileEventBufferAtomic.h b/PxShared/src/pvd/src/PxProfileEventBufferAtomic.h new file mode 100644 index 0000000..f87839f --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileEventBufferAtomic.h @@ -0,0 +1,320 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. 
+// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef PXPVDSDK_PXPROFILEEVENTBUFFERATOMIC_H +#define PXPVDSDK_PXPROFILEEVENTBUFFERATOMIC_H + +#include "PxProfileEvents.h" +#include "PxProfileEventSerialization.h" +#include "PxProfileEventSystem.h" +#include "PxProfileDataBuffer.h" +#include "PxProfileContextProvider.h" + +#include "PsArray.h" +#include "PsAlloca.h" +#include "PsTime.h" +#include "PsCpu.h" +#include "PsAtomic.h" +#include "PsAllocator.h" + + +namespace physx { + namespace profile { + + static const uint32_t LOCAL_BUFFER_SIZE = 512; + + /** + * An event buffer maintains an in-memory buffer of events. When this buffer is full + * it sends the buffer to all handlers registered and resets the buffer. + * + * It is parameterized in four ways. The first is a context provider that provides + * both thread id and context id. + * + * The second is the mutex (which may be null) and a scoped locking mechanism. Thus the buffer + * may be used in a multithreaded context but clients of the buffer don't pay for this if they + * don't intend to use it this way. + * + * Finally the buffer may use an event filtering mechanism. This mechanism needs one function, + * namely isEventEnabled( eventId ), which is called with the 16 bit event id. + * + * All of these systems can be parameterized at compile time leading to an event buffer + * that should be as fast as possible given the constraints. + * + * Buffers may be chained together as this buffer has a handleBufferFlush method that + * will grab the mutex and add the data to this event buffer. + * + * Overall, let's look at the PhysX SDK and how all the pieces fit together. + * The SDK should have a mutex-protected event buffer where actual devs or users of PhysX + * can register handlers. 
This buffer has slow but correct implementations of the + * context provider interface. + * + * The SDK object should also have a concrete event filter which was used in the + * construction of the event buffer and which it exposes through opaque interfaces. + * + * The SDK should protect its event buffer and its event filter from multithreaded + * access and thus this provides the safest and slowest way to log events and to + * enable/disable events. + * + * Each scene should also have a concrete event filter. This filter is updated from + * the SDK event filter (in a mutex protected way) every frame. Thus scenes can change + * their event filtering on a frame-by-frame basis. It means that tasks running + * under the scene don't need a mutex when accessing the filter. + * + * Furthermore the scene should have an event buffer that always sets the context id + * on each event to the scene. This allows PVD and other systems to correlate events + * to scenes. Scenes should provide access only to a relative event sending system + * that looks up thread id upon each event but uses the scene id. + * + * The SDK's event buffer should be setup as an EventBufferClient for each scene's + * event buffer. Thus the SDK should expose an EventBufferClient interface that + * any client can use. + * + * For extremely *extremely* performance sensitive areas we should create a specialized + * per-scene, per-thread event buffer that is set on the task for these occasions. This buffer + * uses a trivial event context setup with the scene's context id and the thread id. It should + * share the scene's concrete event filter and it should have absolutely no locking. It should + * empty into the scene's event buffer which in some cases should empty into the SDK's event buffer + * which when full will push events all the way out of the system. The task should *always* flush + * the event buffer (if it has one) when it is finished; nothing else will work reliably. 
+ * + * If the per-scene,per-thread event buffer is correctly parameterized and fully defined adding + * a new event should be an inline operation requiring no mutex grabs in the common case. I don't + * believe you can get faster event production than this; the events are as small as possible (all + * relative events) and they are all produced inline resulting in one 4 byte header and one + * 8 byte timestamp per event. Reducing the memory pressure in this way reduces the communication + * overhead, the mutex grabs, basically everything that makes profiling expensive at the cost + * of a per-scene,per-thread event buffer (which could easily be reduced to a per-thread event + * buffer). + */ + template<typename TContextProvider, + typename TMutex, + typename TScopedLock, + typename TEventFilter> + class EventBufferAtomic : public DataBuffer < TMutex, TScopedLock > + { + public: + typedef DataBuffer<TMutex, TScopedLock> TBaseType; + typedef TContextProvider TContextProviderType; + typedef TEventFilter TEventFilterType; + typedef typename TBaseType::TMutexType TMutexType; + typedef typename TBaseType::TScopedLockType TScopedLockType; + typedef typename TBaseType::TU8AllocatorType TU8AllocatorType; + typedef typename TBaseType::TMemoryBufferType TMemoryBufferType; + typedef typename TBaseType::TBufferClientArray TBufferClientArray; + + private: + TContextProvider mContextProvider; + TEventFilterType mEventFilter; + /* Running count of bytes claimed by writers; bumped with atomicAdd before an event is written and parked above mBufferFullAmount while a flush is in progress. */ volatile int32_t mReserved; + /* Running count of bytes actually copied into the serializer array; bumped with atomicAdd after each write. */ volatile int32_t mWritten; + + public: + /* Forwards the allocator, flush threshold and mutex to the DataBuffer base, copies the context provider and the event filter, and starts with nothing reserved or written. */ EventBufferAtomic(PxAllocatorCallback* inFoundation + , uint32_t inBufferFullAmount + , const TContextProvider& inProvider + , TMutexType* inBufferMutex + , const TEventFilterType& inEventFilter) + : TBaseType(inFoundation, inBufferFullAmount, inBufferMutex, "struct physx::profile::ProfileEvent") + , mContextProvider(inProvider) + , mEventFilter(inEventFilter) + , mReserved(0) + , mWritten(0) + { + } + + TContextProvider& getContextProvider() { return mContextProvider; } + + 
/* Adds a start event for inId when the filter enables it. No mutex is taken on this path; space is claimed atomically in doAddEvent(). */ PX_FORCE_INLINE void startEvent(uint16_t inId, uint32_t threadId, uint64_t contextId, uint8_t cpuId, uint8_t threadPriority, uint64_t inTimestamp) + { + if (mEventFilter.isEventEnabled(inId)) + { + StartEvent theEvent; + theEvent.init(threadId, contextId, cpuId, threadPriority, inTimestamp); + doAddProfileEvent(inId, theEvent); + } + } + + /* Convenience overload: thread id, cpu id and thread priority come from the context provider, the timestamp from shdfnd::Time::getCurrentCounterValue(). */ PX_FORCE_INLINE void startEvent(uint16_t inId, uint64_t contextId) + { + PxProfileEventExecutionContext ctx(mContextProvider.getExecutionContext()); + startEvent(inId, ctx.mThreadId, contextId, ctx.mCpuId, static_cast<uint8_t>(ctx.mThreadPriority), shdfnd::Time::getCurrentCounterValue()); + } + + /* Convenience overload for a caller-supplied thread id: cpu id and thread priority are recorded as 0 and the timestamp is taken now. */ PX_FORCE_INLINE void startEvent(uint16_t inId, uint64_t contextId, uint32_t threadId) + { + startEvent(inId, threadId, contextId, 0, 0, shdfnd::Time::getCurrentCounterValue()); + } + + /* Adds a stop event for inId; mirrors the full startEvent() overload. */ PX_FORCE_INLINE void stopEvent(uint16_t inId, uint32_t threadId, uint64_t contextId, uint8_t cpuId, uint8_t threadPriority, uint64_t inTimestamp) + { + if (mEventFilter.isEventEnabled(inId)) + { + StopEvent theEvent; + theEvent.init(threadId, contextId, cpuId, threadPriority, inTimestamp); + doAddProfileEvent(inId, theEvent); + } + } + + /* Convenience overload: execution context from the context provider, timestamp taken now. */ PX_FORCE_INLINE void stopEvent(uint16_t inId, uint64_t contextId) + { + PxProfileEventExecutionContext ctx(mContextProvider.getExecutionContext()); + stopEvent(inId, ctx.mThreadId, contextId, ctx.mCpuId, static_cast<uint8_t>(ctx.mThreadPriority), shdfnd::Time::getCurrentCounterValue()); + } + + /* Convenience overload for a caller-supplied thread id: cpu id and thread priority are recorded as 0 and the timestamp is taken now. */ PX_FORCE_INLINE void stopEvent(uint16_t inId, uint64_t contextId, uint32_t threadId) + { + stopEvent(inId, threadId, contextId, 0, 0, shdfnd::Time::getCurrentCounterValue()); + } + + /* Records inValue for inId using the thread id reported by the context provider. */ inline void eventValue(uint16_t inId, uint64_t contextId, int64_t inValue) + { + eventValue(inId, mContextProvider.getThreadId(), contextId, inValue); + } + + /* Writes an EventValue record: claims its size from mReserved and hands it to sendEvent(). This path does not consult mEventFilter. */ inline void eventValue(uint16_t inId, uint32_t threadId, uint64_t contextId, int64_t inValue) + { + EventValue theEvent; + theEvent.init(inValue, contextId, threadId); + EventHeader 
theHeader(static_cast<uint8_t>(getEventType<EventValue>()), inId); + //set the header relative timestamp; + EventValue& theType(theEvent); + theType.setupHeader(theHeader); + + int32_t sizeToWrite = int32_t(sizeof(theHeader) + theType.getEventSize(theHeader)); + int32_t reserved = shdfnd::atomicAdd(&mReserved, sizeToWrite); + sendEvent(theHeader, theType, reserved, sizeToWrite); + } + + /* Flushes the buffer under mBufferMutex. reserved is the byte count that must have been written before flushing; the default -1 means "whatever was reserved when the flush started". NOTE(review): the wait below is a busy loop on the volatile mWritten - confirm this is acceptable on all target platforms. */ void flushProfileEvents(int32_t reserved = -1) + { + TScopedLockType lock(TBaseType::mBufferMutex); + + // set the buffer full to lock additional writes + int32_t reservedOld = shdfnd::atomicExchange(&mReserved, int32_t(TBaseType::mBufferFullAmount + 1)); + if (reserved == -1) + reserved = reservedOld; + + // spin till we have written all the data + while (reserved > mWritten) + { + } + + // check if we have written all data + PX_ASSERT(reserved == mWritten); + + // set the correct size of the serialization data buffer + TBaseType::mSerializer.mArray->setEnd(TBaseType::mSerializer.mArray->begin() + mWritten); + + // flush events + TBaseType::flushEvents(); + + // reset reserved/written so that writers can start filling the buffer again + mWritten = 0; + mReserved = 0; + } + + /* Destroys this object through PX_PROFILE_DELETE using the wrapped user foundation; the object must not be used afterwards. */ void release() + { + PX_PROFILE_DELETE(TBaseType::mWrapper.mUserFoundation, this); + } + protected: + //Intentionally empty: this buffer keeps no cached event context + //(every event is written in full form with a last timestamp of 0). 
+ //only called when the buffer mutex is held + void clearCachedData() + { + } + + /* Always writes the full (non-relative) form of the event. */ template<typename TProfileEventType> + PX_FORCE_INLINE void doAddProfileEvent(uint16_t eventId, const TProfileEventType& inType) + { + doAddEvent(static_cast<uint8_t>(getEventType<TProfileEventType>()), eventId, inType); + } + + /* Sets up the header with a last timestamp of 0, claims sizeToWrite bytes from mReserved and passes the event on to sendEvent(). The const_cast is needed because setupHeader() is called on the event, which arrives by const reference. */ template<typename TDataType> + PX_FORCE_INLINE void doAddEvent(uint8_t inEventType, uint16_t eventId, const TDataType& inType) + { + EventHeader theHeader(inEventType, eventId); + TDataType& theType(const_cast<TDataType&>(inType)); + theType.setupHeader(theHeader, 0); + + const int32_t sizeToWrite = int32_t(sizeof(theHeader) + theType.getEventSize(theHeader)); + + int32_t reserved = shdfnd::atomicAdd(&mReserved, sizeToWrite); + sendEvent(theHeader, theType, reserved, sizeToWrite); + } + + /* Serializes the event into a stack buffer and copies it into the shared array at offset reserved - sizeToWrite, i.e. reserved is treated as the end of the claimed range. The writer whose range reaches mBufferFullAmount triggers the flush. */ template<typename TDataType> + PX_FORCE_INLINE void sendEvent(EventHeader& inHeader, TDataType& inType, int32_t reserved, int32_t sizeToWrite) + { + // if we don't fit to the buffer, we wait till it is flushed + if (reserved - sizeToWrite >= int32_t(TBaseType::mBufferFullAmount)) + { + while (reserved - sizeToWrite >= int32_t(TBaseType::mBufferFullAmount)) + { + // I32 overflow + if (mReserved < int32_t(TBaseType::mBufferFullAmount)) + { + reserved = shdfnd::atomicAdd(&mReserved, sizeToWrite); + } + } + } + + int32_t writeIndex = reserved - sizeToWrite; + uint32_t writtenSize = 0; + + PX_ASSERT(writeIndex >= 0); + + PX_ALLOCA(tempBuffer, uint8_t, sizeToWrite); + TempMemoryBuffer memoryBuffer(tempBuffer, sizeToWrite); + EventSerializer<TempMemoryBuffer> eventSerializer(&memoryBuffer); + + writtenSize = inHeader.streamify(eventSerializer); + writtenSize += inType.streamify(eventSerializer, inHeader); + + /* NOTE(review): reserve() and write() on the shared array run without the mutex - presumably safe only if the array never has to grow here; confirm. */ TBaseType::mSerializer.mArray->reserve(writeIndex + writtenSize); + TBaseType::mSerializer.mArray->write(&tempBuffer[0], writtenSize, writeIndex); + + PX_ASSERT(writtenSize == uint32_t(sizeToWrite)); + shdfnd::atomicAdd(&mWritten, sizeToWrite); + + if (reserved >= 
int32_t(TBaseType::mBufferFullAmount)) + { + TScopedLockType lock(TBaseType::mBufferMutex); + // we flush the buffer if it is full and it has not been flushed in the meantime + if(mReserved >= reserved) + flushProfileEvents(reserved); + } + } + + }; + } +} +#endif // PXPVDSDK_PXPROFILEEVENTBUFFERATOMIC_H diff --git a/PxShared/src/pvd/src/PxProfileEventBufferClient.h b/PxShared/src/pvd/src/PxProfileEventBufferClient.h new file mode 100644 index 0000000..d8a1ff2 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileEventBufferClient.h @@ -0,0 +1,81 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXPVDSDK_PXPROFILEEVENTBUFFERCLIENT_H
+#define PXPVDSDK_PXPROFILEEVENTBUFFERCLIENT_H
+
+#include "PxProfileBase.h"
+#include "PxProfileEventNames.h"
+
+namespace physx { namespace profile {
+
+	/**
+	\brief Client that receives the data when an event buffer flushes. This data
+	can be parsed (PxProfileEventHandler.h) as a binary set of events.
+
+	\note The destructor is protected, so a client cannot be deleted through
+	this interface.
+	*/
+	class PxProfileEventBufferClient
+	{
+	protected:
+		virtual ~PxProfileEventBufferClient(){}
+	public:
+		/**
+		\brief Callback when the event buffer is full. This data is serialized profile events
+		and can be read back using: PxProfileEventHandler::parseEventBuffer.
+
+		\param inData Provided buffer data.
+		\param inLength Data length.
+
+		@see PxProfileEventHandler::parseEventBuffer.
+		 */
+		virtual void handleBufferFlush( const uint8_t* inData, uint32_t inLength ) = 0;
+
+		/**
+		\brief Happens if something removes all the clients from the manager.
+		*/
+		virtual void handleClientRemoved() = 0;
+	};
+
+	/**
+	\brief Buffer client that is additionally notified when a new profile event is added.
+	*/
+	class PxProfileZoneClient : public PxProfileEventBufferClient
+	{
+	protected:
+		virtual ~PxProfileZoneClient(){}
+	public:
+		/**
+		\brief Callback when new profile event is added.
+
+		\param inName Added profile event name.
+		*/
+		virtual void handleEventAdded( const PxProfileEventName& inName ) = 0;
+	};
+
+} }
+
+
+#endif // PXPVDSDK_PXPROFILEEVENTBUFFERCLIENT_H
diff --git a/PxShared/src/pvd/src/PxProfileEventBufferClientManager.h b/PxShared/src/pvd/src/PxProfileEventBufferClientManager.h
new file mode 100644
index 0000000..1402183
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileEventBufferClientManager.h
@@ -0,0 +1,94 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXPVDSDK_PXPROFILEEVENTBUFFERCLIENTMANAGER_H
+#define PXPVDSDK_PXPROFILEEVENTBUFFERCLIENTMANAGER_H
+
+#include "PxProfileEventBufferClient.h"
+
+namespace physx { namespace profile {
+
+	/**
+	\brief Manager that keeps a collection of PxProfileEventBufferClient clients.
+
+	\note Clients are passed by reference; the destructor is protected, so a
+	manager cannot be deleted through this interface.
+
+	@see PxProfileEventBufferClient
+	*/
+	class PxProfileEventBufferClientManager
+	{
+	protected:
+		virtual ~PxProfileEventBufferClientManager(){}
+	public:
+		/**
+		\brief Adds new client.
+		\param inClient Client to add.
+		*/
+		virtual void addClient( PxProfileEventBufferClient& inClient ) = 0;
+
+		/**
+		\brief Removes a client.
+		\param inClient Client to remove.
+		*/
+		virtual void removeClient( PxProfileEventBufferClient& inClient ) = 0;
+
+		/**
+		\brief Check if manager has clients.
+		\return True if manager has added clients.
+		*/
+		virtual bool hasClients() const = 0;
+	};
+
+	/**
+	\brief Manager that keeps a collection of PxProfileZoneClient clients.
+
+	\note Declared independently of PxProfileEventBufferClientManager (no
+	inheritance relation between the two manager interfaces).
+
+	@see PxProfileZoneClient
+	*/
+	class PxProfileZoneClientManager
+	{
+	protected:
+		virtual ~PxProfileZoneClientManager(){}
+	public:
+		/**
+		\brief Adds new client.
+		\param inClient Client to add.
+		*/
+		virtual void addClient( PxProfileZoneClient& inClient ) = 0;
+
+		/**
+		\brief Removes a client.
+		\param inClient Client to remove.
+		*/
+		virtual void removeClient( PxProfileZoneClient& inClient ) = 0;
+
+		/**
+		\brief Check if manager has clients.
+		\return True if manager has added clients.
+		*/
+		virtual bool hasClients() const = 0;
+	};
+} }
+
+#endif // PXPVDSDK_PXPROFILEEVENTBUFFERCLIENTMANAGER_H
diff --git a/PxShared/src/pvd/src/PxProfileEventFilter.h b/PxShared/src/pvd/src/PxProfileEventFilter.h
new file mode 100644
index 0000000..0f38d65
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileEventFilter.h
@@ -0,0 +1,93 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXPVDSDK_PXPROFILEEVENTFILTER_H
+#define PXPVDSDK_PXPROFILEEVENTFILTER_H
+
+#include "foundation/PxAssert.h"
+#include "PxProfileBase.h"
+#include "PxProfileEventId.h"
+
+namespace physx { namespace profile {
+
+	/**
+	\brief Called upon every event to give a quick-out before adding the event
+	to the event buffer.
+
+	\note: not thread safe, can be called from different threads at the same time
+	(NOTE(review): the wording is ambiguous - confirm whether implementations
+	must tolerate concurrent calls or callers must serialize them.)
+	*/
+	class PxProfileEventFilter
+	{
+	protected:
+		virtual ~PxProfileEventFilter(){}
+	public:
+		/**
+		\brief Disabled events will not go into the event buffer and will not be
+		transmitted to clients.
+		\param inId Profile event id.
+		\param isEnabled True if event should be enabled.
+		*/
+		virtual void setEventEnabled( const PxProfileEventId& inId, bool isEnabled ) = 0;
+
+		/**
+		\brief Returns the current state of the profile event.
+		\param inId Profile event id.
+		\return True if profile event is enabled.
+		*/
+		virtual bool isEventEnabled( const PxProfileEventId& inId ) const = 0;
+	};
+
+	/**
+	\brief Forwards the filter requests to another event filter.
+
+	TFilterType only needs to provide setEventEnabled() and isEventEnabled();
+	the target is held as a raw pointer and is dereferenced without a null check.
+	*/
+	template<typename TFilterType>
+	struct PxProfileEventFilterForward
+	{
+		/**
+		\brief Constructor.
+		\param inFilter Filter that receives the forwarded calls; stored as-is.
+		*/
+		PxProfileEventFilterForward( TFilterType* inFilter ) : filter( inFilter ) {}
+
+		/**
+		\brief Disabled events will not go into the event buffer and will not be
+		transmitted to clients.
+		\param inId Profile event id.
+		\param isEnabled True if event should be enabled.
+		*/
+		void setEventEnabled( const PxProfileEventId& inId, bool isEnabled ) { filter->setEventEnabled( inId, isEnabled ); }
+
+		/**
+		\brief Returns the current state of the profile event.
+		\param inId Profile event id.
+		\return True if profile event is enabled.
+		*/
+		bool isEventEnabled( const PxProfileEventId& inId ) const { return filter->isEventEnabled( inId ); }
+
+		TFilterType* filter;
+	};
+
+} }
+
+#endif // PXPVDSDK_PXPROFILEEVENTFILTER_H
diff --git a/PxShared/src/pvd/src/PxProfileEventHandler.h b/PxShared/src/pvd/src/PxProfileEventHandler.h
new file mode 100644
index 0000000..0e0f0f9
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileEventHandler.h
@@ -0,0 +1,99 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXPVDSDK_PXPROFILEEVENTHANDLER_H
+#define PXPVDSDK_PXPROFILEEVENTHANDLER_H
+
+#include "PxProfileBase.h"
+#include "PxProfileEventId.h"
+
+namespace physx { namespace profile {
+
+	/**
+	\brief A client of the event system can expect to find these events in the event buffer.
+
+	Implement the three callbacks and pass the handler to parseEventBuffer() to
+	receive the events contained in a flushed buffer.
+	*/
+	class PxProfileEventHandler
+	{
+	protected:
+		virtual ~PxProfileEventHandler(){}
+	public:
+		/**
+		\brief Event start - onStartEvent.
+
+		\param[in] inId Profile event id.
+		\param[in] threadId Thread id.
+		\param[in] contextId Context id.
+		\param[in] cpuId CPU id.
+		\param[in] threadPriority Thread priority.
+		\param[in] timestamp Timestamp in cycles.
+		*/
+		virtual void onStartEvent( const PxProfileEventId& inId, uint32_t threadId, uint64_t contextId, uint8_t cpuId, uint8_t threadPriority, uint64_t timestamp ) = 0;
+
+		/**
+		\brief Event stop - onStopEvent.
+
+		\param[in] inId Profile event id.
+		\param[in] threadId Thread id.
+		\param[in] contextId Context id.
+		\param[in] cpuId CPU id.
+		\param[in] threadPriority Thread priority.
+		\param[in] timestamp Timestamp in cycles.
+		*/
+		virtual void onStopEvent( const PxProfileEventId& inId, uint32_t threadId, uint64_t contextId, uint8_t cpuId, uint8_t threadPriority, uint64_t timestamp ) = 0;
+
+		/**
+		\brief Event value - onEventValue.
+
+		\param[in] inId Profile event id.
+		\param[in] threadId Thread id.
+		\param[in] contextId Context id.
+		\param[in] inValue Value.
+		*/
+		virtual void onEventValue( const PxProfileEventId& inId, uint32_t threadId, uint64_t contextId, int64_t inValue ) = 0;
+
+		/**
+		\brief Parse the flushed profile buffer which contains the profile events.
+
+		\param[in] inBuffer The profile buffer with profile events.
+		\param[in] inBufferSize Buffer size.
+		\param[in] inHandler The profile event callback to receive the parsed events.
+		\param[in] inSwapBytes True to parse with the byte-swapping deserializer
+		(presumably for buffers recorded with a different endianness - confirm),
+		false to read the data as stored.
+		*/
+		static void parseEventBuffer( const uint8_t* inBuffer, uint32_t inBufferSize, PxProfileEventHandler& inHandler, bool inSwapBytes );
+
+		/**
+		\brief Translates event duration in timestamp (cycles) into nanoseconds.
+
+		\param[in] duration Timestamp duration of the event.
+
+		\return event duration in nanoseconds. The value is computed in tens of
+		nanoseconds and multiplied by 10, so it is always a multiple of 10.
+		*/
+		static uint64_t durationToNanoseconds(uint64_t duration);
+	};
+} }
+
+#endif // PXPVDSDK_PXPROFILEEVENTHANDLER_H
diff --git a/PxShared/src/pvd/src/PxProfileEventId.h b/PxShared/src/pvd/src/PxProfileEventId.h
new file mode 100644
index 0000000..dd98cd5
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileEventId.h
@@ -0,0 +1,80 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXPVDSDK_PXPROFILEEVENTID_H
+#define PXPVDSDK_PXPROFILEEVENTID_H
+
+#include "PxProfileBase.h"
+
+namespace physx { namespace profile {
+	/**
+	\brief An event id structure. Optionally includes information about
+	whether the event was enabled at compile time.
+
+	\note The constructor is not explicit and a conversion operator to
+	uint16_t is provided, so the type converts implicitly in both directions.
+	*/
+	struct PxProfileEventId
+	{
+		uint16_t		eventId;
+		// mutable: may be changed even through a const PxProfileEventId reference.
+		mutable bool	compileTimeEnabled;
+
+		/**
+		\brief Profile event id constructor.
+		\param inId Profile event id.
+		\param inCompileTimeEnabled Compile time enabled.
+		*/
+		PxProfileEventId( uint16_t inId = 0, bool inCompileTimeEnabled = true )
+			: eventId( inId )
+			, compileTimeEnabled( inCompileTimeEnabled )
+		{
+		}
+
+		/** \brief Implicit conversion to the raw event id. */
+		operator uint16_t () const { return eventId; }
+
+		/**
+		\brief Equality compares the event id only; compileTimeEnabled is ignored.
+		*/
+		bool operator==( const PxProfileEventId& inOther ) const
+		{
+			return eventId == inOther.eventId;
+		}
+	};
+
+	/**
+	\brief Template event id structure whose compile-time-enabled flag is
+	fixed by the TEnabled template argument.
+	*/
+	template<bool TEnabled>
+	struct PxProfileCompileTimeFilteredEventId : public PxProfileEventId
+	{
+		/**
+		\brief Constructor.
+		\param inId Profile event id.
+		*/
+		PxProfileCompileTimeFilteredEventId( uint16_t inId = 0 )
+			: PxProfileEventId( inId, TEnabled )
+		{
+		}
+	};
+
+} }
+
+#endif // PXPVDSDK_PXPROFILEEVENTID_H
diff --git a/PxShared/src/pvd/src/PxProfileEventImpl.cpp b/PxShared/src/pvd/src/PxProfileEventImpl.cpp
new file mode 100644
index 0000000..a519f92
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileEventImpl.cpp
@@ -0,0 +1,230 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+
+#include "foundation/PxErrorCallback.h"
+#include "foundation/PxAllocatorCallback.h"
+
+#include "PxProfileEvents.h"
+#include "PxProfileEventSerialization.h"
+#include "PxProfileEventBuffer.h"
+#include "PxProfileZoneImpl.h"
+#include "PxProfileZoneManagerImpl.h"
+#include "PxProfileEventParser.h"
+#include "PxProfileEventHandler.h"
+#include "PxProfileScopedMutexLock.h"
+#include "PxProfileEventFilter.h"
+#include "PxProfileContextProvider.h"
+#include "PxProfileEventMutex.h"
+#include "PxProfileMemoryEventTypes.h"
+#include "PxProfileMemoryEventRecorder.h"
+#include "PxProfileMemoryEventBuffer.h"
+#include "PxProfileMemoryEventParser.h"
+#include "PxProfileContextProviderImpl.h"
+
+#include "PsUserAllocated.h"
+#include "PsTime.h"
+
+#include <stdio.h>
+
+namespace physx { namespace profile {
+
+
+	// Converts a duration in counter ticks to nanoseconds. The conversion goes
+	// through tens of nanoseconds, so the result is always a multiple of 10.
+	uint64_t PxProfileEventHandler::durationToNanoseconds(uint64_t duration)
+	{
+		return shdfnd::Time::getBootCounterFrequency().toTensOfNanos(duration) * 10;
+	}
+
+	// Parses a flushed event buffer and reports every event to inHandler.
+	// inSwapBytes selects the byte-swapping instantiation of the parser.
+	void PxProfileEventHandler::parseEventBuffer( const uint8_t* inBuffer, uint32_t inBufferSize, PxProfileEventHandler& inHandler, bool inSwapBytes )
+	{
+		if ( inSwapBytes == false )
+			parseEventData<false>( inBuffer, inBufferSize, &inHandler );
+		else
+			parseEventData<true>( inBuffer, inBufferSize, &inHandler );
+	}
+
+	// Adapter between the per-event parser callbacks and a bulk handler:
+	// collects up to TNumEvents events and hands them over in one
+	// handleEvents() call whenever the array is full or flush() is called.
+	template<uint32_t TNumEvents>
+	struct ProfileBulkEventHandlerBuffer
+	{
+		Event mEvents[TNumEvents];
+		uint32_t mEventCount;
+		PxProfileBulkEventHandler* mHandler;
+		ProfileBulkEventHandlerBuffer( PxProfileBulkEventHandler* inHdl )
+			: mEventCount( 0 )
+			, mHandler( inHdl )
+		{
+		}
+		// Stores the event; flushes as soon as the array is full so the index
+		// never runs past TNumEvents.
+		void onEvent( const Event& inEvent )
+		{
+			mEvents[mEventCount] = inEvent;
+			++mEventCount;
+			if ( mEventCount == TNumEvents )
+				flush();
+		}
+		// Shared implementation of start/stop events: both are stored with a
+		// StartEvent payload, the header's event type tells them apart.
+		void onEvent( const PxProfileEventId& inId, uint32_t threadId, uint64_t contextId, uint8_t cpuId, uint8_t threadPriority, uint64_t timestamp, EventTypes::Enum inType )
+		{
+			StartEvent theEvent;
+			theEvent.init( threadId, contextId, cpuId, static_cast<uint8_t>( threadPriority ), timestamp );
+			onEvent( Event( EventHeader( static_cast<uint8_t>( inType ), inId.eventId ), theEvent ) );
+		}
+		void onStartEvent( const PxProfileEventId& inId, uint32_t threadId, uint64_t contextId, uint8_t cpuId, uint8_t threadPriority, uint64_t timestamp )
+		{
+			onEvent( inId, threadId, contextId, cpuId, threadPriority, timestamp, EventTypes::StartEvent );
+		}
+		void onStopEvent( const PxProfileEventId& inId, uint32_t threadId, uint64_t contextId, uint8_t cpuId, uint8_t threadPriority, uint64_t timestamp )
+		{
+			onEvent( inId, threadId, contextId, cpuId, threadPriority, timestamp, EventTypes::StopEvent );
+		}
+		void onEventValue( const PxProfileEventId& inId, uint32_t threadId, uint64_t contextId, int64_t value )
+		{
+			EventValue theEvent;
+			theEvent.init( value, contextId, threadId );
+			onEvent( Event( inId.eventId, theEvent ) );
+		}
+		// Delivers the collected events (if any) and resets the counter.
+		void flush()
+		{
+			if ( mEventCount )
+				mHandler->handleEvents( mEvents, mEventCount );
+			mEventCount = 0;
+		}
+	};
+
+
+	// Parses a flushed event buffer and delivers the events to inHandler in
+	// batches of at most 256; the final partial batch is flushed at the end.
+	void PxProfileBulkEventHandler::parseEventBuffer( const uint8_t* inBuffer, uint32_t inBufferSize, PxProfileBulkEventHandler& inHandler, bool inSwapBytes )
+	{
+		ProfileBulkEventHandlerBuffer<256> hdler( &inHandler );
+		if ( inSwapBytes )
+			parseEventData<true>( inBuffer, inBufferSize, &hdler );
+		else
+			parseEventData<false>( inBuffer, inBufferSize, &hdler );
+		hdler.flush();
+	}
+
+	// Name provider that forwards to an externally supplied PxProfileNameProvider.
+	struct PxProfileNameProviderImpl
+	{
+		PxProfileNameProvider* mImpl;
+		PxProfileNameProviderImpl( PxProfileNameProvider* inImpl )
+			: mImpl( inImpl )
+		{
+		}
+		PxProfileNames getProfileNames() const { return mImpl->getProfileNames(); }
+	};
+
+
+	// Name provider that returns a PxProfileNames value captured at construction.
+	struct PxProfileNameProviderForward
+	{
+		PxProfileNames mNames;
+		PxProfileNameProviderForward( PxProfileNames inNames )
+			: mNames( inNames )
+		{
+		}
+		PxProfileNames getProfileNames() const { return mNames; }
+	};
+
+
+	// Factory: creates a profile zone from inAllocator.
+	PX_FOUNDATION_API PxProfileZone& PxProfileZone::createProfileZone( PxAllocatorCallback* inAllocator, const char* inSDKName, PxProfileNames inNames, uint32_t inEventBufferByteSize )
+	{
+		typedef ZoneImpl<PxProfileNameProviderForward> TSDKType;
+		return *PX_PROFILE_NEW( inAllocator, TSDKType ) ( inAllocator, inSDKName, inEventBufferByteSize, PxProfileNameProviderForward( inNames ) );
+	}
+
+	// Factory: creates a profile zone manager from inAllocator.
+	PxProfileZoneManager& PxProfileZoneManager::createProfileZoneManager(PxAllocatorCallback* inAllocator )
+	{
+		return *PX_PROFILE_NEW( inAllocator, ZoneManagerImpl ) ( inAllocator );
+	}
+
+	// Factory: creates a memory event recorder from inAllocator.
+	PxProfileMemoryEventRecorder& PxProfileMemoryEventRecorder::createRecorder( PxAllocatorCallback* inAllocator )
+	{
+		return *PX_PROFILE_NEW( inAllocator, PxProfileMemoryEventRecorderImpl )( inAllocator );
+	}
+
+	// Factory: creates a memory event buffer of inBufferSize from inAllocator.
+	PxProfileMemoryEventBuffer& PxProfileMemoryEventBuffer::createMemoryEventBuffer( PxAllocatorCallback& inAllocator, uint32_t inBufferSize )
+	{
+		return *PX_PROFILE_NEW( &inAllocator, PxProfileMemoryEventBufferImpl )( inAllocator, inBufferSize );
+	}
+	// Memory-event counterpart of ProfileBulkEventHandlerBuffer: batches
+	// allocation/deallocation events; every other memory event type is ignored
+	// by the generic operator() below.
+	template<uint32_t TNumEvents>
+	struct ProfileBulkMemoryEventHandlerBuffer
+	{
+		PxProfileBulkMemoryEvent mEvents[TNumEvents];
+		uint32_t mEventCount;
+		PxProfileBulkMemoryEventHandler* mHandler;
+		ProfileBulkMemoryEventHandlerBuffer( PxProfileBulkMemoryEventHandler* inHdl )
+			: mEventCount( 0 )
+			, mHandler( inHdl )
+		{
+		}
+		void onEvent( const PxProfileBulkMemoryEvent& evt )
+		{
+			mEvents[mEventCount] = evt;
+			++mEventCount;
+			if ( mEventCount == TNumEvents )
+				flush();
+		}
+
+		// Catch-all: event types other than allocation/deallocation are dropped.
+		template<typename TDataType>
+		void operator()( const MemoryEventHeader&, const TDataType& ) {}
+
+		void operator()( const MemoryEventHeader&, const AllocationEvent& evt )
+		{
+			onEvent( PxProfileBulkMemoryEvent( evt.mSize, evt.mType, evt.mFile, evt.mLine, evt.mAddress ) );
+		}
+
+		void operator()( const MemoryEventHeader&, const DeallocationEvent& evt )
+		{
+			onEvent( PxProfileBulkMemoryEvent( evt.mAddress ) );
+		}
+
+		void flush()
+		{
+			if ( mEventCount )
+				mHandler->handleEvents( mEvents, mEventCount );
+			mEventCount = 0;
+		}
+	};
+
+	// Parses a flushed memory event buffer and delivers the allocation and
+	// deallocation events to inHandler in batches of at most 0x1000.
+	// The batching buffer is allocated from (and released to) inAlloc because
+	// it is too large to live on the stack.
+	// \param inAlloc	Allocator; must not be NULL. If it is NULL nothing is parsed.
+	void PxProfileBulkMemoryEventHandler::parseEventBuffer( const uint8_t* inBuffer, uint32_t inBufferSize, PxProfileBulkMemoryEventHandler& inHandler, bool inSwapBytes, PxAllocatorCallback* inAlloc )
+	{
+		PX_ASSERT(inAlloc);
+		// Guard in addition to the assert: in builds where PX_ASSERT compiles
+		// out, a NULL allocator would otherwise be dereferenced below.
+		if ( inAlloc == NULL )
+			return;
+
+		ProfileBulkMemoryEventHandlerBuffer<0x1000>* theBuffer = PX_PROFILE_NEW(inAlloc, ProfileBulkMemoryEventHandlerBuffer<0x1000>)(&inHandler);
+
+		if ( inSwapBytes )
+		{
+			MemoryEventParser<true> theParser( *inAlloc );
+			theParser.parseEventData( inBuffer, inBufferSize, theBuffer );
+		}
+		else
+		{
+			MemoryEventParser<false> theParser( *inAlloc );
+			theParser.parseEventData( inBuffer, inBufferSize, theBuffer );
+		}
+		theBuffer->flush();
+
+		PX_PROFILE_DELETE(*inAlloc, theBuffer);
+	}
+
+} }
+
diff --git a/PxShared/src/pvd/src/PxProfileEventMutex.h b/PxShared/src/pvd/src/PxProfileEventMutex.h
new file mode 100644
index 0000000..5ec837b
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileEventMutex.h
@@ -0,0 +1,63 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+
+
+#ifndef PXPVDSDK_PXPROFILEEVENTMUTEX_H
+#define PXPVDSDK_PXPROFILEEVENTMUTEX_H
+
+#include "PxProfileBase.h"
+
+namespace physx { namespace profile {
+
+	/**
+	 *	Mutex interface that hides implementation around lock and unlock.
+	 *	The event system locks the mutex for every interaction.
+	 *	The destructor is protected, so a mutex cannot be deleted through this
+	 *	interface.
+	 */
+	class PxProfileEventMutex
+	{
+	protected:
+		virtual ~PxProfileEventMutex(){}
+	public:
+		/** Acquires the mutex. */
+		virtual void lock() = 0;
+		/** Releases the mutex. */
+		virtual void unlock() = 0;
+	};
+
+	/**
+	 * Take any mutex type that implements lock and unlock and make an EventMutex out of it.
+	 * The wrapped mutex is held by raw pointer; this adapter defines no destructor
+	 * of its own and never deletes it, and the pointer is dereferenced without a
+	 * null check.
+	 */
+	template<typename TMutexType>
+	struct PxProfileEventMutexImpl : public PxProfileEventMutex
+	{
+		TMutexType* mMutex;
+		PxProfileEventMutexImpl( TMutexType* inMtx ) : mMutex( inMtx ) {}
+		virtual void lock() { mMutex->lock(); }
+		virtual void unlock() { mMutex->unlock(); }
+	};
+
+} }
+
+#endif // PXPVDSDK_PXPROFILEEVENTMUTEX_H
diff --git a/PxShared/src/pvd/src/PxProfileEventNames.h b/PxShared/src/pvd/src/PxProfileEventNames.h
new file mode 100644
index 0000000..831da20
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileEventNames.h
@@ -0,0 +1,90 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXPVDSDK_PXPROFILEEVENTNAMES_H
+#define PXPVDSDK_PXPROFILEEVENTNAMES_H
+
+#include "PxProfileBase.h"
+#include "PxProfileEventId.h"
+
+namespace physx { namespace profile {
+
+	/**
+	\brief Mapping from event id to name.
+
+	\note The name is stored as a raw pointer and is not copied.
+	*/
+	struct PxProfileEventName
+	{
+		const char*					name;
+		PxProfileEventId			eventId;
+
+		/**
+		\brief Constructor.
+		\param inName Profile event name.
+		\param inId Profile event id.
+		*/
+		PxProfileEventName( const char* inName, PxProfileEventId inId ) : name( inName ), eventId( inId ) {}
+	};
+
+	/**
+	\brief Aggregator of event id -> name mappings
+
+	\note Holds a pointer to an externally provided array; the array is not copied.
+	*/
+	struct PxProfileNames
+	{
+		/**
+		\brief Constructor; with the default arguments it doesn't point to any names.
+		\param inEventCount Number of provided events.
+		\param inSubsystems Event names array.
+		*/
+		PxProfileNames( uint32_t inEventCount = 0, const PxProfileEventName* inSubsystems = NULL )
+			: eventCount( inEventCount )
+			, events( inSubsystems )
+		{
+		}
+
+		uint32_t						eventCount;
+		const PxProfileEventName*		events;
+	};
+
+	/**
+	\brief Provides a mapping from event ID -> name.
+	*/
+	class PxProfileNameProvider
+	{
+	public:
+		/**
+		\brief Returns profile event names.
+		\return Profile event names.
+		*/
+		virtual PxProfileNames getProfileNames() const = 0;
+
+	protected:
+		virtual ~PxProfileNameProvider(){}
+		// Assignment is a protected no-op: it copies nothing and returns *this.
+		PxProfileNameProvider& operator=(const PxProfileNameProvider&) { return *this; }
+	};
+} }
+
+#endif // PXPVDSDK_PXPROFILEEVENTNAMES_H
diff --git a/PxShared/src/pvd/src/PxProfileEventParser.h b/PxShared/src/pvd/src/PxProfileEventParser.h
new file mode 100644
index 0000000..60aebdc
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileEventParser.h
@@ -0,0 +1,193 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.".
NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef PXPVDSDK_PXPROFILEEVENTPARSER_H
+#define PXPVDSDK_PXPROFILEEVENTPARSER_H
+
+#include "PxProfileEvents.h"
+#include "PxProfileEventSerialization.h"
+
+namespace physx { namespace profile {
+
+	// State carried from one parsed event to the next: the context of the last
+	// full (non-relative) event and the last reconstructed timestamp.
+	struct EventParserData
+	{
+		EventContextInformation		mContextInformation;
+		uint64_t					mLastTimestamp;
+
+		EventParserData() : mLastTimestamp(0)
+		{
+		}
+	};
+
+	//This object will be copied a lot so all of its important
+	//data must be pointers.
+	template<typename THandlerType, bool TSwapBytes>
+	struct EventParseOperator
+	{
+		typedef EventDeserializer<TSwapBytes> TDeserializerType;
+
+		EventParserData*	mData;
+		TDeserializerType*	mDeserializer;
+		EventHeader*		mHeader;
+		THandlerType*		mHandler;
+
+		EventParseOperator( EventParserData* inData, TDeserializerType* inDeserializer, EventHeader* inHeader, THandlerType* inHandler )
+			: mData( inData )
+			, mDeserializer( inDeserializer )
+			, mHeader( inHeader )
+			, mHandler( inHandler )
+		{}
+
+		// Reads the payload of one event; returns false if the deserializer failed.
+		template<typename TEventType>
+		bool parse( TEventType& ioEvent )
+		{
+			ioEvent.streamify( *mDeserializer, *mHeader );
+			bool success = mDeserializer->mFail == false;
+			PX_ASSERT( success );
+			return success;
+		}
+
+		// Reads one event header; returns false if the deserializer failed.
+		bool parseHeader( EventHeader& ioEvent )
+		{
+			ioEvent.streamify( *mDeserializer );
+			bool success = mDeserializer->mFail == false;
+			PX_ASSERT( success );
+			return success;
+		}
+
+		// Full event: carries its own context, which is remembered for the
+		// relative events that follow.
+		template<typename TEventType>
+		bool handleProfileEvent( TEventType& evt )
+		{
+			bool retval = parse( evt );
+			if ( retval )
+			{
+				mData->mContextInformation = evt.mContextInformation;
+				handle( evt.getRelativeEvent(), evt.mContextInformation );
+			}
+			return retval;
+		}
+
+		// Relative event: reuses the context of the last full event.
+		template<typename TEventType>
+		bool handleRelativeProfileEvent( TEventType& evt )
+		{
+			bool retval = parse( evt );
+			if ( retval )
+				handle( evt, mData->mContextInformation );
+			return retval;
+		}
+
+		// Reconstructs the absolute timestamp from the compressed value, writes it
+		// back into the event and dispatches the event to the handler.
+		template<typename TRelativeEventType>
+		void handle( const TRelativeEventType& evt, const EventContextInformation& inInfo )
+		{
+			mData->mLastTimestamp = mHeader->uncompressTimestamp( mData->mLastTimestamp, evt.getTimestamp());
+			const_cast<TRelativeEventType&>(evt).setTimestamp( mData->mLastTimestamp );
+			evt.handle( mHandler, mHeader->mEventId
+						, inInfo.mThreadId
+						, inInfo.mContextId
+						, inInfo.mCpuId
+						, inInfo.mThreadPriority );
+		}
+
+		// The operator() overloads are selected by visit<>() on the event type;
+		// the argument only carries the type, the event itself is read from the stream.
+		bool operator()( const StartEvent& )
+		{
+			StartEvent evt;
+			return handleProfileEvent( evt );
+		}
+		bool operator()( const StopEvent& )
+		{
+			StopEvent evt;
+			return handleProfileEvent( evt );
+		}
+		bool operator()( const RelativeStartEvent& )
+		{
+			RelativeStartEvent evt;
+			return handleRelativeProfileEvent( evt );
+
+		}
+		bool operator()( const RelativeStopEvent& )
+		{
+			RelativeStopEvent evt;
+			return handleRelativeProfileEvent( evt );
+		}
+		bool operator()( const EventValue& )
+		{
+			EventValue evt;
+			bool retval = parse( evt );
+			if ( retval )
+			{
+				evt.mValue = mHeader->uncompressTimestamp( 0, evt.mValue );
+				evt.handle( mHandler, mHeader->mEventId );
+			}
+			return retval;
+		}
+
+		//obsolete, placeholder to skip data from PhysX SDKs < 3.4
+		bool operator()( const CUDAProfileBuffer& )
+		{
+			CUDAProfileBuffer evt;
+			bool retval = parse( evt );
+			return retval;
+		}
+
+		//Unknown event type.
+		bool operator()(uint8_t )
+		{
+			PX_ASSERT( false );
+			return false;
+		}
+	};
+
+	// Parses a buffer of serialized profile events and reports each event to
+	// inHandler.
+	// \param inData	Start of the serialized event data.
+	// \param inLength	Number of bytes in inData.
+	// \param inHandler	Receiver of the parsed events.
+	// \return True if the whole buffer was parsed; false if the deserializer
+	// failed or an event could not be handled (e.g. unknown event type).
+	template<bool TSwapBytes, typename THandlerType>
+	inline bool parseEventData( const uint8_t* inData, uint32_t inLength, THandlerType* inHandler )
+	{
+		EventDeserializer<TSwapBytes> deserializer( inData, inLength );
+		// Only used for overload selection inside visit<>(); never read.
+		Event::EventData dummyData;
+		EventHeader theHeader;
+		EventParserData theData;
+		EventParseOperator<THandlerType,TSwapBytes> theOp( &theData, &deserializer, &theHeader, inHandler );
+		while( deserializer.mLength && deserializer.mFail == false)
+		{
+			if ( theOp.parseHeader( theHeader ) )
+			{
+				// Stop at the first event that cannot be handled. An unknown event
+				// type does not set mFail and its payload is not skipped, so going
+				// on would decode payload bytes as headers and could still report
+				// success.
+				if ( !visit<bool>( static_cast<EventTypes::Enum>( theHeader.mEventType ), dummyData, theOp ) )
+					return false;
+			}
+		}
+		return deserializer.mFail == false;
+	}
+
+	// Receives parsed events in batches instead of one callback per event.
+	class PxProfileBulkEventHandler
+	{
+	protected:
+		virtual ~PxProfileBulkEventHandler(){}
+	public:
+		// \param inEvents		Array of parsed events.
+		// \param inBufferSize	Number of events in inEvents.
+		virtual void handleEvents(const physx::profile::Event* inEvents, uint32_t inBufferSize) = 0;
+		// Parses inBuffer (inBufferSize bytes) and delivers the events to inHandler in batches.
+		static void parseEventBuffer(const uint8_t* inBuffer, uint32_t inBufferSize, PxProfileBulkEventHandler& inHandler, bool inSwapBytes);
+	};
+}}
+
+#endif // PXPVDSDK_PXPROFILEEVENTPARSER_H
diff --git a/PxShared/src/pvd/src/PxProfileEventSender.h b/PxShared/src/pvd/src/PxProfileEventSender.h
new file mode 100644
index 0000000..3c1cf5e
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileEventSender.h
--- /dev/null +++ b/PxShared/src/pvd/src/PxProfileEventSender.h @@ -0,0 +1,129 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#ifndef PXPVDSDK_PXPROFILEEVENTSENDER_H +#define PXPVDSDK_PXPROFILEEVENTSENDER_H + +#include "PxProfileBase.h" + +namespace physx { namespace profile { + + /** + \brief Tagging interface to indicate an object that is capable of flushing a profile + event stream at a certain point. + */ + class PxProfileEventFlusher + { + protected: + virtual ~PxProfileEventFlusher(){} + public: + /** + \brief Flush profile events. Sends the profile event buffer to hooked clients. + */ + virtual void flushProfileEvents() = 0; + }; + + /** + \brief Sends the full events where the caller must provide the context and thread id. + */ + class PxProfileEventSender + { + protected: + virtual ~PxProfileEventSender(){} + public: + + /** + \brief Use this as a thread id for events that start on one thread and end on another + */ + static const uint32_t CrossThreadId = 99999789; + + /** + \brief Send a start profile event, optionally with a context. Events are sorted by thread + and context in the client side. + \param inId Profile event id. + \param contextId Context id. + */ + virtual void startEvent( uint16_t inId, uint64_t contextId) = 0; + /** + \brief Send a stop profile event, optionally with a context. Events are sorted by thread + and context in the client side. + \param inId Profile event id. + \param contextId Context id. + */ + virtual void stopEvent( uint16_t inId, uint64_t contextId) = 0; + + /** + \brief Send a start profile event, optionally with a context. Events are sorted by thread + and context in the client side. + \param inId Profile event id. + \param contextId Context id. + \param threadId Thread id. + */ + virtual void startEvent( uint16_t inId, uint64_t contextId, uint32_t threadId) = 0; + /** + \brief Send a stop profile event, optionally with a context. Events are sorted by thread + and context in the client side. + \param inId Profile event id. + \param contextId Context id. + \param threadId Thread id. 
+ */ + virtual void stopEvent( uint16_t inId, uint64_t contextId, uint32_t threadId ) = 0; + + virtual void atEvent(uint16_t inId, uint64_t contextId, uint32_t threadId, uint64_t start, uint64_t stop) = 0; + + /** + \brief Set an specific events value. This is different than the profiling value + for the event; it is a value recorded and kept around without a timestamp associated + with it. This value is displayed when the event itself is processed. + \param inId Profile event id. + \param contextId Context id. + \param inValue Value to set for the event. + */ + virtual void eventValue( uint16_t inId, uint64_t contextId, int64_t inValue ) = 0; + }; + + /** + \brief Tagging interface to indicate an object that may or may not return + an object capable of adding profile events to a buffer. + */ + class PxProfileEventSenderProvider + { + protected: + virtual ~PxProfileEventSenderProvider(){} + public: + /** + \brief This method can *always* return NULL. + Thus need to always check that what you are getting is what + you think it is. + + \return Perhaps a profile event sender. + */ + virtual PxProfileEventSender* getProfileEventSender() = 0; + }; +} } + +#endif // PXPVDSDK_PXPROFILEEVENTSENDER_H diff --git a/PxShared/src/pvd/src/PxProfileEventSerialization.h b/PxShared/src/pvd/src/PxProfileEventSerialization.h new file mode 100644 index 0000000..07c0563 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileEventSerialization.h @@ -0,0 +1,258 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef PXPVDSDK_PXPROFILEEVENTSERIALIZATION_H +#define PXPVDSDK_PXPROFILEEVENTSERIALIZATION_H + +#include "PxProfileBase.h" +#include "PxProfileDataParsing.h" +#include "PxProfileEvents.h" + +namespace physx { namespace profile { + + /** + * Array type must be a pxu8 container. Templated so that this object can write + * to different collections. 
+ */ + + template<typename TArrayType> + struct EventSerializer + { + TArrayType* mArray; + EventSerializer( TArrayType* inA ) : mArray( inA ) {} + + template<typename TDataType> + uint32_t streamify( const char*, const TDataType& inType ) + { + return mArray->write( inType ); + } + + uint32_t streamify( const char*, const char*& inType ) + { + PX_ASSERT( inType != NULL ); + uint32_t len( static_cast<uint32_t>( strlen( inType ) ) ); + ++len; //include the null terminator + uint32_t writtenSize = 0; + writtenSize = mArray->write(len); + writtenSize += mArray->write(inType, len); + return writtenSize; + } + + uint32_t streamify( const char*, const uint8_t* inData, uint32_t len ) + { + uint32_t writtenSize = mArray->write(len); + if ( len ) + writtenSize += mArray->write(inData, len); + return writtenSize; + } + + uint32_t streamify( const char* nm, const uint64_t& inType, EventStreamCompressionFlags::Enum inFlags ) + { + uint32_t writtenSize = 0; + switch( inFlags ) + { + case EventStreamCompressionFlags::U8: + writtenSize = streamify(nm, static_cast<uint8_t>(inType)); + break; + case EventStreamCompressionFlags::U16: + writtenSize = streamify(nm, static_cast<uint16_t>(inType)); + break; + case EventStreamCompressionFlags::U32: + writtenSize = streamify(nm, static_cast<uint32_t>(inType)); + break; + case EventStreamCompressionFlags::U64: + writtenSize = streamify(nm, inType); + break; + } + return writtenSize; + } + + uint32_t streamify( const char* nm, const uint32_t& inType, EventStreamCompressionFlags::Enum inFlags ) + { + uint32_t writtenSize = 0; + switch( inFlags ) + { + case EventStreamCompressionFlags::U8: + writtenSize = streamify(nm, static_cast<uint8_t>(inType)); + break; + case EventStreamCompressionFlags::U16: + writtenSize = streamify(nm, static_cast<uint16_t>(inType)); + break; + case EventStreamCompressionFlags::U32: + case EventStreamCompressionFlags::U64: + writtenSize = streamify(nm, inType); + break; + } + return writtenSize; + } + }; + + /** + * 
The event deserializes takes a buffer implements the streamify functions + * by setting the passed in data to the data in the buffer. + */ + template<bool TSwapBytes> + struct EventDeserializer + { + const uint8_t* mData; + uint32_t mLength; + bool mFail; + + EventDeserializer( const uint8_t* inData, uint32_t inLength ) + : mData( inData ) + , mLength( inLength ) + , mFail( false ) + { + if ( mData == NULL ) + mLength = 0; + } + + bool val() { return TSwapBytes; } + + uint32_t streamify( const char* , uint8_t& inType ) + { + uint8_t* theData = reinterpret_cast<uint8_t*>( &inType ); //type punned pointer... + if ( mFail || sizeof( inType ) > mLength ) + { + PX_ASSERT( false ); + mFail = true; + } + else + { + for( uint32_t idx = 0; idx < sizeof( uint8_t ); ++idx, ++mData, --mLength ) + theData[idx] = *mData; + } + return 0; + } + + //default streamify reads things natively as bytes. + template<typename TDataType> + uint32_t streamify( const char* , TDataType& inType ) + { + uint8_t* theData = reinterpret_cast<uint8_t*>( &inType ); //type punned pointer... 
+ if ( mFail || sizeof( inType ) > mLength ) + { + PX_ASSERT( false ); + mFail = true; + } + else + { + for( uint32_t idx = 0; idx < sizeof( TDataType ); ++idx, ++mData, --mLength ) + theData[idx] = *mData; + bool temp = val(); + if ( temp ) + BlockParseFunctions::swapBytes<sizeof(TDataType)>( theData ); + } + return 0; + } + + uint32_t streamify( const char*, const char*& inType ) + { + uint32_t theLen; + streamify( "", theLen ); + theLen = PxMin( theLen, mLength ); + inType = reinterpret_cast<const char*>( mData ); + mData += theLen; + mLength -= theLen; + return 0; + } + + uint32_t streamify( const char*, const uint8_t*& inData, uint32_t& len ) + { + uint32_t theLen; + streamify( "", theLen ); + theLen = PxMin( theLen, mLength ); + len = theLen; + inData = reinterpret_cast<const uint8_t*>( mData ); + mData += theLen; + mLength -= theLen; + return 0; + } + + uint32_t streamify( const char* nm, uint64_t& inType, EventStreamCompressionFlags::Enum inFlags ) + { + switch( inFlags ) + { + case EventStreamCompressionFlags::U8: + { + uint8_t val=0; + streamify( nm, val ); + inType = val; + } + break; + case EventStreamCompressionFlags::U16: + { + uint16_t val; + streamify( nm, val ); + inType = val; + } + break; + case EventStreamCompressionFlags::U32: + { + uint32_t val; + streamify( nm, val ); + inType = val; + } + break; + case EventStreamCompressionFlags::U64: + streamify( nm, inType ); + break; + } + return 0; + } + + uint32_t streamify( const char* nm, uint32_t& inType, EventStreamCompressionFlags::Enum inFlags ) + { + switch( inFlags ) + { + case EventStreamCompressionFlags::U8: + { + uint8_t val=0; + streamify( nm, val ); + inType = val; + } + break; + case EventStreamCompressionFlags::U16: + { + uint16_t val=0; + streamify( nm, val ); + inType = val; + } + break; + case EventStreamCompressionFlags::U32: + case EventStreamCompressionFlags::U64: + streamify( nm, inType ); + break; + } + return 0; + } + }; +}} +#endif // PXPVDSDK_PXPROFILEEVENTSERIALIZATION_H diff 
--git a/PxShared/src/pvd/src/PxProfileEventSystem.h b/PxShared/src/pvd/src/PxProfileEventSystem.h new file mode 100644 index 0000000..7411824 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileEventSystem.h @@ -0,0 +1,56 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#ifndef PXPVDSDK_PXPROFILEEVENTSYSTEM_H +#define PXPVDSDK_PXPROFILEEVENTSYSTEM_H + +#include "PxProfileBase.h" +#include "PxProfileEventSender.h" +#include "PxProfileEventBufferClient.h" +#include "PxProfileEventBufferClientManager.h" + +namespace physx { namespace profile { + class PxProfileContextProvider; + class PxProfileEventMutex; + class PxProfileEventFilter; + + /** + * Wraps the different interfaces into one object. + */ + class PxProfileEventSystem : public PxProfileEventSender + , public PxProfileEventBufferClient + , public PxProfileEventBufferClientManager + , public PxProfileEventFlusher + { + protected: + ~PxProfileEventSystem(){} + public: + virtual void release() = 0; + }; +} } + +#endif // PXPVDSDK_PXPROFILEEVENTSYSTEM_H diff --git a/PxShared/src/pvd/src/PxProfileEvents.h b/PxShared/src/pvd/src/PxProfileEvents.h new file mode 100644 index 0000000..25f09d7 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileEvents.h @@ -0,0 +1,706 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXPVDSDK_PXPROFILEEVENTS_H +#define PXPVDSDK_PXPROFILEEVENTS_H + +#include "foundation/PxMath.h" +#include "foundation/PxAssert.h" + +#include "PxProfileBase.h" +#include "PxProfileEventId.h" + + +#define PX_PROFILE_UNION_1(a) physx::profile::TUnion<a, physx::profile::Empty> +#define PX_PROFILE_UNION_2(a,b) physx::profile::TUnion<a, PX_PROFILE_UNION_1(b)> +#define PX_PROFILE_UNION_3(a,b,c) physx::profile::TUnion<a, PX_PROFILE_UNION_2(b,c)> +#define PX_PROFILE_UNION_4(a,b,c,d) physx::profile::TUnion<a, PX_PROFILE_UNION_3(b,c,d)> +#define PX_PROFILE_UNION_5(a,b,c,d,e) physx::profile::TUnion<a, PX_PROFILE_UNION_4(b,c,d,e)> +#define PX_PROFILE_UNION_6(a,b,c,d,e,f) physx::profile::TUnion<a, PX_PROFILE_UNION_5(b,c,d,e,f)> +#define PX_PROFILE_UNION_7(a,b,c,d,e,f,g) physx::profile::TUnion<a, PX_PROFILE_UNION_6(b,c,d,e,f,g)> +#define PX_PROFILE_UNION_8(a,b,c,d,e,f,g,h) physx::profile::TUnion<a, PX_PROFILE_UNION_7(b,c,d,e,f,g,h)> +#define PX_PROFILE_UNION_9(a,b,c,d,e,f,g,h,i) physx::profile::TUnion<a, PX_PROFILE_UNION_8(b,c,d,e,f,g,h,i)> + +namespace physx { namespace profile { + + struct Empty {}; + + template <typename T> struct Type2Type {}; + + template <typename U, typename V> + union TUnion + { + typedef U Head; + typedef V Tail; + + Head head; + Tail tail; + + template 
<typename TDataType> + void init(const TDataType& inData) + { + toType(Type2Type<TDataType>()).init(inData); + } + + template <typename TDataType> + PX_FORCE_INLINE TDataType& toType(const Type2Type<TDataType>& outData) { return tail.toType(outData); } + + PX_FORCE_INLINE Head& toType(const Type2Type<Head>&) { return head; } + + template <typename TDataType> + PX_FORCE_INLINE const TDataType& toType(const Type2Type<TDataType>& outData) const { return tail.toType(outData); } + + PX_FORCE_INLINE const Head& toType(const Type2Type<Head>&) const { return head; } + }; + + struct EventTypes + { + enum Enum + { + Unknown = 0, + StartEvent, + StopEvent, + RelativeStartEvent, //reuses context,id from the earlier event. + RelativeStopEvent, //reuses context,id from the earlier event. + EventValue, + CUDAProfileBuffer //obsolete, placeholder to skip data from PhysX SDKs < 3.4 + }; + }; + + struct EventStreamCompressionFlags + { + enum Enum + { + U8 = 0, + U16 = 1, + U32 = 2, + U64 = 3, + CompressionMask = 3 + }; + }; + +#if (PX_PS4) || (PX_APPLE_FAMILY) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wimplicit-fallthrough" +#endif + + //Find the smallest value that will represent the incoming value without loss. + //We can enlarge the current compression value, but we can't make is smaller. + //In this way, we can use this function to find the smallest compression setting + //that will work for a set of values. 
+ inline EventStreamCompressionFlags::Enum findCompressionValue( uint64_t inValue, EventStreamCompressionFlags::Enum inCurrentCompressionValue = EventStreamCompressionFlags::U8 ) + { + PX_ASSERT_WITH_MESSAGE( (inCurrentCompressionValue >= EventStreamCompressionFlags::U8) && + (inCurrentCompressionValue <= EventStreamCompressionFlags::U64), + "Invalid inCurrentCompressionValue in profile::findCompressionValue"); + + //Fallthrough is intentional + switch( inCurrentCompressionValue ) + { + case EventStreamCompressionFlags::U8: + if ( inValue <= UINT8_MAX ) + return EventStreamCompressionFlags::U8; + case EventStreamCompressionFlags::U16: + if ( inValue <= UINT16_MAX ) + return EventStreamCompressionFlags::U16; + case EventStreamCompressionFlags::U32: + if ( inValue <= UINT32_MAX ) + return EventStreamCompressionFlags::U32; + case EventStreamCompressionFlags::U64: + break; + } + return EventStreamCompressionFlags::U64; + } + + //Find the smallest value that will represent the incoming value without loss. + //We can enlarge the current compression value, but we can't make is smaller. + //In this way, we can use this function to find the smallest compression setting + //that will work for a set of values. 
+ inline EventStreamCompressionFlags::Enum findCompressionValue( uint32_t inValue, EventStreamCompressionFlags::Enum inCurrentCompressionValue = EventStreamCompressionFlags::U8 ) + { + PX_ASSERT_WITH_MESSAGE( (inCurrentCompressionValue >= EventStreamCompressionFlags::U8) && + (inCurrentCompressionValue <= EventStreamCompressionFlags::U64), + "Invalid inCurrentCompressionValue in profile::findCompressionValue"); + + //Fallthrough is intentional + switch( inCurrentCompressionValue ) + { + case EventStreamCompressionFlags::U8: + if ( inValue <= UINT8_MAX ) + return EventStreamCompressionFlags::U8; + case EventStreamCompressionFlags::U16: + if ( inValue <= UINT16_MAX ) + return EventStreamCompressionFlags::U16; + case EventStreamCompressionFlags::U32: + case EventStreamCompressionFlags::U64: + break; + } + return EventStreamCompressionFlags::U32; + } + +#if (PX_PS4) || (PX_APPLE_FAMILY) +#pragma clang diagnostic pop +#endif + + //Event header is 32 bytes and precedes all events. + struct EventHeader + { + uint8_t mEventType; //Used to parse the correct event out of the stream + uint8_t mStreamOptions; //Timestamp compression, etc. 
+ uint16_t mEventId; //16 bit per-event-system event id + EventHeader( uint8_t type = 0, uint16_t id = 0 ) + : mEventType( type ) + , mStreamOptions( uint8_t(-1) ) + , mEventId( id ) + { + } + + EventHeader( EventTypes::Enum type, uint16_t id ) + : mEventType( static_cast<uint8_t>( type ) ) + , mStreamOptions( uint8_t(-1) ) + , mEventId( id ) + { + } + + EventStreamCompressionFlags::Enum getTimestampCompressionFlags() const + { + return static_cast<EventStreamCompressionFlags::Enum> ( mStreamOptions & EventStreamCompressionFlags::CompressionMask ); + } + + uint64_t compressTimestamp( uint64_t inLastTimestamp, uint64_t inCurrentTimestamp ) + { + mStreamOptions = EventStreamCompressionFlags::U64; + uint64_t retval = inCurrentTimestamp; + if ( inLastTimestamp ) + { + retval = inCurrentTimestamp - inLastTimestamp; + EventStreamCompressionFlags::Enum compressionValue = findCompressionValue( retval ); + mStreamOptions = static_cast<uint8_t>( compressionValue ); + if ( compressionValue == EventStreamCompressionFlags::U64 ) + retval = inCurrentTimestamp; //just send the timestamp as is. 
+ } + return retval; + } + + uint64_t uncompressTimestamp( uint64_t inLastTimestamp, uint64_t inCurrentTimestamp ) const + { + if ( getTimestampCompressionFlags() != EventStreamCompressionFlags::U64 ) + return inLastTimestamp + inCurrentTimestamp; + return inCurrentTimestamp; + } + + void setContextIdCompressionFlags( uint64_t inContextId ) + { + uint8_t options = static_cast<uint8_t>( findCompressionValue( inContextId ) ); + mStreamOptions = uint8_t(mStreamOptions | options << 2); + } + + EventStreamCompressionFlags::Enum getContextIdCompressionFlags() const + { + return static_cast< EventStreamCompressionFlags::Enum >( ( mStreamOptions >> 2 ) & EventStreamCompressionFlags::CompressionMask ); + } + + bool operator==( const EventHeader& inOther ) const + { + return mEventType == inOther.mEventType + && mStreamOptions == inOther.mStreamOptions + && mEventId == inOther.mEventId; + } + + template<typename TStreamType> + inline uint32_t streamify( TStreamType& inStream ) + { + uint32_t writtenSize = inStream.streamify( "EventType", mEventType ); + writtenSize += inStream.streamify("StreamOptions", mStreamOptions); //Timestamp compression, etc. + writtenSize += inStream.streamify("EventId", mEventId); //16 bit per-event-system event id + return writtenSize; + } + + + }; + + //Declaration of type level getEventType function that maps enumeration event types to datatypes + template<typename TDataType> + inline EventTypes::Enum getEventType() { PX_ASSERT( false ); return EventTypes::Unknown; } + + //Relative profile event means this event is sharing the context and thread id + //with the event before it. 
+ struct RelativeProfileEvent + { + uint64_t mTensOfNanoSeconds; //timestamp is in tensOfNanonseconds + void init( uint64_t inTs ) { mTensOfNanoSeconds = inTs; } + void init( const RelativeProfileEvent& inData ) { mTensOfNanoSeconds = inData.mTensOfNanoSeconds; } + bool operator==( const RelativeProfileEvent& other ) const + { + return mTensOfNanoSeconds == other.mTensOfNanoSeconds; + } + template<typename TStreamType> + uint32_t streamify( TStreamType& inStream, const EventHeader& inHeader ) + { + return inStream.streamify( "TensOfNanoSeconds", mTensOfNanoSeconds, inHeader.getTimestampCompressionFlags() ); + } + uint64_t getTimestamp() const { return mTensOfNanoSeconds; } + void setTimestamp( uint64_t inTs ) { mTensOfNanoSeconds = inTs; } + void setupHeader( EventHeader& inHeader, uint64_t inLastTimestamp ) + { + mTensOfNanoSeconds = inHeader.compressTimestamp( inLastTimestamp, mTensOfNanoSeconds ); + } + + uint32_t getEventSize(const EventHeader& inHeader) + { + uint32_t size = 0; + switch (inHeader.getTimestampCompressionFlags()) + { + case EventStreamCompressionFlags::U8: + size = 1; + break; + case EventStreamCompressionFlags::U16: + size = 2; + break; + case EventStreamCompressionFlags::U32: + size = 4; + break; + case EventStreamCompressionFlags::U64: + size = 8; + break; + } + return size; + } + }; + + //Start version of the relative event. 
+	//Start version of relative event.
+	//Carries only the time data inherited from RelativeProfileEvent; handle()
+	//forwards it to the handler's onStartEvent().
+	struct RelativeStartEvent : public RelativeProfileEvent
+	{
+		void init( uint64_t inTs = 0 ) { RelativeProfileEvent::init( inTs ); }
+		void init( const RelativeStartEvent& inData ) { RelativeProfileEvent::init( inData ); }
+		//Dispatches to inHdlr->onStartEvent with the event id wrapped in a
+		//PxProfileEventId, the caller-supplied thread/context/cpu/priority and
+		//this event's mTensOfNanoSeconds.
+		template<typename THandlerType>
+		void handle( THandlerType* inHdlr, uint16_t eventId, uint32_t thread, uint64_t context, uint8_t inCpuId, uint8_t threadPriority ) const
+		{
+			inHdlr->onStartEvent( PxProfileEventId( eventId ), thread, context, inCpuId, threadPriority, mTensOfNanoSeconds );
+		}
+	};
+	
+	template<> inline EventTypes::Enum getEventType<RelativeStartEvent>() { return EventTypes::RelativeStartEvent; }
+	
+	//Stop version of relative event.
+	//Identical to RelativeStartEvent except that handle() calls onStopEvent().
+	struct RelativeStopEvent : public RelativeProfileEvent
+	{
+		void init( uint64_t inTs = 0 ) { RelativeProfileEvent::init( inTs ); }
+		void init( const RelativeStopEvent& inData ) { RelativeProfileEvent::init( inData ); }
+		template<typename THandlerType>
+		void handle( THandlerType* inHdlr, uint16_t eventId, uint32_t thread, uint64_t context, uint8_t inCpuId, uint8_t threadPriority ) const
+		{
+			inHdlr->onStopEvent( PxProfileEventId( eventId ), thread, context, inCpuId, threadPriority, mTensOfNanoSeconds );
+		}
+	};
+
+	template<> inline EventTypes::Enum getEventType<RelativeStopEvent>() { return EventTypes::RelativeStopEvent; }
+
+	//Thread / context / cpu / priority data attached to a profile event.
+	struct EventContextInformation
+	{
+		uint64_t mContextId;
+		uint32_t mThreadId; //Thread this event was taken from
+		uint8_t mThreadPriority;
+		uint8_t mCpuId;
+
+		//With no arguments every field is set to the maximum value of its type.
+		//Note the parameter order (thread, context, priority, cpu) differs from
+		//the member declaration order.
+		void init( uint32_t inThreadId = UINT32_MAX
+					, uint64_t inContextId = (uint64_t(-1))
+					, uint8_t inPriority = UINT8_MAX
+					, uint8_t inCpuId = UINT8_MAX )
+		{
+			mContextId = inContextId;
+			mThreadId = inThreadId;
+			mThreadPriority = inPriority;
+			mCpuId = inCpuId;
+		}
+
+		//Field-by-field copy of inData.
+		void init( const EventContextInformation& inData )
+		{
+			mContextId = inData.mContextId;
+			mThreadId = inData.mThreadId;
+			mThreadPriority = inData.mThreadPriority;
+			mCpuId = inData.mCpuId;
+		}
+
+		//Passes the fields to inStream in the order ThreadId, ContextId,
+		//ThreadPriority, CpuId. Only ContextId is streamed with compression
+		//flags. Returns the sum of the values returned by inStream.streamify().
+		template<typename TStreamType>
+		uint32_t streamify( TStreamType& inStream, EventStreamCompressionFlags::Enum inContextIdFlags )
+		{
+			uint32_t writtenSize = inStream.streamify( "ThreadId", mThreadId );
+			writtenSize += inStream.streamify("ContextId", mContextId, inContextIdFlags);
+			writtenSize += inStream.streamify("ThreadPriority", mThreadPriority);
+			writtenSize += inStream.streamify("CpuId", mCpuId);
+			return writtenSize;
+		}
+		
+		bool operator==( const EventContextInformation& other ) const
+		{
+			return mThreadId == other.mThreadId
+				&& mContextId == other.mContextId
+				&& mThreadPriority == other.mThreadPriority
+				&& mCpuId == other.mCpuId;
+		}
+
+		//NOTE(review): this assigns a value-initialized object. The struct has no
+		//user-provided constructor, so the fields become zero, NOT the max-value
+		//defaults of init(). Confirm that zero is the intended "default" state.
+		void setToDefault()
+		{
+			*this = EventContextInformation();
+		}
+	};
+	
+	//Profile event contains all the data required to tell the profile what is going
+	//on.
+	struct ProfileEvent
+	{
+		EventContextInformation mContextInformation;
+		RelativeProfileEvent mTimeData; //timestamp; read elsewhere in this file as mTimeData.mTensOfNanoSeconds (the original comment said "seconds")
+		//Parameter order here is (thread, context, cpu, priority, time) while
+		//EventContextInformation::init expects (thread, context, priority, cpu);
+		//the call below swaps the last two accordingly.
+		void init( uint32_t inThreadId, uint64_t inContextId, uint8_t inCpuId, uint8_t inPriority, uint64_t inTs )
+		{
+			mContextInformation.init( inThreadId, inContextId, inPriority, inCpuId );
+			mTimeData.init( inTs );
+		}
+
+		void init( const ProfileEvent& inData )
+		{
+			mContextInformation.init( inData.mContextInformation );
+			mTimeData.init( inData.mTimeData );
+		}
+
+		bool operator==( const ProfileEvent& other ) const
+		{
+			return mContextInformation == other.mContextInformation
+					&& mTimeData == other.mTimeData; 
+		}
+
+		//Streams the context information first, then the time data. Returns the
+		//sum of the sizes reported by both calls.
+		template<typename TStreamType>
+		uint32_t streamify( TStreamType& inStream, const EventHeader& inHeader )
+		{
+			uint32_t writtenSize = mContextInformation.streamify(inStream, inHeader.getContextIdCompressionFlags());
+			writtenSize += mTimeData.streamify(inStream, inHeader);
+			return writtenSize;
+		}
+
+		//Computes the event size from the header's compression flags without
+		//touching a stream: 1/2/4/8 bytes for the timestamp, 6 fixed bytes, and
+		//1/2/4/8 bytes for the context id. Neither switch has a default case, so
+		//a flag value other than U8/U16/U32/U64 contributes 0.
+		uint32_t getEventSize(const EventHeader& inHeader)
+		{
+			uint32_t eventSize = 0;
+			// time is stored depending on the compression flag, see mTimeData.streamify(inStream, inHeader);
+			switch (inHeader.getTimestampCompressionFlags())
+			{
+			case EventStreamCompressionFlags::U8:
+				eventSize++;
+				break;
+			case EventStreamCompressionFlags::U16:
+				eventSize += 2;
+				break;
+			case EventStreamCompressionFlags::U32:
+				eventSize += 4;
+				break;
+			case EventStreamCompressionFlags::U64:
+				eventSize += 8;
+				break;
+			}
+
+			// context information
+			// mContextInformation.streamify( inStream, inHeader.getContextIdCompressionFlags() );
+			eventSize += 6; // uint32_t mThreadId; uint8_t mThreadPriority; uint8_t mCpuId;
+			switch (inHeader.getContextIdCompressionFlags())
+			{
+			case EventStreamCompressionFlags::U8:
+				eventSize++;
+				break;
+			case EventStreamCompressionFlags::U16:
+				eventSize += 2;
+				break;
+			case EventStreamCompressionFlags::U32:
+				eventSize += 4;
+				break;
+			case EventStreamCompressionFlags::U64:
+				eventSize += 8;
+				break;
+			}
+
+			return eventSize;
+		}
+
+		uint64_t getTimestamp() const { return mTimeData.getTimestamp(); }
+		void setTimestamp( uint64_t inTs ) { mTimeData.setTimestamp( inTs ); }
+
+		//Lets the time data configure the header first (relative to
+		//inLastTimestamp), then sets the header's context-id compression flags
+		//from mContextId.
+		void setupHeader( EventHeader& inHeader, uint64_t inLastTimestamp )
+		{
+			mTimeData.setupHeader( inHeader, inLastTimestamp );
+			inHeader.setContextIdCompressionFlags( mContextInformation.mContextId );
+		}
+	};
+
+	//profile start event starts the profile session.
+	struct StartEvent : public ProfileEvent
+	{
+		void init( uint32_t inThreadId = 0, uint64_t inContextId = 0, uint8_t inCpuId = 0, uint8_t inPriority = 0, uint64_t inTensOfNanoSeconds = 0 )
+		{
+			ProfileEvent::init( inThreadId, inContextId, inCpuId, inPriority, inTensOfNanoSeconds );
+		}
+		void init( const StartEvent& inData )
+		{
+			ProfileEvent::init( inData );
+		}
+
+		//Builds the relative (time-only) counterpart of this event from
+		//mTimeData.mTensOfNanoSeconds.
+		RelativeStartEvent getRelativeEvent() const { RelativeStartEvent theEvent; theEvent.init( mTimeData.mTensOfNanoSeconds ); return theEvent; }
+		EventTypes::Enum getRelativeEventType() const { return getEventType<RelativeStartEvent>(); }
+	};
+	
+	template<> inline EventTypes::Enum getEventType<StartEvent>() { return EventTypes::StartEvent; }
+
+	//Profile stop event stops the profile session.
+ struct StopEvent : public ProfileEvent + { + void init( uint32_t inThreadId = 0, uint64_t inContextId = 0, uint8_t inCpuId = 0, uint8_t inPriority = 0, uint64_t inTensOfNanoSeconds = 0 ) + { + ProfileEvent::init( inThreadId, inContextId, inCpuId, inPriority, inTensOfNanoSeconds ); + } + void init( const StopEvent& inData ) + { + ProfileEvent::init( inData ); + } + RelativeStopEvent getRelativeEvent() const { RelativeStopEvent theEvent; theEvent.init( mTimeData.mTensOfNanoSeconds ); return theEvent; } + EventTypes::Enum getRelativeEventType() const { return getEventType<RelativeStopEvent>(); } + }; + + template<> inline EventTypes::Enum getEventType<StopEvent>() { return EventTypes::StopEvent; } + + struct EventValue + { + uint64_t mValue; + uint64_t mContextId; + uint32_t mThreadId; + void init( int64_t inValue = 0, uint64_t inContextId = 0, uint32_t inThreadId = 0 ) + { + mValue = static_cast<uint64_t>( inValue ); + mContextId = inContextId; + mThreadId = inThreadId; + } + + void init( const EventValue& inData ) + { + mValue = inData.mValue; + mContextId = inData.mContextId; + mThreadId = inData.mThreadId; + } + + int64_t getValue() const { return static_cast<int16_t>( mValue ); } + + void setupHeader( EventHeader& inHeader ) + { + mValue = inHeader.compressTimestamp( 0, mValue ); + inHeader.setContextIdCompressionFlags( mContextId ); + } + + template<typename TStreamType> + uint32_t streamify( TStreamType& inStream, const EventHeader& inHeader ) + { + uint32_t writtenSize = inStream.streamify("Value", mValue, inHeader.getTimestampCompressionFlags()); + writtenSize += inStream.streamify("ContextId", mContextId, inHeader.getContextIdCompressionFlags()); + writtenSize += inStream.streamify("ThreadId", mThreadId); + return writtenSize; + } + + uint32_t getEventSize(const EventHeader& inHeader) + { + uint32_t eventSize = 0; + // value + switch (inHeader.getTimestampCompressionFlags()) + { + case EventStreamCompressionFlags::U8: + eventSize++; + break; + case 
EventStreamCompressionFlags::U16: + eventSize += 2; + break; + case EventStreamCompressionFlags::U32: + eventSize += 4; + break; + case EventStreamCompressionFlags::U64: + eventSize += 8; + break; + } + + // context information + switch (inHeader.getContextIdCompressionFlags()) + { + case EventStreamCompressionFlags::U8: + eventSize++; + break; + case EventStreamCompressionFlags::U16: + eventSize += 2; + break; + case EventStreamCompressionFlags::U32: + eventSize += 4; + break; + case EventStreamCompressionFlags::U64: + eventSize += 8; + break; + } + + eventSize += 4; // uint32_t mThreadId; + + return eventSize; + } + + bool operator==( const EventValue& other ) const + { + return mValue == other.mValue + && mContextId == other.mContextId + && mThreadId == other.mThreadId; + } + + template<typename THandlerType> + void handle( THandlerType* inHdlr, uint16_t eventId ) const + { + inHdlr->onEventValue( PxProfileEventId( eventId ), mThreadId, mContextId, getValue() ); + } + + }; + template<> inline EventTypes::Enum getEventType<EventValue>() { return EventTypes::EventValue; } + + //obsolete, placeholder to skip data from PhysX SDKs < 3.4 + struct CUDAProfileBuffer + { + uint64_t mTimestamp; + float mTimespan; + const uint8_t* mCudaData; + uint32_t mBufLen; + uint32_t mVersion; + + template<typename TStreamType> + uint32_t streamify( TStreamType& inStream, const EventHeader& ) + { + uint32_t writtenSize = inStream.streamify("Timestamp", mTimestamp); + writtenSize += inStream.streamify("Timespan", mTimespan); + writtenSize += inStream.streamify("CudaData", mCudaData, mBufLen); + writtenSize += inStream.streamify("BufLen", mBufLen); + writtenSize += inStream.streamify("Version", mVersion); + return writtenSize; + } + + bool operator==( const CUDAProfileBuffer& other ) const + { + return mTimestamp == other.mTimestamp + && mTimespan == other.mTimespan + && mBufLen == other.mBufLen + && memcmp( mCudaData, other.mCudaData, mBufLen ) == 0 + && mVersion == other.mVersion; + } 
+ }; + + template<> inline EventTypes::Enum getEventType<CUDAProfileBuffer>() { return EventTypes::CUDAProfileBuffer; } + + //Provides a generic equal operation for event data objects. + template <typename TEventData> + struct EventDataEqualOperator + { + TEventData mData; + EventDataEqualOperator( const TEventData& inD ) : mData( inD ) {} + template<typename TDataType> bool operator()( const TDataType& inRhs ) const { return mData.toType( Type2Type<TDataType>() ) == inRhs; } + bool operator()() const { return false; } + }; + + /** + * Generic event container that combines and even header with the generic event data type. + * Provides unsafe and typesafe access to the event data. + */ + class Event + { + public: + typedef PX_PROFILE_UNION_7(StartEvent, StopEvent, RelativeStartEvent, RelativeStopEvent, EventValue, CUDAProfileBuffer, uint8_t) EventData; + + private: + EventHeader mHeader; + EventData mData; + public: + Event() {} + + template <typename TDataType> + Event( EventHeader inHeader, const TDataType& inData ) + : mHeader( inHeader ) + { + mData.init<TDataType>(inData); + } + + template<typename TDataType> + Event( uint16_t eventId, const TDataType& inData ) + : mHeader( getEventType<TDataType>(), eventId ) + { + mData.init<TDataType>(inData); + } + const EventHeader& getHeader() const { return mHeader; } + const EventData& getData() const { return mData; } + + template<typename TDataType> + const TDataType& getValue() const { PX_ASSERT( mHeader.mEventType == getEventType<TDataType>() ); return mData.toType<TDataType>(); } + + template<typename TDataType> + TDataType& getValue() { PX_ASSERT( mHeader.mEventType == getEventType<TDataType>() ); return mData.toType<TDataType>(); } + + template<typename TRetVal, typename TOperator> + inline TRetVal visit( TOperator inOp ) const; + + bool operator==( const Event& inOther ) const + { + if ( !(mHeader == inOther.mHeader ) ) return false; + if ( mHeader.mEventType ) + return inOther.visit<bool>( 
EventDataEqualOperator<EventData>( mData ) ); + return true; + } + }; + + //Combining the above union type with an event type means that an object can get the exact + //data out of the union. Using this function means that all callsites will be forced to + //deal with the newer datatypes and that the switch statement only exists in once place. + //Implements conversion from enum -> datatype + template<typename TRetVal, typename TOperator> + TRetVal visit( EventTypes::Enum inEventType, const Event::EventData& inData, TOperator inOperator ) + { + switch( inEventType ) + { + case EventTypes::StartEvent: return inOperator( inData.toType( Type2Type<StartEvent>() ) ); + case EventTypes::StopEvent: return inOperator( inData.toType( Type2Type<StopEvent>() ) ); + case EventTypes::RelativeStartEvent: return inOperator( inData.toType( Type2Type<RelativeStartEvent>() ) ); + case EventTypes::RelativeStopEvent: return inOperator( inData.toType( Type2Type<RelativeStopEvent>() ) ); + case EventTypes::EventValue: return inOperator( inData.toType( Type2Type<EventValue>() ) ); + //obsolete, placeholder to skip data from PhysX SDKs < 3.4 + case EventTypes::CUDAProfileBuffer: return inOperator( inData.toType( Type2Type<CUDAProfileBuffer>() ) ); + case EventTypes::Unknown: break; + } + uint8_t type = static_cast<uint8_t>( inEventType ); + return inOperator( type ); + } + + template<typename TRetVal, typename TOperator> + inline TRetVal Event::visit( TOperator inOp ) const + { + return physx::profile::visit<TRetVal>( static_cast<EventTypes::Enum>(mHeader.mEventType), mData, inOp ); + } +} } + +#endif // PXPVDSDK_PXPROFILEEVENTS_H diff --git a/PxShared/src/pvd/src/PxProfileMemory.h b/PxShared/src/pvd/src/PxProfileMemory.h new file mode 100644 index 0000000..30e8bdc --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileMemory.h @@ -0,0 +1,99 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided 
separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+
+
+#ifndef PXPVDSDK_PXPROFILEMEMORY_H
+#define PXPVDSDK_PXPROFILEMEMORY_H
+
+#include "PxProfileBase.h"
+#include "PxProfileEventBufferClientManager.h"
+#include "PxProfileEventSender.h"
+#include "PsBroadcast.h"
+
+namespace physx { namespace profile {
+
+	/**
+	\brief Record events so a late-connecting client knows about
+	all outstanding allocations
+
+	The destructor is protected, so instances cannot be deleted through this
+	interface; call release() instead.
+	*/
+	class PxProfileMemoryEventRecorder : public shdfnd::AllocationListener
+	{
+	protected:
+		virtual ~PxProfileMemoryEventRecorder(){}
+	public:
+		/**
+		\brief Set the allocation listener
+		\param inListener Allocation listener.
+		*/
+		virtual void setListener(AllocationListener* inListener) = 0;
+		/**
+		\brief Release the instance.
+		*/
+		virtual void release() = 0;
+
+		/**
+		\brief Create the profile memory event recorder.
+		\param inAllocator Allocation callback.
+		\return Reference to the newly created recorder.
+
+		NOTE(review): the allocator is taken by pointer here but by reference in
+		PxProfileMemoryEventBuffer::createMemoryEventBuffer - confirm whether a
+		null allocator is meant to be accepted.
+		*/
+		static PxProfileMemoryEventRecorder& createRecorder(PxAllocatorCallback* inAllocator);
+	};
+
+	/**
+	\brief Stores memory events into the memory buffer. 
+
+	The destructor is protected, so instances cannot be deleted through this
+	interface; call release() instead.
+	*/
+	class PxProfileMemoryEventBuffer
+		: public shdfnd::AllocationListener //add a new event to the buffer
+		, public PxProfileEventBufferClientManager //add clients to handle the serialized memory events
+		, public PxProfileEventFlusher //flush the buffer
+	{
+	protected:
+		virtual ~PxProfileMemoryEventBuffer(){}
+	public:
+
+		/**
+		\brief Release the instance.
+		*/
+		virtual void release() = 0;
+
+		/**
+		\brief Create a non-mutex-protected event buffer.
+		\param inAllocator Allocation callback.
+		\param inBufferSize Internal buffer size. Defaults to 0x1000 (4096).
+		\return Reference to the newly created buffer.
+		*/
+		static PxProfileMemoryEventBuffer& createMemoryEventBuffer(PxAllocatorCallback& inAllocator, uint32_t inBufferSize = 0x1000);
+	};
+
+
+
+} } // namespace physx
+
+
+#endif // PXPVDSDK_PXPROFILEMEMORY_H
+
+
diff --git a/PxShared/src/pvd/src/PxProfileMemoryBuffer.h b/PxShared/src/pvd/src/PxProfileMemoryBuffer.h
new file mode 100644
index 0000000..ae957ce
--- /dev/null
+++ b/PxShared/src/pvd/src/PxProfileMemoryBuffer.h
@@ -0,0 +1,193 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef PXPVDSDK_PXPROFILEMEMORYBUFFER_H +#define PXPVDSDK_PXPROFILEMEMORYBUFFER_H + +#include "PxProfileBase.h" +#include "PsAllocator.h" +#include "foundation/PxMemory.h" + +namespace physx { namespace profile { + + template<typename TAllocator = typename shdfnd::AllocatorTraits<uint8_t>::Type > + class MemoryBuffer : public TAllocator + { + uint8_t* mBegin; + uint8_t* mEnd; + uint8_t* mCapacityEnd; + + public: + MemoryBuffer( const TAllocator& inAlloc = TAllocator() ) : TAllocator( inAlloc ), mBegin( 0 ), mEnd( 0 ), mCapacityEnd( 0 ) {} + ~MemoryBuffer() + { + if ( mBegin ) TAllocator::deallocate( mBegin ); + } + uint32_t size() const { return static_cast<uint32_t>( mEnd - mBegin ); } + uint32_t capacity() const { return static_cast<uint32_t>( mCapacityEnd - mBegin ); } + uint8_t* begin() { return mBegin; } + uint8_t* end() { return mEnd; } + void setEnd(uint8_t* nEnd) { mEnd = nEnd; } + const uint8_t* begin() const { return mBegin; } + const uint8_t* end() const { return mEnd; } + void clear() { mEnd = mBegin; } + uint32_t write( uint8_t inValue ) + { + growBuf( 1 ); + *mEnd = inValue; + ++mEnd; + return 1; + } + + template<typename TDataType> + uint32_t write( const TDataType& inValue ) + { + uint32_t writtenSize = sizeof(TDataType); + growBuf(writtenSize); + const uint8_t* __restrict readPtr = reinterpret_cast< const uint8_t* >( &inValue ); + uint8_t* __restrict writePtr = mEnd; + for ( uint32_t idx = 0; idx < sizeof(TDataType); ++idx ) writePtr[idx] = readPtr[idx]; + mEnd += writtenSize; + return writtenSize; + } + + template<typename TDataType> + uint32_t write( const 
TDataType* inValue, uint32_t inLength ) + { + if ( inValue && inLength ) + { + uint32_t writeSize = inLength * sizeof( TDataType ); + growBuf( writeSize ); + PxMemCopy( mBegin + size(), inValue, writeSize ); + mEnd += writeSize; + return writeSize; + } + return 0; + } + + // used by atomic write. Store the data and write the end afterwards + // we dont check the buffer size, it should not resize on the fly + template<typename TDataType> + uint32_t write(const TDataType* inValue, uint32_t inLength, int32_t index) + { + if (inValue && inLength) + { + uint32_t writeSize = inLength * sizeof(TDataType); + PX_ASSERT(mBegin + index + writeSize < mCapacityEnd); + PxMemCopy(mBegin + index, inValue, writeSize); + return writeSize; + } + return 0; + } + + void growBuf( uint32_t inAmount ) + { + uint32_t newSize = size() + inAmount; + reserve( newSize ); + } + void resize( uint32_t inAmount ) + { + reserve( inAmount ); + mEnd = mBegin + inAmount; + } + void reserve( uint32_t newSize ) + { + uint32_t currentSize = size(); + if ( newSize >= capacity() ) + { + const uint32_t allocSize = mBegin ? 
newSize * 2 : newSize; + + uint8_t* newData = static_cast<uint8_t*>(TAllocator::allocate(allocSize, __FILE__, __LINE__)); + memset(newData, 0xf,allocSize); + if ( mBegin ) + { + PxMemCopy( newData, mBegin, currentSize ); + TAllocator::deallocate( mBegin ); + } + mBegin = newData; + mEnd = mBegin + currentSize; + mCapacityEnd = mBegin + allocSize; + } + } + }; + + + class TempMemoryBuffer + { + uint8_t* mBegin; + uint8_t* mEnd; + uint8_t* mCapacityEnd; + + public: + TempMemoryBuffer(uint8_t* data, int32_t size) : mBegin(data), mEnd(data), mCapacityEnd(data + size) {} + ~TempMemoryBuffer() + { + } + uint32_t size() const { return static_cast<uint32_t>(mEnd - mBegin); } + uint32_t capacity() const { return static_cast<uint32_t>(mCapacityEnd - mBegin); } + const uint8_t* begin() { return mBegin; } + uint8_t* end() { return mEnd; } + const uint8_t* begin() const { return mBegin; } + const uint8_t* end() const { return mEnd; } + uint32_t write(uint8_t inValue) + { + *mEnd = inValue; + ++mEnd; + return 1; + } + + template<typename TDataType> + uint32_t write(const TDataType& inValue) + { + uint32_t writtenSize = sizeof(TDataType); + const uint8_t* __restrict readPtr = reinterpret_cast<const uint8_t*>(&inValue); + uint8_t* __restrict writePtr = mEnd; + for (uint32_t idx = 0; idx < sizeof(TDataType); ++idx) writePtr[idx] = readPtr[idx]; + mEnd += writtenSize; + return writtenSize; + } + + template<typename TDataType> + uint32_t write(const TDataType* inValue, uint32_t inLength) + { + if (inValue && inLength) + { + uint32_t writeSize = inLength * sizeof(TDataType); + PxMemCopy(mBegin + size(), inValue, writeSize); + mEnd += writeSize; + return writeSize; + } + return 0; + } + }; + +}} + +#endif // PXPVDSDK_PXPROFILEMEMORYBUFFER_H diff --git a/PxShared/src/pvd/src/PxProfileMemoryEventBuffer.h b/PxShared/src/pvd/src/PxProfileMemoryEventBuffer.h new file mode 100644 index 0000000..7cc50b6 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileMemoryEventBuffer.h @@ -0,0 +1,156 @@ 
+// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+
+
+#ifndef PXPVDSDK_PXPROFILEMEMORYEVENTBUFFER_H
+#define PXPVDSDK_PXPROFILEMEMORYEVENTBUFFER_H
+
+#include "PxProfileDataBuffer.h"
+#include "PxProfileMemoryEvents.h"
+#include "PxProfileMemoryEventTypes.h"
+#include "PxProfileScopedMutexLock.h"
+#include "PxProfileAllocatorWrapper.h"
+
+#include "PsHash.h"
+#include "PsHashMap.h"
+#include "PsUserAllocated.h"
+
+namespace physx { namespace profile {
+
+	//Serializes allocation / deallocation events into the DataBuffer base
+	//class. Type and file names are replaced by numeric handles; the first
+	//time a string is seen a StringTableEvent announcing its handle is emitted.
+	template<typename TMutex,
+			typename TScopedLock>
+	class MemoryEventBuffer : public DataBuffer<TMutex, TScopedLock>
+	{
+	public:
+		typedef DataBuffer<TMutex, TScopedLock> TBaseType;
+		typedef typename TBaseType::TMutexType TMutexType;
+		typedef typename TBaseType::TScopedLockType TScopedLockType;
+		typedef typename TBaseType::TU8AllocatorType TU8AllocatorType;
+		typedef typename TBaseType::TMemoryBufferType TMemoryBufferType;
+		typedef typename TBaseType::TBufferClientArray TBufferClientArray;
+		typedef shdfnd::HashMap<const char*, uint32_t, shdfnd::Hash<const char*>, TU8AllocatorType> TCharPtrToHandleMap;
+
+	protected:
+		//Maps a string to the handle that was sent for it. The map stores the
+		//caller's pointer, not a copy of the characters.
+		//NOTE(review): presumably the pointed-to strings outlive this buffer
+		//(e.g. literals) - verify against callers.
+		TCharPtrToHandleMap mStringTable;
+
+	public:
+
+		MemoryEventBuffer( PxAllocatorCallback& cback
+							, uint32_t inBufferFullAmount
+							, TMutexType* inBufferMutex )
+			: TBaseType( &cback, inBufferFullAmount, inBufferMutex, "struct physx::profile::MemoryEvent" )
+			, mStringTable( TU8AllocatorType( TBaseType::getWrapper(), "MemoryEventStringBuffer" ) )
+		{
+		}
+
+		//Returns the handle for inData, creating it on first use. NULL is
+		//treated as the empty string. New handles are numbered from 1 (table
+		//size + 1) and announced with a StringTableEvent before returning.
+		uint32_t getHandle( const char* inData )
+		{
+			if ( inData == NULL ) inData = "";
+			const typename TCharPtrToHandleMap::Entry* result( mStringTable.find( inData ) );
+			if ( result )
+				return result->second;
+			uint32_t hdl = mStringTable.size() + 1;
+			mStringTable.insert( inData, hdl );
+			StringTableEvent theEvent;
+			theEvent.init( inData, hdl );
+			sendEvent( theEvent );
+			return hdl;
+		}
+
+		//Emits an AllocationEvent for addr; a zero address is ignored. Any
+		//StringTableEvent needed for inType / inFile is sent first by getHandle().
+		void onAllocation( size_t inSize, const char* inType, const char* inFile, uint32_t inLine, uint64_t addr )
+		{
+			if ( addr == 0 )
+				return;
+			uint32_t typeHdl( getHandle( inType ) );
+			uint32_t fileHdl( getHandle( inFile ) );
+			AllocationEvent theEvent;
+			theEvent.init( inSize, typeHdl, fileHdl, inLine, addr );
+			sendEvent( theEvent );
+		}
+
+		//Emits a DeallocationEvent for addr; a zero address is ignored.
+		void onDeallocation( uint64_t addr )
+		{
+			if ( addr == 0 )
+				return;
+			DeallocationEvent theEvent;
+			theEvent.init( addr );
+			sendEvent( theEvent );
+		}
+
+		void flushProfileEvents()
+		{
+			TBaseType::flushEvents();
+		}
+
+	protected:
+		
+		//Builds the header for the event type, lets the event adjust it via
+		//setup(), then streams the header followed by the event into the base
+		//class serializer. Flushes once the data array has reached
+		//mBufferFullAmount bytes.
+		template<typename TDataType>
+		void sendEvent( TDataType inType )
+		{
+			MemoryEventHeader theHeader( getMemoryEventType<TDataType>() );
+			inType.setup( theHeader );
+			theHeader.streamify( TBaseType::mSerializer );
+			inType.streamify( TBaseType::mSerializer, theHeader );
+			if ( TBaseType::mDataArray.size() >= TBaseType::mBufferFullAmount )
+				flushProfileEvents();
+		}
+	};
+
+	//Concrete PxProfileMemoryEventBuffer that forwards every call to an
+	//embedded MemoryEventBuffer. The buffer is instantiated with NullLock and
+	//constructed with a NULL mutex pointer.
+	class PxProfileMemoryEventBufferImpl : public shdfnd::UserAllocated
+		, public PxProfileMemoryEventBuffer
+	{
+		typedef MemoryEventBuffer<PxProfileEventMutex, NullLock> TMemoryBufferType;
+		TMemoryBufferType mBuffer;
+
+	public:
+		PxProfileMemoryEventBufferImpl( PxAllocatorCallback& alloc, uint32_t inBufferFullAmount )
+			: mBuffer( alloc, inBufferFullAmount, NULL )
+		{
+		}
+
+		//AllocationListener entry point: converts the line to uint32_t and the
+		//pointer to a 64-bit integer before forwarding.
+		virtual void onAllocation( size_t size, const char* typeName, const char* filename, int line, void* allocatedMemory )
+		{
+			mBuffer.onAllocation( size, typeName, filename, uint32_t(line), PX_PROFILE_POINTER_TO_U64( allocatedMemory ) );
+		}
+		virtual void onDeallocation( void* allocatedMemory )
+		{
+			mBuffer.onDeallocation( PX_PROFILE_POINTER_TO_U64( allocatedMemory ) );
+		}
+		
+		virtual void addClient( PxProfileEventBufferClient& inClient ) { mBuffer.addClient( inClient ); }
+		virtual void removeClient( PxProfileEventBufferClient& inClient ) { mBuffer.removeClient( inClient ); }
+		virtual bool hasClients() const { return mBuffer.hasClients(); }
+
+		virtual void flushProfileEvents() { mBuffer.flushProfileEvents(); }
+
+		//Destroys this object through the allocator held by the buffer's wrapper.
+		virtual void release(){ PX_PROFILE_DELETE( mBuffer.getWrapper().getAllocator(), this ); }
+	};
+}}
+
+#endif //
PXPVDSDK_PXPROFILEMEMORYEVENTBUFFER_H diff --git a/PxShared/src/pvd/src/PxProfileMemoryEventParser.h b/PxShared/src/pvd/src/PxProfileMemoryEventParser.h new file mode 100644 index 0000000..feb8063 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileMemoryEventParser.h @@ -0,0 +1,185 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef PXPVDSDK_PXPROFILEMEMORYEVENTPARSER_H +#define PXPVDSDK_PXPROFILEMEMORYEVENTPARSER_H + +#include "PxProfileMemoryEvents.h" +#include "PxProfileAllocatorWrapper.h" +#include "PxProfileEventSerialization.h" + +#include "PsHashMap.h" +#include "PsString.h" + +namespace physx { namespace profile { + + template<bool TSwapBytes, typename TParserType, typename THandlerType> + bool parseEventData( TParserType& inParser, const uint8_t* inData, uint32_t inLength, THandlerType* inHandler ); + + template<bool TSwapBytes> + struct MemoryEventParser + { + typedef PxProfileWrapperReflectionAllocator<uint8_t> TAllocatorType; + typedef shdfnd::HashMap<uint32_t, char*, shdfnd::Hash<uint32_t>, TAllocatorType > THdlToStringMap; + typedef EventDeserializer<TSwapBytes> TDeserializerType; + + PxProfileAllocatorWrapper mWrapper; + THdlToStringMap mHdlToStringMap; + TDeserializerType mDeserializer; + + MemoryEventParser( PxAllocatorCallback& inAllocator ) + : mWrapper( inAllocator ) + , mHdlToStringMap( TAllocatorType( mWrapper ) ) + , mDeserializer ( 0, 0 ) + { + } + + ~MemoryEventParser() + { + for ( THdlToStringMap::Iterator iter( mHdlToStringMap.getIterator() ); iter.done() == false; ++iter ) + mWrapper.getAllocator().deallocate( reinterpret_cast<void*>(iter->second) ); + } + + template<typename TOperator> + void parse(const StringTableEvent&, const MemoryEventHeader& inHeader, TOperator& inOperator) + { + StringTableEvent evt; + evt.init(); + evt.streamify( mDeserializer, inHeader ); + uint32_t len = static_cast<uint32_t>( strlen( evt.mString ) ); + char* newStr = static_cast<char*>( mWrapper.getAllocator().allocate( len + 1, "const char*", __FILE__, __LINE__ ) ); + shdfnd::strlcpy( newStr, len+1, evt.mString ); + mHdlToStringMap[evt.mHandle] = newStr; + inOperator( inHeader, evt ); + } + + const char* getString( uint32_t inHdl ) + { + const THdlToStringMap::Entry* entry = mHdlToStringMap.find( inHdl ); + if ( 
entry ) return entry->second; + return ""; + } + + //Slow reverse lookup used only for testing. + uint32_t getHandle( const char* inStr ) + { + for ( THdlToStringMap::Iterator iter = mHdlToStringMap.getIterator(); + !iter.done(); + ++iter ) + { + if ( safeStrEq( iter->second, inStr ) ) + return iter->first; + } + return 0; + } + + template<typename TOperator> + void parse(const AllocationEvent&, const MemoryEventHeader& inHeader, TOperator& inOperator) + { + AllocationEvent evt; + evt.streamify( mDeserializer, inHeader ); + inOperator( inHeader, evt ); + } + + template<typename TOperator> + void parse(const DeallocationEvent&, const MemoryEventHeader& inHeader, TOperator& inOperator) + { + DeallocationEvent evt; + evt.streamify( mDeserializer, inHeader ); + inOperator( inHeader, evt ); + } + + template<typename TOperator> + void parse(const FullAllocationEvent&, const MemoryEventHeader&, TOperator& ) + { + PX_ASSERT( false ); //will never happen. + } + + template<typename THandlerType> + void parseEventData( const uint8_t* inData, uint32_t inLength, THandlerType* inOperator ) + { + physx::profile::parseEventData<TSwapBytes>( *this, inData, inLength, inOperator ); + } + }; + + + template<typename THandlerType, bool TSwapBytes> + struct MemoryEventParseOperator + { + MemoryEventParser<TSwapBytes>* mParser; + THandlerType* mOperator; + MemoryEventHeader* mHeader; + MemoryEventParseOperator( MemoryEventParser<TSwapBytes>* inParser, THandlerType* inOperator, MemoryEventHeader* inHeader ) + : mParser( inParser ) + , mOperator( inOperator ) + , mHeader( inHeader ) + { + } + + bool wasSuccessful() { return mParser->mDeserializer.mFail == false; } + + bool parseHeader() + { + mHeader->streamify( mParser->mDeserializer ); + return wasSuccessful(); + } + + template<typename TDataType> + bool operator()( const TDataType& inType ) + { + mParser->parse( inType, *mHeader, *mOperator ); + return wasSuccessful(); + } + + bool operator()( uint8_t ) { PX_ASSERT( false ); return 
false;} + }; + + template<bool TSwapBytes, typename TParserType, typename THandlerType> + inline bool parseEventData( TParserType& inParser, const uint8_t* inData, uint32_t inLength, THandlerType* inHandler ) + { + inParser.mDeserializer = EventDeserializer<TSwapBytes>( inData, inLength ); + MemoryEvent::EventData crapData; + uint32_t eventCount = 0; + MemoryEventHeader theHeader; + MemoryEventParseOperator<THandlerType, TSwapBytes> theOp( &inParser, inHandler, &theHeader ); + while( inParser.mDeserializer.mLength && inParser.mDeserializer.mFail == false) + { + if ( theOp.parseHeader() ) + { + if( visit<bool>( theHeader.getType(), crapData, theOp ) == false ) + inParser.mDeserializer.mFail = true; + } + ++eventCount; + } + return inParser.mDeserializer.mFail == false; + } +}} + +#endif // PXPVDSDK_PXPROFILEMEMORYEVENTPARSER_H diff --git a/PxShared/src/pvd/src/PxProfileMemoryEventRecorder.h b/PxShared/src/pvd/src/PxProfileMemoryEventRecorder.h new file mode 100644 index 0000000..a3d1ed8 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileMemoryEventRecorder.h @@ -0,0 +1,147 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. 
+// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef PXPVDSDK_PXPROFILEMEMORYEVENTRECORDER_H +#define PXPVDSDK_PXPROFILEMEMORYEVENTRECORDER_H + + +#include "PxProfileBase.h" +#include "PxProfileAllocatorWrapper.h" +#include "PxProfileMemoryEvents.h" +#include "PxProfileMemoryEventTypes.h" + +#include "PsHashMap.h" +#include "PsUserAllocated.h" +#include "PsBroadcast.h" +#include "PxProfileMemory.h" + +namespace physx { namespace profile { + + //Remember outstanding events. + //Remembers allocations, forwards them to a listener if one is attached + //and will forward all outstanding allocations to a listener when one is + //attached. 
+ struct MemoryEventRecorder : public shdfnd::AllocationListener + { + typedef PxProfileWrapperReflectionAllocator<uint8_t> TAllocatorType; + typedef shdfnd::HashMap<uint64_t,FullAllocationEvent,shdfnd::Hash<uint64_t>,TAllocatorType> THashMapType; + + PxProfileAllocatorWrapper mWrapper; + THashMapType mOutstandingAllocations; + AllocationListener* mListener; + + MemoryEventRecorder( PxAllocatorCallback* inFoundation ) + : mWrapper( inFoundation ) + , mOutstandingAllocations( TAllocatorType( mWrapper ) ) + , mListener( NULL ) + { + } + + static uint64_t ToU64( void* inData ) { return PX_PROFILE_POINTER_TO_U64( inData ); } + static void* ToVoidPtr( uint64_t inData ) { return reinterpret_cast<void*>(size_t(inData)); } + virtual void onAllocation( size_t size, const char* typeName, const char* filename, int line, void* allocatedMemory ) + { + onAllocation( size, typeName, filename, uint32_t(line), ToU64( allocatedMemory ) ); + } + + void onAllocation( size_t size, const char* typeName, const char* filename, uint32_t line, uint64_t allocatedMemory ) + { + if ( allocatedMemory == 0 ) + return; + FullAllocationEvent theEvent; + theEvent.init( size, typeName, filename, line, allocatedMemory ); + mOutstandingAllocations.insert( allocatedMemory, theEvent ); + if ( mListener != NULL ) mListener->onAllocation( size, typeName, filename, int(line), ToVoidPtr(allocatedMemory) ); + } + + virtual void onDeallocation( void* allocatedMemory ) + { + onDeallocation( ToU64( allocatedMemory ) ); + } + + void onDeallocation( uint64_t allocatedMemory ) + { + if ( allocatedMemory == 0 ) + return; + mOutstandingAllocations.erase( allocatedMemory ); + if ( mListener != NULL ) mListener->onDeallocation( ToVoidPtr( allocatedMemory ) ); + } + + void flushProfileEvents() {} + + void setListener( AllocationListener* inListener ) + { + mListener = inListener; + if ( mListener ) + { + for ( THashMapType::Iterator iter = mOutstandingAllocations.getIterator(); + !iter.done(); + ++iter ) + { + const 
FullAllocationEvent& evt( iter->second ); + mListener->onAllocation( evt.mSize, evt.mType, evt.mFile, int(evt.mLine), ToVoidPtr( evt.mAddress ) ); + } + } + } + }; + + class PxProfileMemoryEventRecorderImpl : public shdfnd::UserAllocated + , public physx::profile::PxProfileMemoryEventRecorder + { + MemoryEventRecorder mRecorder; + public: + PxProfileMemoryEventRecorderImpl( PxAllocatorCallback* inFnd ) + : mRecorder( inFnd ) + { + } + + virtual void onAllocation( size_t size, const char* typeName, const char* filename, int line, void* allocatedMemory ) + { + mRecorder.onAllocation( size, typeName, filename, line, allocatedMemory ); + } + + virtual void onDeallocation( void* allocatedMemory ) + { + mRecorder.onDeallocation( allocatedMemory ); + } + + virtual void setListener( AllocationListener* inListener ) + { + mRecorder.setListener( inListener ); + } + + virtual void release() + { + PX_PROFILE_DELETE( mRecorder.mWrapper.getAllocator(), this ); + } + }; + +}} +#endif // PXPVDSDK_PXPROFILEMEMORYEVENTRECORDER_H diff --git a/PxShared/src/pvd/src/PxProfileMemoryEventReflexiveWriter.h b/PxShared/src/pvd/src/PxProfileMemoryEventReflexiveWriter.h new file mode 100644 index 0000000..75fbd03 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileMemoryEventReflexiveWriter.h @@ -0,0 +1,71 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PXPVDSDK_PXPROFILEMEMORYEVENTREFLEXIVEWRITER_H +#define PXPVDSDK_PXPROFILEMEMORYEVENTREFLEXIVEWRITER_H + +#include "PxProfileMemoryBuffer.h" +#include "PxProfileFoundationWrapper.h" +#include "PxProfileMemoryEvents.h" + +namespace physx { namespace profile { + + struct MemoryEventReflexiveWriter + { + typedef PxProfileWrapperReflectionAllocator<uint8_t> TAllocatorType; + typedef MemoryBuffer<TAllocatorType> TMemoryBufferType; + typedef EventSerializer<TMemoryBufferType> TSerializerType; + + + PxProfileAllocatorWrapper mWrapper; + TMemoryBufferType mBuffer; + TSerializerType mSerializer; + + MemoryEventReflexiveWriter( PxAllocatorCallback* inFoundation ) + : mWrapper( inFoundation ) + , mBuffer( TAllocatorType( mWrapper ) ) + , mSerializer( &mBuffer ) + { + } + + template<typename TDataType> + void operator()( const MemoryEventHeader& inHeader, const TDataType& inType ) + { + //copy to get rid of const. + MemoryEventHeader theHeader( inHeader ); + TDataType theData( inType ); + + //write them out. + theHeader.streamify( mSerializer ); + theData.streamify( mSerializer, theHeader ); + } + }; +}} + +#endif // PXPVDSDK_PXPROFILEMEMORYEVENTREFLEXIVEWRITER_H
\ No newline at end of file diff --git a/PxShared/src/pvd/src/PxProfileMemoryEventSummarizer.h b/PxShared/src/pvd/src/PxProfileMemoryEventSummarizer.h new file mode 100644 index 0000000..788636e --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileMemoryEventSummarizer.h @@ -0,0 +1,304 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef PXPVDSDK_PXPROFILEMEMORYEVENTSUMMARIZER_H +#define PXPVDSDK_PXPROFILEMEMORYEVENTSUMMARIZER_H + +#include "PxProfileBase.h" +#include "PxProfileAllocatorWrapper.h" +#include "PxProfileMemoryEvents.h" +#include "PxProfileMemoryEventRecorder.h" +#include "PxProfileMemoryEventParser.h" + +#include "PsHashMap.h" + +namespace physx { namespace profile { + + struct MemoryEventSummarizerEntry + { + uint32_t mType; + uint32_t mFile; + uint32_t mLine; + + MemoryEventSummarizerEntry( const AllocationEvent& evt ) + : mType( evt.mType ) + , mFile( evt.mFile ) + , mLine( evt.mLine ) + { + } + + MemoryEventSummarizerEntry( uint32_t tp, uint32_t f, uint32_t line ) + : mType( tp ) + , mFile( f ) + , mLine( line ) + { + } + }; +}} + + +namespace physx { namespace shdfnd { + + template <> + struct Hash<physx::profile::MemoryEventSummarizerEntry> + { + public: + uint32_t operator()(const physx::profile::MemoryEventSummarizerEntry& entry) const + { + //Combine hash values in a semi-reasonable way. 
+ return Hash<uint32_t>()( entry.mType ) + ^ Hash<uint32_t>()( entry.mFile ) + ^ Hash<uint32_t>()( entry.mLine ); + } + + bool operator()(const physx::profile::MemoryEventSummarizerEntry& lhs, const physx::profile::MemoryEventSummarizerEntry& rhs) const + { + return lhs.mType == rhs.mType + && lhs.mFile == rhs.mFile + && lhs.mLine == rhs.mLine; + } + + bool equal(const physx::profile::MemoryEventSummarizerEntry& lhs, const physx::profile::MemoryEventSummarizerEntry& rhs) const + { + return lhs.mType == rhs.mType + && lhs.mFile == rhs.mFile + && lhs.mLine == rhs.mLine; + } + }; +}} + +namespace physx { namespace profile { + + struct MemoryEventSummarizerAllocatedValue + { + MemoryEventSummarizerEntry mEntry; + uint32_t mSize; + MemoryEventSummarizerAllocatedValue( MemoryEventSummarizerEntry en, uint32_t sz ) + : mEntry( en ) + , mSize( sz ) + { + } + }; + + template<typename TSummarizerType> + struct SummarizerParseHandler + { + TSummarizerType* mSummarizer; + SummarizerParseHandler( TSummarizerType* inType ) + : mSummarizer( inType ) + { + } + template<typename TDataType> + void operator()( const MemoryEventHeader& inHeader, const TDataType& inType ) + { + mSummarizer->handleParsedData( inHeader, inType ); + } + }; + + template<typename TForwardType> + struct MemoryEventForward + { + TForwardType* mForward; + MemoryEventForward( TForwardType& inForward ) + : mForward( &inForward ) + { + } + template<typename TDataType> + void operator()( const MemoryEventHeader& inHeader, const TDataType& inType ) + { + TForwardType& theForward( *mForward ); + theForward( inHeader, inType ); + } + }; + + struct NullMemoryEventHandler + { + template<typename TDataType> + void operator()( const MemoryEventHeader&, const TDataType&) + { + } + }; + + template<typename TForwardType> + struct NewEntryOperatorForward + { + TForwardType* mForward; + NewEntryOperatorForward( TForwardType& inForward ) + : mForward( &inForward ) + { + } + void operator()( const MemoryEventSummarizerEntry& 
inEntry, const char* inTypeStr, const char* inFileStr, uint32_t inTotalsArrayIndex ) + { + TForwardType& theType( *mForward ); + theType( inEntry, inTypeStr, inFileStr, inTotalsArrayIndex ); + } + }; + + struct NullNewEntryOperator + { + void operator()( const MemoryEventSummarizerEntry&, const char*, const char*, uint32_t) + { + } + }; + + //Very specialized class meant to take a stream of memory events + //endian-convert it. + //Produce a new stream + //And keep track of the events in a meaningful way. + //It collapses the allocations into groupings keyed + //by file, line, and type. + template<bool TSwapBytes + , typename TNewEntryOperator + , typename MemoryEventHandler> + struct MemoryEventSummarizer : public PxProfileEventBufferClient + { + typedef MemoryEventSummarizer< TSwapBytes, TNewEntryOperator, MemoryEventHandler > TThisType; + typedef PxProfileWrapperReflectionAllocator<MemoryEventSummarizerEntry> TAllocatorType; + typedef shdfnd::HashMap<MemoryEventSummarizerEntry, uint32_t, shdfnd::Hash<MemoryEventSummarizerEntry>, TAllocatorType> TSummarizeEntryToU32Hash; + typedef shdfnd::HashMap<uint64_t, MemoryEventSummarizerAllocatedValue, shdfnd::Hash<uint64_t>, TAllocatorType> TU64ToSummarizerValueHash; + PxProfileAllocatorWrapper mWrapper; + TSummarizeEntryToU32Hash mEntryIndexHash; + PxProfileArray<int32_t> mTotalsArray; + MemoryEventParser<TSwapBytes> mParser; + TU64ToSummarizerValueHash mOutstandingAllocations; + TNewEntryOperator mNewEntryOperator; + MemoryEventHandler mEventHandler; + + + MemoryEventSummarizer( PxAllocatorCallback& inAllocator + , TNewEntryOperator inNewEntryOperator + , MemoryEventHandler inEventHandler) + + : mWrapper( inAllocator ) + , mEntryIndexHash( TAllocatorType( mWrapper ) ) + , mTotalsArray( mWrapper ) + , mParser( inAllocator ) + , mOutstandingAllocations( mWrapper ) + , mNewEntryOperator( inNewEntryOperator ) + , mEventHandler( inEventHandler ) + { + } + virtual ~MemoryEventSummarizer(){} + + //parse this data block. 
This will endian-convert the data if necessary + //and then + void handleData( const uint8_t* inData, uint32_t inLen ) + { + SummarizerParseHandler<TThisType> theHandler( this ); + parseEventData<TSwapBytes>( mParser, inData, inLen, &theHandler ); + } + + template<typename TDataType> + void handleParsedData( const MemoryEventHeader& inHeader, const TDataType& inData ) + { + //forward it to someone who might care + mEventHandler( inHeader, inData ); + //handle the parsed data. + doHandleParsedData( inData ); + } + + template<typename TDataType> + void doHandleParsedData( const TDataType& ) {} + + void doHandleParsedData( const AllocationEvent& inEvt ) + { + onAllocation( inEvt.mSize, inEvt.mType, inEvt.mFile, inEvt.mLine, inEvt.mAddress ); + } + + void doHandleParsedData( const DeallocationEvent& inEvt ) + { + onDeallocation( inEvt.mAddress ); + } + + uint32_t getOrCreateEntryIndex( const MemoryEventSummarizerEntry& inEvent ) + { + uint32_t index = 0; + const TSummarizeEntryToU32Hash::Entry* entry( mEntryIndexHash.find(inEvent ) ); + if ( !entry ) + { + index = mTotalsArray.size(); + mTotalsArray.pushBack( 0 ); + mEntryIndexHash.insert( inEvent, index ); + + //Force a string lookup and such here. + mNewEntryOperator( inEvent, mParser.getString( inEvent.mType), mParser.getString( inEvent.mFile ), index ); + } + else + index = entry->second; + return index; + } + + //Keep a running total of what is going on, letting a listener know when new events happen. 
+ void onMemoryEvent( const MemoryEventSummarizerEntry& inEvent, int32_t inSize ) + { + MemoryEventSummarizerEntry theEntry( inEvent ); + uint32_t index = getOrCreateEntryIndex( theEntry ); + mTotalsArray[index] += inSize; + } + + void onAllocation( uint32_t inSize, uint32_t inType, uint32_t inFile, uint32_t inLine, uint64_t inAddress ) + { + MemoryEventSummarizerEntry theEntry( inType, inFile, inLine ); + onMemoryEvent( theEntry, static_cast<int32_t>( inSize ) ); + mOutstandingAllocations.insert( inAddress, MemoryEventSummarizerAllocatedValue( theEntry, inSize ) ); + } + + void onDeallocation( uint64_t inAddress ) + { + const TU64ToSummarizerValueHash::Entry* existing( mOutstandingAllocations.find( inAddress ) ); + if ( existing ) + { + const MemoryEventSummarizerAllocatedValue& data( existing->second ); + onMemoryEvent( data.mEntry, -1 * static_cast<int32_t>( data.mSize ) ); + mOutstandingAllocations.erase( inAddress ); + } + //Not much we can do with an deallocation when we didn't track the allocation. + } + + int32_t getTypeTotal( const char* inTypeName, const char* inFilename, uint32_t inLine ) + { + uint32_t theType( mParser.getHandle( inTypeName ) ); + uint32_t theFile( mParser.getHandle( inFilename ) ); + uint32_t theLine = inLine; //all test lines are 50... 
+ uint32_t index = getOrCreateEntryIndex( MemoryEventSummarizerEntry( theType, theFile, theLine ) ); + return mTotalsArray[index]; + } + + virtual void handleBufferFlush( const uint8_t* inData, uint32_t inLength ) + { + handleData( inData, inLength ); + } + + virtual void handleClientRemoved() {} + }; + +}} + +#endif // PXPVDSDK_PXPROFILEMEMORYEVENTSUMMARIZER_H diff --git a/PxShared/src/pvd/src/PxProfileMemoryEventTypes.h b/PxShared/src/pvd/src/PxProfileMemoryEventTypes.h new file mode 100644 index 0000000..c737451 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileMemoryEventTypes.h @@ -0,0 +1,90 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXPVDSDK_PXPROFILEMEMORYEVENTTYPES_H +#define PXPVDSDK_PXPROFILEMEMORYEVENTTYPES_H + +#include "PxProfileBase.h" +#include "PxProfileEventBufferClientManager.h" +#include "PxProfileEventSender.h" +#include "PsBroadcast.h" + +namespace physx { namespace profile { + + struct PxProfileMemoryEventType + { + enum Enum + { + Unknown = 0, + Allocation, + Deallocation + }; + }; + + struct PxProfileBulkMemoryEvent + { + uint64_t mAddress; + uint32_t mDatatype; + uint32_t mFile; + uint32_t mLine; + uint32_t mSize; + PxProfileMemoryEventType::Enum mType; + + PxProfileBulkMemoryEvent(){} + + PxProfileBulkMemoryEvent( uint32_t size, uint32_t type, uint32_t file, uint32_t line, uint64_t addr ) + : mAddress( addr ) + , mDatatype( type ) + , mFile( file ) + , mLine( line ) + , mSize( size ) + , mType( PxProfileMemoryEventType::Allocation ) + { + } + + PxProfileBulkMemoryEvent( uint64_t addr ) + : mAddress( addr ) + , mDatatype( 0 ) + , mFile( 0 ) + , mLine( 0 ) + , mSize( 0 ) + , mType( PxProfileMemoryEventType::Deallocation ) + { + } + }; + + class PxProfileBulkMemoryEventHandler + { + protected: + virtual ~PxProfileBulkMemoryEventHandler(){} + public: + virtual void handleEvents( const PxProfileBulkMemoryEvent* inEvents, uint32_t inBufferSize ) = 0; + static void parseEventBuffer( const uint8_t* inBuffer, uint32_t inBufferSize, PxProfileBulkMemoryEventHandler& inHandler, bool inSwapBytes, PxAllocatorCallback* inAlloc ); + }; +} } + +#endif // PXPVDSDK_PXPROFILEMEMORYEVENTTYPES_H diff --git a/PxShared/src/pvd/src/PxProfileMemoryEvents.h b/PxShared/src/pvd/src/PxProfileMemoryEvents.h new file mode 100644 index 0000000..6fcb032 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileMemoryEvents.h @@ -0,0 +1,411 @@ +// This 
code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef PXPVDSDK_PXPROFILEMEMORYEVENTS_H +#define PXPVDSDK_PXPROFILEMEMORYEVENTS_H + +#include "PxProfileEvents.h" + +//Memory events define their own event stream + +namespace physx { namespace profile { + struct MemoryEventTypes + { + enum Enum + { + Unknown = 0, + StringTableEvent, //introduce a new mapping of const char* -> integer + AllocationEvent, + DeallocationEvent, + FullAllocationEvent + }; + }; + + template<unsigned numBits, typename TDataType> + inline unsigned char convertToNBits( TDataType inType ) + { + uint8_t conversion = static_cast<uint8_t>( inType ); + PX_ASSERT( conversion < (1 << numBits) ); + return conversion; + } + + template<typename TDataType> + inline unsigned char convertToTwoBits( TDataType inType ) + { + return convertToNBits<2>( inType ); + } + + template<typename TDataType> + inline unsigned char convertToFourBits( TDataType inType ) + { + return convertToNBits<4>( inType ); + } + + inline EventStreamCompressionFlags::Enum fromNumber( uint8_t inNum ) { return static_cast<EventStreamCompressionFlags::Enum>( inNum ); } + + template<unsigned lhs, unsigned rhs> + inline void compileCheckSize() + { + PX_COMPILE_TIME_ASSERT( lhs <= rhs ); + } + + //Used for predictable bit fields. + template<typename TDataType + , uint8_t TNumBits + , uint8_t TOffset + , typename TInputType> + struct BitMaskSetter + { + //Create a mask that masks out the orginal value shift into place + static TDataType createOffsetMask() { return TDataType(createMask() << TOffset); } + //Create a mask of TNumBits number of tis + static TDataType createMask() { return static_cast<TDataType>((1 << TNumBits) - 1); } + void setValue( TDataType& inCurrent, TInputType inData ) + { + PX_ASSERT( inData < ( 1 << TNumBits ) ); + + //Create a mask to remove the current value. + TDataType theMask = TDataType(~(createOffsetMask())); + //Clear out current value. + inCurrent = TDataType(inCurrent & theMask); + //Create the new value. 
+ TDataType theAddition = static_cast<TDataType>( inData << TOffset ); + //or it into the existing value. + inCurrent = TDataType(inCurrent | theAddition); + } + + TInputType getValue( TDataType inCurrent ) + { + return static_cast<TInputType>( ( inCurrent >> TOffset ) & createMask() ); + } + }; + + + struct MemoryEventHeader + { + uint16_t mValue; + + typedef BitMaskSetter<uint16_t, 4, 0, uint8_t> TTypeBitmask; + typedef BitMaskSetter<uint16_t, 2, 4, uint8_t> TAddrCompressBitmask; + typedef BitMaskSetter<uint16_t, 2, 6, uint8_t> TTypeCompressBitmask; + typedef BitMaskSetter<uint16_t, 2, 8, uint8_t> TFnameCompressBitmask; + typedef BitMaskSetter<uint16_t, 2, 10, uint8_t> TSizeCompressBitmask; + typedef BitMaskSetter<uint16_t, 2, 12, uint8_t> TLineCompressBitmask; + + //That leaves size as the only thing not compressed usually. + + MemoryEventHeader( MemoryEventTypes::Enum inType = MemoryEventTypes::Unknown ) + : mValue( 0 ) + { + uint8_t defaultCompression( convertToTwoBits( EventStreamCompressionFlags::U64 ) ); + TTypeBitmask().setValue( mValue, convertToFourBits( inType ) ); + TAddrCompressBitmask().setValue( mValue, defaultCompression ); + TTypeCompressBitmask().setValue( mValue, defaultCompression ); + TFnameCompressBitmask().setValue( mValue, defaultCompression ); + TSizeCompressBitmask().setValue( mValue, defaultCompression ); + TLineCompressBitmask().setValue( mValue, defaultCompression ); + } + + MemoryEventTypes::Enum getType() const { return static_cast<MemoryEventTypes::Enum>( TTypeBitmask().getValue( mValue ) ); } + +#define DEFINE_MEMORY_HEADER_COMPRESSION_ACCESSOR( name ) \ + void set##name( EventStreamCompressionFlags::Enum inEnum ) { T##name##Bitmask().setValue( mValue, convertToTwoBits( inEnum ) ); } \ + EventStreamCompressionFlags::Enum get##name() const { return fromNumber( T##name##Bitmask().getValue( mValue ) ); } + + DEFINE_MEMORY_HEADER_COMPRESSION_ACCESSOR( AddrCompress ) + DEFINE_MEMORY_HEADER_COMPRESSION_ACCESSOR( TypeCompress ) + 
DEFINE_MEMORY_HEADER_COMPRESSION_ACCESSOR( FnameCompress ) + DEFINE_MEMORY_HEADER_COMPRESSION_ACCESSOR( SizeCompress ) + DEFINE_MEMORY_HEADER_COMPRESSION_ACCESSOR( LineCompress ) + +#undef DEFINE_MEMORY_HEADER_COMPRESSION_ACCESSOR + + bool operator==( const MemoryEventHeader& inOther ) const + { + return mValue == inOther.mValue; + } + template<typename TStreamType> + void streamify( TStreamType& inStream ) + { + inStream.streamify( "Header", mValue ); + } + }; + + //Declaration of type level getMemoryEventType function that maps enumeration event types to datatypes + template<typename TDataType> + inline MemoryEventTypes::Enum getMemoryEventType() { PX_ASSERT( false ); return MemoryEventTypes::Unknown; } + + inline bool safeStrEq( const char* lhs, const char* rhs ) + { + if ( lhs == rhs ) + return true; + //If they aren't equal, and one of them is null, + //then they can't be equal. + //This is assuming that the null char* is not equal to + //the empty "" char*. + if ( !lhs || !rhs ) + return false; + + return ::strcmp( lhs, rhs ) == 0; + } + + struct StringTableEvent + { + const char* mString; + uint32_t mHandle; + + void init( const char* inStr = "", uint32_t inHdl = 0 ) + { + mString = inStr; + mHandle = inHdl; + } + + void init( const StringTableEvent& inData ) + { + mString = inData.mString; + mHandle = inData.mHandle; + } + + bool operator==( const StringTableEvent& inOther ) const + { + return mHandle == inOther.mHandle + && safeStrEq( mString, inOther.mString ); + } + + void setup( MemoryEventHeader& ) const {} + + template<typename TStreamType> + void streamify( TStreamType& inStream, const MemoryEventHeader& ) + { + inStream.streamify( "String", mString ); + inStream.streamify( "Handle", mHandle ); + } + }; + template<> inline MemoryEventTypes::Enum getMemoryEventType<StringTableEvent>() { return MemoryEventTypes::StringTableEvent; } + + struct MemoryEventData + { + uint64_t mAddress; + void init( uint64_t addr ) + { + mAddress = addr; + } + + void init( 
const MemoryEventData& inData) + { + mAddress = inData.mAddress; + } + + bool operator==( const MemoryEventData& inOther ) const + { + return mAddress == inOther.mAddress; + } + + void setup( MemoryEventHeader& inHeader ) const + { + inHeader.setAddrCompress( findCompressionValue( mAddress ) ); + } + + template<typename TStreamType> + void streamify( TStreamType& inStream, const MemoryEventHeader& inHeader ) + { + inStream.streamify( "Address", mAddress, inHeader.getAddrCompress() ); + } + }; + + struct AllocationEvent : public MemoryEventData + { + uint32_t mSize; + uint32_t mType; + uint32_t mFile; + uint32_t mLine; + void init( size_t size = 0, uint32_t type = 0, uint32_t file = 0, uint32_t line = 0, uint64_t addr = 0 ) + { + MemoryEventData::init( addr ); + mSize = static_cast<uint32_t>( size ); + mType = type; + mFile = file; + mLine = line; + } + + void init( const AllocationEvent& inData ) + { + MemoryEventData::init( inData ); + mSize = inData.mSize; + mType = inData.mType; + mFile = inData.mFile; + mLine = inData.mLine; + } + + bool operator==( const AllocationEvent& inOther ) const + { + return MemoryEventData::operator==( inOther ) + && mSize == inOther.mSize + && mType == inOther.mType + && mFile == inOther.mFile + && mLine == inOther.mLine; + } + + void setup( MemoryEventHeader& inHeader ) const + { + inHeader.setTypeCompress( findCompressionValue( mType ) ); + inHeader.setFnameCompress( findCompressionValue( mFile ) ); + inHeader.setSizeCompress( findCompressionValue( mSize ) ); + inHeader.setLineCompress( findCompressionValue( mLine ) ); + MemoryEventData::setup( inHeader ); + } + + template<typename TStreamType> + void streamify( TStreamType& inStream, const MemoryEventHeader& inHeader ) + { + inStream.streamify( "Size", mSize, inHeader.getSizeCompress() ); + inStream.streamify( "Type", mType, inHeader.getTypeCompress() ); + inStream.streamify( "File", mFile, inHeader.getFnameCompress() ); + inStream.streamify( "Line", mLine, 
inHeader.getLineCompress() ); + MemoryEventData::streamify( inStream, inHeader ); + } + }; + template<> inline MemoryEventTypes::Enum getMemoryEventType<AllocationEvent>() { return MemoryEventTypes::AllocationEvent; } + + + struct FullAllocationEvent : public MemoryEventData + { + size_t mSize; + const char* mType; + const char* mFile; + uint32_t mLine; + void init( size_t size, const char* type, const char* file, uint32_t line, uint64_t addr ) + { + MemoryEventData::init( addr ); + mSize = size; + mType = type; + mFile = file; + mLine = line; + } + + void init( const FullAllocationEvent& inData ) + { + MemoryEventData::init( inData ); + mSize = inData.mSize; + mType = inData.mType; + mFile = inData.mFile; + mLine = inData.mLine; + } + + bool operator==( const FullAllocationEvent& inOther ) const + { + return MemoryEventData::operator==( inOther ) + && mSize == inOther.mSize + && safeStrEq( mType, inOther.mType ) + && safeStrEq( mFile, inOther.mFile ) + && mLine == inOther.mLine; + } + + void setup( MemoryEventHeader& ) const {} + }; + + template<> inline MemoryEventTypes::Enum getMemoryEventType<FullAllocationEvent>() { return MemoryEventTypes::FullAllocationEvent; } + + struct DeallocationEvent : public MemoryEventData + { + void init( uint64_t addr = 0 ) { MemoryEventData::init( addr ); } + void init( const DeallocationEvent& inData ) { MemoryEventData::init( inData ); } + }; + + template<> inline MemoryEventTypes::Enum getMemoryEventType<DeallocationEvent>() { return MemoryEventTypes::DeallocationEvent; } + + class MemoryEvent + { + public: + typedef PX_PROFILE_UNION_5(StringTableEvent, AllocationEvent, DeallocationEvent, FullAllocationEvent, uint8_t) EventData; + + private: + MemoryEventHeader mHeader; + EventData mData; + public: + + MemoryEvent() {} + MemoryEvent( MemoryEventHeader inHeader, const EventData& inData = EventData() ) + : mHeader( inHeader ) + , mData( inData ) + { + } + + template<typename TDataType> + MemoryEvent( const TDataType& inType ) + : 
mHeader( getMemoryEventType<TDataType>() ) + , mData( inType ) + { + //set the appropriate compression bits. + inType.setup( mHeader ); + } + const MemoryEventHeader& getHeader() const { return mHeader; } + const EventData& getData() const { return mData; } + + template<typename TDataType> + const TDataType& getValue() const { PX_ASSERT( mHeader.getType() == getMemoryEventType<TDataType>() ); return mData.toType<TDataType>(); } + + template<typename TDataType> + TDataType& getValue() { PX_ASSERT( mHeader.getType() == getMemoryEventType<TDataType>() ); return mData.toType<TDataType>(); } + + template<typename TRetVal, typename TOperator> + inline TRetVal visit( TOperator inOp ) const; + + bool operator==( const MemoryEvent& inOther ) const + { + if ( !(mHeader == inOther.mHeader ) ) return false; + if ( mHeader.getType() ) + return inOther.visit<bool>( EventDataEqualOperator<EventData>( mData ) ); + return true; + } + }; + + template<typename TRetVal, typename TOperator> + inline TRetVal visit( MemoryEventTypes::Enum inEventType, const MemoryEvent::EventData& inData, TOperator inOperator ) + { + switch( inEventType ) + { + case MemoryEventTypes::StringTableEvent: return inOperator( inData.toType( Type2Type<StringTableEvent>() ) ); + case MemoryEventTypes::AllocationEvent: return inOperator( inData.toType( Type2Type<AllocationEvent>() ) ); + case MemoryEventTypes::DeallocationEvent: return inOperator( inData.toType( Type2Type<DeallocationEvent>() ) ); + case MemoryEventTypes::FullAllocationEvent: return inOperator( inData.toType( Type2Type<FullAllocationEvent>() ) ); + case MemoryEventTypes::Unknown: return inOperator( static_cast<uint8_t>( inEventType ) ); + } + return TRetVal(); + } + + template<typename TRetVal, typename TOperator> + inline TRetVal MemoryEvent::visit( TOperator inOp ) const + { + return physx::profile::visit<TRetVal>( mHeader.getType(), mData, inOp ); + } +}} + +#endif // PXPVDSDK_PXPROFILEMEMORYEVENTS_H diff --git 
a/PxShared/src/pvd/src/PxProfileScopedEvent.h b/PxShared/src/pvd/src/PxProfileScopedEvent.h new file mode 100644 index 0000000..953fcf8 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileScopedEvent.h @@ -0,0 +1,150 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#ifndef PXPVDSDK_PXPROFILESCOPEDEVENT_H +#define PXPVDSDK_PXPROFILESCOPEDEVENT_H + +#include "PxProfileBase.h" +#include "PxProfileEventId.h" +#include "PxProfileCompileTimeEventFilter.h" + +namespace physx { namespace profile { + +#define TO_PXPVDSDK_PXPROFILEEVENTID( subsystem, eventId ) PxProfileEventId( SubsystemIds::subsystem, EventIds::subsystem##eventId ); + + /** + \brief Template version of startEvent, called directly on provided profile buffer. + + \param inBuffer Profile event buffer. + \param inId Profile event id. + \param inContext Profile event context. + */ + template<bool TEnabled, typename TBufferType> + inline void startEvent( TBufferType* inBuffer, const PxProfileEventId& inId, uint64_t inContext ) + { + if ( TEnabled && inBuffer ) inBuffer->startEvent( inId, inContext ); + } + + /** + \brief Template version of stopEvent, called directly on provided profile buffer. + + \param inBuffer Profile event buffer. + \param inId Profile event id. + \param inContext Profile event context. + */ + template<bool TEnabled, typename TBufferType> + inline void stopEvent( TBufferType* inBuffer, const PxProfileEventId& inId, uint64_t inContext ) + { + if ( TEnabled && inBuffer ) inBuffer->stopEvent( inId, inContext ); + } + + /** + \brief Template version of startEvent, called directly on provided profile buffer. + + \param inEnabled If profile event is enabled. + \param inBuffer Profile event buffer. + \param inId Profile event id. + \param inContext Profile event context. + */ + template<typename TBufferType> + inline void startEvent( bool inEnabled, TBufferType* inBuffer, const PxProfileEventId& inId, uint64_t inContext ) + { + if ( inEnabled && inBuffer ) inBuffer->startEvent( inId, inContext ); + } + + /** + \brief Template version of stopEvent, called directly on provided profile buffer. + + \param inEnabled If profile event is enabled. + \param inBuffer Profile event buffer. + \param inId Profile event id. + \param inContext Profile event context. 
+ */ + template<typename TBufferType> + inline void stopEvent( bool inEnabled, TBufferType* inBuffer, const PxProfileEventId& inId, uint64_t inContext ) + { + if ( inEnabled && inBuffer ) inBuffer->stopEvent( inId, inContext ); + } + + /** + \brief Template version of eventValue, called directly on provided profile buffer. + + \param inEnabled If profile event is enabled. + \param inBuffer Profile event buffer. + \param inId Profile event id. + \param inContext Profile event context. + \param inValue Event value. + */ + template<typename TBufferType> + inline void eventValue( bool inEnabled, TBufferType* inBuffer, const PxProfileEventId& inId, uint64_t inContext, int64_t inValue ) + { + if ( inEnabled && inBuffer ) inBuffer->eventValue( inId, inContext, inValue ); + } + + template<bool TEnabled, typename TBufferType, uint16_t eventId> + struct ScopedEventWithContext + { + uint64_t mContext; + TBufferType* mBuffer; + ScopedEventWithContext( TBufferType* inBuffer, uint64_t inContext) + : mContext ( inContext ) + , mBuffer( inBuffer ) + { + startEvent<true>( mBuffer, PxProfileEventId(eventId), mContext ); + } + ~ScopedEventWithContext() + { + stopEvent<true>( mBuffer, PxProfileEventId(eventId), mContext ); + } + }; + + template<typename TBufferType, uint16_t eventId> + struct ScopedEventWithContext<false,TBufferType,eventId> { ScopedEventWithContext( TBufferType*, uint64_t) {} }; + + template<typename TBufferType> + struct DynamicallyEnabledScopedEvent + { + TBufferType* mBuffer; + PxProfileEventId mId; + uint64_t mContext; + DynamicallyEnabledScopedEvent( TBufferType* inBuffer, const PxProfileEventId& inId, uint64_t inContext) + : mBuffer( inBuffer ) + , mId( inId ) + , mContext( inContext ) + { + if(mBuffer) + startEvent( mId.compileTimeEnabled, mBuffer, mId, mContext ); + } + ~DynamicallyEnabledScopedEvent() + { + if(mBuffer) + stopEvent( mId.compileTimeEnabled, mBuffer, mId, mContext ); + } + }; +}} + +#endif // PXPVDSDK_PXPROFILESCOPEDEVENT_H diff --git 
a/PxShared/src/pvd/src/PxProfileScopedMutexLock.h b/PxShared/src/pvd/src/PxProfileScopedMutexLock.h new file mode 100644 index 0000000..9d21cb8 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileScopedMutexLock.h @@ -0,0 +1,64 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef PXPVDSDK_PXPROFILESCOPEDMUTEXLOCK_H +#define PXPVDSDK_PXPROFILESCOPEDMUTEXLOCK_H + +#include "PxProfileBase.h" + +namespace physx { namespace profile { + + /** + * Generic class to wrap any mutex type that has lock and unlock methods + */ + template<typename TMutexType> + struct ScopedLockImpl + { + TMutexType* mMutex; + ScopedLockImpl( TMutexType* inM ) : mMutex( inM ) + { + if ( mMutex ) mMutex->lock(); + } + ~ScopedLockImpl() + { + if ( mMutex ) mMutex->unlock(); + } + }; + + /** + * Null locking system that does nothing. + */ + struct NullLock + { + template<typename TDataType> NullLock( TDataType*) {} + }; +}} + +#endif // PXPVDSDK_PXPROFILESCOPEDMUTEXLOCK_H diff --git a/PxShared/src/pvd/src/PxProfileZone.h b/PxShared/src/pvd/src/PxProfileZone.h new file mode 100644 index 0000000..1573c2f --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileZone.h @@ -0,0 +1,142 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXPVDSDK_PXPROFILEZONE_H +#define PXPVDSDK_PXPROFILEZONE_H + +#include "foundation/PxPreprocessor.h" + +#include "PxProfileEventBufferClientManager.h" +#include "PxProfileEventNames.h" +#include "PxProfileEventSender.h" + +namespace physx { + class PxAllocatorCallback; + + namespace profile { + + class PxProfileZoneManager; + + /** + \brief The profiling system was setup in the expectation that there would be several + systems that each had its own island of profile information. PhysX, client code, + and APEX would be the first examples of these. Each one of these islands is represented + by a profile zone. + + A profile zone combines a name, a place where all the events coming from its interface + can be flushed, and a mapping from event number to full event name. + + It also provides a top level filtering service where profile events + can be filtered by event id. + + The profile zone implements a system where if there is no one + listening to events it doesn't provide a mechanism to send them. In this way + the event system is short circuited when there aren't any clients. + + All functions on this interface should be considered threadsafe. 
+ + @see PxProfileZoneClientManager, PxProfileNameProvider, PxProfileEventSender, PxProfileEventFlusher + */ + class PxProfileZone : public PxProfileZoneClientManager + , public PxProfileNameProvider + , public PxProfileEventSender + , public PxProfileEventFlusher + { + protected: + virtual ~PxProfileZone(){} + public: + /** + \brief Get profile zone name. + \return Zone name. + */ + virtual const char* getName() = 0; + /** + \brief Release the profile zone. + */ + virtual void release() = 0; + + /** + \brief Set profile zone manager for the zone. + \param inMgr Profile zone manager. + */ + virtual void setProfileZoneManager(PxProfileZoneManager* inMgr) = 0; + /** + \brief Get profile zone manager for the zone. + \return Profile zone manager. + */ + virtual PxProfileZoneManager* getProfileZoneManager() = 0; + + /** + \brief Get or create a new event id for a given name. + If you pass in a previously defined event name (including one returned + from the name provider) you will just get the same event id back. + \param inName Profile event name. + */ + virtual uint16_t getEventIdForName( const char* inName ) = 0; + + /** + \brief Specifies that it is a safe point to flush read-write name map into + read-only map. Make sure getEventIdForName is not called from a different thread. + */ + virtual void flushEventIdNameMap() = 0; + + /** + \brief Reserve a contiguous set of profile event ids for a set of names. + + This function does not do any meaningful error checking other than to ensure + that if it does generate new ids they are contiguous. If the first name is already + registered, that is the ID that will be returned regardless of what other + names are registered. Thus either use this function alone (without the above + function) or don't use it. + If you register "one","two","three" and the function returns an id of 4, then + "one" is mapped to 4, "two" is mapped to 5, and "three" is mapped to 6. + + \param inNames set of names to register. 
+ \param inLen Length of the name list. + + \return The first id associated with the first name. The rest of the names + will be associated with monotonically incrementing uint16_t values from the first + id. + */ + virtual uint16_t getEventIdsForNames( const char** inNames, uint32_t inLen ) = 0; + + /** + \brief Create a new profile zone. + + \param inAllocator memory allocation is controlled through the foundation if one is passed in. + \param inSDKName Name of the profile zone; useful for clients to understand where events came from. + \param inNames Mapping from event id -> event name. + \param inEventBufferByteSize Size of the canonical event buffer. This does not need to be a large number + as profile events are fairly small individually. + \return a profile zone implementation. + */ + static PX_FOUNDATION_API PxProfileZone& createProfileZone(PxAllocatorCallback* inAllocator, const char* inSDKName, PxProfileNames inNames = PxProfileNames(), uint32_t inEventBufferByteSize = 0x10000 /*64k*/); + + }; +} } + +#endif // PXPVDSDK_PXPROFILEZONE_H diff --git a/PxShared/src/pvd/src/PxProfileZoneImpl.h b/PxShared/src/pvd/src/PxProfileZoneImpl.h new file mode 100644 index 0000000..981180f --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileZoneImpl.h @@ -0,0 +1,318 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef PXPVDSDK_PXPROFILEZONEIMPL_H +#define PXPVDSDK_PXPROFILEZONEIMPL_H + +#include "PxProfileZone.h" +#include "PxProfileEventFilter.h" +#include "PxProfileZoneManager.h" +#include "PxProfileContextProviderImpl.h" +#include "PxProfileScopedMutexLock.h" +#include "PxProfileEventBufferAtomic.h" +#include "PsMutex.h" + +namespace physx { namespace profile { + + /** + \brief Simple event filter that enables all events. 
+ */ + struct PxProfileNullEventFilter + { + void setEventEnabled( const PxProfileEventId&, bool) { PX_ASSERT(false); } + bool isEventEnabled( const PxProfileEventId&) const { return true; } + }; + + typedef shdfnd::MutexT<PxProfileWrapperReflectionAllocator<uint8_t> > TZoneMutexType; + typedef ScopedLockImpl<TZoneMutexType> TZoneLockType; + typedef EventBuffer< PxDefaultContextProvider, TZoneMutexType, TZoneLockType, PxProfileNullEventFilter > TZoneEventBufferType; + //typedef EventBufferAtomic< PxDefaultContextProvider, TZoneMutexType, TZoneLockType, PxProfileNullEventFilter > TZoneEventBufferType; + + template<typename TNameProvider> + class ZoneImpl : TZoneEventBufferType //private inheritance intended + , public PxProfileZone + , public PxProfileEventBufferClient + { + typedef shdfnd::MutexT<PxProfileWrapperReflectionAllocator<uint8_t> > TMutexType; + typedef PxProfileHashMap<const char*, uint32_t> TNameToEvtIndexMap; + //ensure we don't reuse event ids. + typedef PxProfileHashMap<uint16_t, const char*> TEvtIdToNameMap; + typedef TMutexType::ScopedLock TLockType; + + + const char* mName; + PxProfileAllocatorWrapper mWrapper; + mutable TMutexType mMutex; + PxProfileArray<PxProfileEventName> mEventNames; + // to avoid locking, read-only and read-write map exist + TNameToEvtIndexMap mNameToEvtIndexMapR; + TNameToEvtIndexMap mNameToEvtIndexMapRW; + //ensure we don't reuse event ids. 
+ TEvtIdToNameMap mEvtIdToNameMap; + + PxProfileZoneManager* mProfileZoneManager; + + PxProfileArray<PxProfileZoneClient*> mClients; + volatile bool mEventsActive; + + PX_NOCOPY(ZoneImpl<TNameProvider>) + public: + ZoneImpl( PxAllocatorCallback* inAllocator, const char* inName, uint32_t bufferSize = 0x10000 /*64k*/, const TNameProvider& inProvider = TNameProvider() ) + : TZoneEventBufferType( inAllocator, bufferSize, PxDefaultContextProvider(), NULL, PxProfileNullEventFilter() ) + , mName( inName ) + , mWrapper( inAllocator ) + , mMutex( PxProfileWrapperReflectionAllocator<uint8_t>( mWrapper ) ) + , mEventNames( mWrapper ) + , mNameToEvtIndexMapR( mWrapper ) + , mNameToEvtIndexMapRW(mWrapper) + , mEvtIdToNameMap( mWrapper ) + , mProfileZoneManager( NULL ) + , mClients( mWrapper ) + , mEventsActive( false ) + { + TZoneEventBufferType::setBufferMutex( &mMutex ); + //Initialize the event name structure with existing names from the name provider. + PxProfileNames theNames( inProvider.getProfileNames() ); + for ( uint32_t idx = 0; idx < theNames.eventCount; ++idx ) + { + const PxProfileEventName& theName (theNames.events[idx]); + doAddName( theName.name, theName.eventId.eventId, theName.eventId.compileTimeEnabled ); + } + TZoneEventBufferType::addClient( *this ); + } + + virtual ~ZoneImpl() { + if ( mProfileZoneManager != NULL ) + mProfileZoneManager->removeProfileZone( *this ); + mProfileZoneManager = NULL; + TZoneEventBufferType::removeClient( *this ); + } + + void doAddName( const char* inName, uint16_t inEventId, bool inCompileTimeEnabled ) + { + TLockType theLocker( mMutex ); + mEvtIdToNameMap.insert( inEventId, inName ); + uint32_t idx = static_cast<uint32_t>( mEventNames.size() ); + mNameToEvtIndexMapRW.insert( inName, idx ); + mEventNames.pushBack( PxProfileEventName( inName, PxProfileEventId( inEventId, inCompileTimeEnabled ) ) ); + } + + virtual void flushEventIdNameMap() + { + // copy the RW map into R map + if (mNameToEvtIndexMapRW.size()) + { + for 
(TNameToEvtIndexMap::Iterator iter = mNameToEvtIndexMapRW.getIterator(); !iter.done(); ++iter) + { + mNameToEvtIndexMapR.insert(iter->first, iter->second); + } + mNameToEvtIndexMapRW.clear(); + } + } + + virtual uint16_t getEventIdForName( const char* inName ) + { + return getEventIdsForNames( &inName, 1 ); + } + + virtual uint16_t getEventIdsForNames( const char** inNames, uint32_t inLen ) + { + if ( inLen == 0 ) + return 0; + + // search the read-only map first + const TNameToEvtIndexMap::Entry* theEntry( mNameToEvtIndexMapR.find( inNames[0] ) ); + if ( theEntry ) + return mEventNames[theEntry->second].eventId; + + TLockType theLocker(mMutex); + + const TNameToEvtIndexMap::Entry* theReEntry(mNameToEvtIndexMapRW.find(inNames[0])); + if (theReEntry) + return mEventNames[theReEntry->second].eventId; + + //Neither map has the first name, so register new event ids for all of the names. + uint16_t nameSize = static_cast<uint16_t>( mEventNames.size() ); + //We don't allow 0 as an event id. + uint16_t eventId = nameSize; + //Find a contiguous set of unique event ids + bool foundAnEventId = false; + do + { + foundAnEventId = false; + ++eventId; + for ( uint16_t idx = 0; idx < inLen && foundAnEventId == false; ++idx ) + foundAnEventId = mEvtIdToNameMap.find( uint16_t(eventId + idx) ) != NULL; + } + while( foundAnEventId ); + + uint32_t clientCount = mClients.size(); + for ( uint16_t nameIdx = 0; nameIdx < inLen; ++nameIdx ) + { + uint16_t newId = uint16_t(eventId + nameIdx); + doAddName( inNames[nameIdx], newId, true ); + for( uint32_t clientIdx =0; clientIdx < clientCount; ++clientIdx ) + mClients[clientIdx]->handleEventAdded( PxProfileEventName( inNames[nameIdx], PxProfileEventId( newId ) ) ); + } + + return eventId; + } + + virtual void setProfileZoneManager(PxProfileZoneManager* inMgr) + { + mProfileZoneManager = inMgr; + } + + virtual PxProfileZoneManager* getProfileZoneManager() + { + return mProfileZoneManager; + } + + + + const char* getName() { return mName; } + + PxProfileEventBufferClient* getEventBufferClient() { return 
this; } + + //SDK implementation + + void addClient( PxProfileZoneClient& inClient ) + { + TLockType lock( mMutex ); + mClients.pushBack( &inClient ); + mEventsActive = true; + } + + void removeClient( PxProfileZoneClient& inClient ) + { + TLockType lock( mMutex ); + for ( uint32_t idx =0; idx < mClients.size(); ++idx ) + { + if ( mClients[idx] == &inClient ) + { + inClient.handleClientRemoved(); + mClients.replaceWithLast( idx ); + break; + } + } + mEventsActive = mClients.size() != 0; + } + + virtual bool hasClients() const + { + return mEventsActive; + } + + virtual PxProfileNames getProfileNames() const + { + TLockType theLocker( mMutex ); + const PxProfileEventName* theNames = mEventNames.begin(); + uint32_t theEventCount = uint32_t(mEventNames.size()); + return PxProfileNames( theEventCount, theNames ); + } + + virtual void release() + { + PX_PROFILE_DELETE( mWrapper.getAllocator(), this ); + } + + //Implementation chaining the buffer flush to our clients + virtual void handleBufferFlush( const uint8_t* inData, uint32_t inLength ) + { + TLockType theLocker( mMutex ); + + uint32_t clientCount = mClients.size(); + for( uint32_t idx =0; idx < clientCount; ++idx ) + mClients[idx]->handleBufferFlush( inData, inLength ); + } + //Happens if something removes all the clients from the manager. + virtual void handleClientRemoved() {} + + //Send a profile event, optionally with a context. Events are sorted by thread + //and context in the client side. 
+ virtual void startEvent( uint16_t inId, uint64_t contextId) + { + if( mEventsActive ) + { + TZoneEventBufferType::startEvent( inId, contextId ); + } + } + virtual void stopEvent( uint16_t inId, uint64_t contextId) + { + if( mEventsActive ) + { + TZoneEventBufferType::stopEvent( inId, contextId ); + } + } + + virtual void startEvent( uint16_t inId, uint64_t contextId, uint32_t threadId) + { + if( mEventsActive ) + { + TZoneEventBufferType::startEvent( inId, contextId, threadId ); + } + } + virtual void stopEvent( uint16_t inId, uint64_t contextId, uint32_t threadId ) + { + if( mEventsActive ) + { + TZoneEventBufferType::stopEvent( inId, contextId, threadId ); + } + } + + virtual void atEvent(uint16_t inId, uint64_t contextId, uint32_t threadId, uint64_t start, uint64_t stop) + { + if (mEventsActive) + { + TZoneEventBufferType::startEvent(inId, threadId, contextId, 0, 0, start); + TZoneEventBufferType::stopEvent(inId, threadId, contextId, 0, 0, stop); + } + } + + /** + * Set a specific event's value. This is different than the profiling value + * for the event; it is a value recorded and kept around without a timestamp associated + * with it. This value is displayed when the event itself is processed. + */ + virtual void eventValue( uint16_t inId, uint64_t contextId, int64_t inValue ) + { + if( mEventsActive ) + { + TZoneEventBufferType::eventValue( inId, contextId, inValue ); + } + } + virtual void flushProfileEvents() + { + TZoneEventBufferType::flushProfileEvents(); + } + }; + +}} +#endif // PXPVDSDK_PXPROFILEZONEIMPL_H diff --git a/PxShared/src/pvd/src/PxProfileZoneManager.h b/PxShared/src/pvd/src/PxProfileZoneManager.h new file mode 100644 index 0000000..9668460 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileZoneManager.h @@ -0,0 +1,155 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXPVDSDK_PXPROFILEZONEMANAGER_H +#define PXPVDSDK_PXPROFILEZONEMANAGER_H + +#include "PxProfileEventSender.h" +#include "PxProfileEventNames.h" + +namespace physx { + + class PxAllocatorCallback; + + namespace profile { + + class PxProfileZone; + class PxProfileNameProvider; + + /** + \brief Profile zone handler for zone add/remove notification. 
+ */ + class PxProfileZoneHandler + { + protected: + virtual ~PxProfileZoneHandler(){} + public: + /** + \brief On zone added notification + + \note Not a threadsafe call; handlers are expected to be able to handle + this from any thread. + + \param inSDK Added zone. + */ + virtual void onZoneAdded( PxProfileZone& inSDK ) = 0; + /** + \brief On zone removed notification + + \note Not a threadsafe call; handlers are expected to be able to handle + this from any thread. + + \param inSDK removed zone. + */ + virtual void onZoneRemoved( PxProfileZone& inSDK ) = 0; + }; + + /** + \brief The profiling system was set up in the expectation that there would be several + systems that each had its own island of profile information. PhysX, client code, + and APEX would be the first examples of these. Each one of these islands is represented + by a profile zone. + + The Manager is a singleton-like object where all these different systems can be registered + so that clients of the profiling system can have one point to capture *all* profiling events. + + Flushing the manager implies that you want to loop through all the profile zones and flush + each one. + + @see PxProfileEventFlusher + */ + class PxProfileZoneManager + : public PxProfileEventFlusher //Tell all SDKs to flush their queue of profile events. + { + protected: + virtual ~PxProfileZoneManager(){} + public: + /** + \brief Add new profile zone for the manager. + \note Threadsafe call, can be done from any thread. Handlers that are already connected + will get a new callback on the current thread. + + \param inSDK Profile zone to add. + */ + virtual void addProfileZone( PxProfileZone& inSDK ) = 0; + /** + \brief Removes profile zone from the manager. + \note Threadsafe call, can be done from any thread. Handlers that are already connected + will get a new callback on the current thread. + + \param inSDK Profile zone to remove.
+ */ + virtual void removeProfileZone( PxProfileZone& inSDK ) = 0; + + /** + \brief Add profile zone handler callback for the profile zone notifications. + + \note Threadsafe call. The new handler will immediately be notified about all + known SDKs. + + \param inHandler Profile zone handler to add. + */ + virtual void addProfileZoneHandler( PxProfileZoneHandler& inHandler ) = 0; + /** + \brief Removes profile zone handler callback for the profile zone notifications. + + \note Threadsafe call. The handler being removed is immediately sent onZoneRemoved for all + known SDKs. + + \param inHandler Profile zone handler to remove. + */ + virtual void removeProfileZoneHandler( PxProfileZoneHandler& inHandler ) = 0; + + + /** + \brief Create a new profile zone. This means you don't need access to a PxFoundation to + create your profile zone object, and your object is automatically registered with + the profile zone manager. + + You still need to release your object when you are finished with it. + \param inSDKName Name of the SDK object. + \param inNames Optional set of event id to name mappings. + \param inEventBufferByteSize rough maximum size of the event buffer. May exceed this size + by sizeof one event. When full an immediate call to all listeners is made. + */ + virtual PxProfileZone& createProfileZone( const char* inSDKName, PxProfileNames inNames = PxProfileNames(), uint32_t inEventBufferByteSize = 0x4000 /*16k*/ ) = 0; + + /** + \brief Releases the profile manager instance. + */ + virtual void release() = 0; + + /** + \brief Create the profile zone manager. + \param inAllocatorCallback Allocator callback.
+ */ + static PxProfileZoneManager& createProfileZoneManager(PxAllocatorCallback* inAllocatorCallback ); + }; + +} } + +#endif // PXPVDSDK_PXPROFILEZONEMANAGER_H diff --git a/PxShared/src/pvd/src/PxProfileZoneManagerImpl.h b/PxShared/src/pvd/src/PxProfileZoneManagerImpl.h new file mode 100644 index 0000000..6542917 --- /dev/null +++ b/PxShared/src/pvd/src/PxProfileZoneManagerImpl.h @@ -0,0 +1,174 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. 
+// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef PXPVDSDK_PXPROFILEZONEMANAGERIMPL_H +#define PXPVDSDK_PXPROFILEZONEMANAGERIMPL_H + +#include "PxProfileZoneManager.h" +#include "PxProfileBase.h" +#include "PxProfileScopedMutexLock.h" +#include "PxProfileZone.h" +#include "PxProfileAllocatorWrapper.h" + +#include "PsArray.h" +#include "PsMutex.h" + +namespace physx { namespace profile { + /** Name provider that reports an empty name table, PxProfileNames( 0, 0 ). */ + struct NullEventNameProvider : public PxProfileNameProvider + { + virtual PxProfileNames getProfileNames() const { return PxProfileNames( 0, 0 ); } + }; + /** PxProfileZoneManager implementation that keeps the registered zones and handlers in two arrays allocated through mWrapper. Copying is disabled by the private, undefined copy operations. */ + class ZoneManagerImpl : public PxProfileZoneManager + { + typedef ScopedLockImpl<shdfnd::Mutex> TScopedLockType; + PxProfileAllocatorWrapper mWrapper; + PxProfileArray<PxProfileZone*> mZones; + PxProfileArray<PxProfileZoneHandler*> mHandlers; + shdfnd::Mutex mMutex; + + ZoneManagerImpl( const ZoneManagerImpl& inOther ); + ZoneManagerImpl& operator=( const ZoneManagerImpl& inOther ); + + public: + /** Builds the allocator wrapper from inFoundation and hands it to both arrays. */ + ZoneManagerImpl(PxAllocatorCallback* inFoundation) + : mWrapper( inFoundation ) + , mZones( mWrapper ) + , mHandlers( mWrapper ) + {} + /** Detaches any zones that are still registered. NOTE(review): the loop only terminates if removeProfileZone shrinks mZones on every pass, i.e. if each stored zone still reports this manager - confirm. */ + virtual ~ZoneManagerImpl() + { + //This assert would mean that a profile zone is outliving us. + //This will cause a crash when the profile zone is released. + PX_ASSERT( mZones.size() == 0 ); + while( mZones.size() ) + removeProfileZone( *mZones.back() ); + } + /** Registers inSDK with this manager and notifies every handler through onZoneAdded. */ + virtual void addProfileZone( PxProfileZone& inSDK ) + { + TScopedLockType lock( &mMutex ); + + if ( inSDK.getProfileZoneManager() != NULL ) + { + if ( inSDK.getProfileZoneManager() == this ) + return; + else //there must be two managers in the system somehow.
+ { + PX_ASSERT( false ); + inSDK.getProfileZoneManager()->removeProfileZone( inSDK ); + } + } + mZones.pushBack( &inSDK ); + inSDK.setProfileZoneManager( this ); + for ( uint32_t idx =0; idx < mHandlers.size(); ++idx ) + mHandlers[idx]->onZoneAdded( inSDK ); + } + /** Unregisters inSDK: clears its manager pointer, sends onZoneRemoved to every handler and drops the zone from mZones. A zone without a manager is ignored; a zone owned by another manager asserts and is forwarded to that manager. */ + virtual void removeProfileZone( PxProfileZone& inSDK ) + { + TScopedLockType lock( &mMutex ); + if ( inSDK.getProfileZoneManager() == NULL ) + return; + + else if ( inSDK.getProfileZoneManager() != this ) + { + PX_ASSERT( false ); + inSDK.getProfileZoneManager()->removeProfileZone( inSDK ); + return; + } + + inSDK.setProfileZoneManager( NULL ); + /* Walk backwards: replaceWithLast moves the final entry into idx, and that entry has already been tested. */ for ( uint32_t idx = mZones.size(); idx-- > 0; ) + { + if ( mZones[idx] == &inSDK ) + { + for ( uint32_t handler =0; handler < mHandlers.size(); ++handler ) + mHandlers[handler]->onZoneRemoved( inSDK ); + mZones.replaceWithLast( idx ); + } + } + } + /** Flushes every registered zone. NOTE(review): mZones is read here without taking mMutex - confirm callers serialize this against add/remove. */ + virtual void flushProfileEvents() + { + uint32_t sdkCount = mZones.size(); + for ( uint32_t idx = 0; idx < sdkCount; ++idx ) + mZones[idx]->flushProfileEvents(); + } + /** Registers inHandler (no duplicate check) and immediately sends it onZoneAdded for every known zone. */ + virtual void addProfileZoneHandler( PxProfileZoneHandler& inHandler ) + { + TScopedLockType lock( &mMutex ); + mHandlers.pushBack( &inHandler ); + for ( uint32_t idx = 0; idx < mZones.size(); ++idx ) + inHandler.onZoneAdded( *mZones[idx] ); + } + /** Sends inHandler onZoneRemoved for every known zone, then removes every registration of it. The search runs backwards because replaceWithLast moves the final entry into idx; a forward ++idx would skip that entry and could leave a duplicate registration behind. */ + virtual void removeProfileZoneHandler( PxProfileZoneHandler& inHandler ) + { + TScopedLockType lock( &mMutex ); + for( uint32_t idx = 0; idx < mZones.size(); ++idx ) + inHandler.onZoneRemoved( *mZones[idx] ); + for( uint32_t idx = mHandlers.size(); idx-- > 0; ) + { + if ( mHandlers[idx] == &inHandler ) + mHandlers.replaceWithLast( idx ); + } + } + /** Convenience overload taking a name provider; a NULL provider yields the empty names of NullEventNameProvider. */ + virtual PxProfileZone& createProfileZone( const char* inSDKName, PxProfileNameProvider* inProvider, uint32_t inEventBufferByteSize ) + { + NullEventNameProvider nullProvider; + if ( inProvider == NULL ) + inProvider = &nullProvider; + return createProfileZone( inSDKName, inProvider->getProfileNames(), inEventBufferByteSize ); + } + + + virtual
PxProfileZone& createProfileZone( const char* inSDKName, PxProfileNames inNames, uint32_t inEventBufferByteSize ) + { /* Create the zone with this manager's allocator, then register it through addProfileZone. */ + PxProfileZone& retval( PxProfileZone::createProfileZone( &mWrapper.getAllocator(), inSDKName, inNames, inEventBufferByteSize ) ); + addProfileZone( retval ); + return retval; + } + /** Destroys this manager through PX_PROFILE_DELETE using the wrapper's allocator. */ + virtual void release() + { + PX_PROFILE_DELETE( mWrapper.getAllocator(), this ); + } + }; +} } + + +#endif // PXPVDSDK_PXPROFILEZONEMANAGERIMPL_H diff --git a/PxShared/src/pvd/src/PxPvd.cpp b/PxShared/src/pvd/src/PxPvd.cpp new file mode 100644 index 0000000..4e1eb09 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvd.cpp @@ -0,0 +1,56 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "pvd/PxPvd.h" + +#include "PxPvdImpl.h" + +namespace physx +{ +namespace pvdsdk +{ +/** Allocator pointer used by PVD code. It starts out pointing at gForwardingAllocator and is replaced by SetPvdAllocatorCallback or PxCreatePvd. */ +ForwardingAllocator gForwardingAllocator; +PxAllocatorCallback* gPvdAllocatorCallback = &gForwardingAllocator; +/** Stores inAllocatorCallback as the PVD allocator. The pointer is not checked for NULL. */ +void SetPvdAllocatorCallback(PxAllocatorCallback* inAllocatorCallback) +{ + gPvdAllocatorCallback = inAllocatorCallback; +} + +} // namespace pvdsdk +/** Points the PVD allocator at the foundation's allocator callback, runs PvdImpl::initialize() and returns PvdImpl::getInstance(). */ +PxPvd* PxCreatePvd(PxFoundation& foundation) +{ + pvdsdk::gPvdAllocatorCallback = &foundation.getAllocatorCallback(); + pvdsdk::PvdImpl::initialize(); + return pvdsdk::PvdImpl::getInstance(); +} + +} // namespace physx diff --git a/PxShared/src/pvd/src/PxPvdBits.h b/PxShared/src/pvd/src/PxPvdBits.h new file mode 100644 index 0000000..b763065 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdBits.h @@ -0,0 +1,173 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.".
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXPVDSDK_PXPVDBITS_H +#define PXPVDSDK_PXPVDBITS_H + +#include "PxPvdObjectModelBaseTypes.h" + +namespace physx +{ +namespace pvdsdk +{ + +// Marshallers cannot assume src is aligned, but they can assume dest is aligned. 
+typedef void (*TSingleMarshaller)(const uint8_t* src, uint8_t* dest); +typedef void (*TBlockMarshaller)(const uint8_t* src, uint8_t* dest, uint32_t numItems); +/** Reverses the ByteCount bytes starting at inData in place. */ +template <uint8_t ByteCount> +static inline void doSwapBytes(uint8_t* __restrict inData) +{ + for(uint32_t idx = 0; idx < ByteCount / 2; ++idx) + { + uint32_t endIdx = ByteCount - idx - 1; + uint8_t theTemp = inData[idx]; + inData[idx] = inData[endIdx]; + inData[endIdx] = theTemp; + } +} +/** Reverses each of itemCount consecutive ByteCount-wide items in place. */ +template <uint8_t ByteCount> +static inline void doSwapBytes(uint8_t* __restrict inData, uint32_t itemCount) +{ + uint8_t* end = inData + itemCount * ByteCount; + for(; inData < end; inData += ByteCount) + doSwapBytes<ByteCount>(inData); +} +/** Byte-swaps a buffer of numBytes bytes holding items of itemWidth bytes. Widths 1, 2, 4, 8 and 16 are supported; any other width, including 0, asserts and leaves the buffer untouched. Trailing bytes that do not form a whole item are not swapped. */ +static inline void swapBytes(uint8_t* __restrict dataPtr, uint32_t numBytes, uint32_t itemWidth) +{ + uint32_t numItems = itemWidth ? numBytes / itemWidth : 0; /* a zero width must reach the default case instead of dividing by zero */ + switch(itemWidth) + { + case 1: + break; + case 2: + doSwapBytes<2>(dataPtr, numItems); + break; + case 4: + doSwapBytes<4>(dataPtr, numItems); + break; + case 8: + doSwapBytes<8>(dataPtr, numItems); + break; + case 16: + doSwapBytes<16>(dataPtr, numItems); + break; + default: + PX_ASSERT(false); + break; + } +} +/** Swapper selected at compile time: this primary template really swaps, the TShouldSwap == false specialization below does nothing. */ +template <uint8_t TByteCount, bool TShouldSwap> +struct PvdByteSwapper +{ + void swapBytes(uint8_t* __restrict inData) + { + doSwapBytes<TByteCount>(inData); + } + void swapBytes(uint8_t* __restrict inData, uint32_t itemCount) + { + doSwapBytes<TByteCount>(inData, itemCount); + } + void swapBytes(uint8_t* __restrict dataPtr, uint32_t numBytes, uint32_t itemWidth) + { + physx::pvdsdk::swapBytes(dataPtr, numBytes, itemWidth); + } +}; +/** Swapper with the same three overloads, all of which leave the data unchanged. */ +struct PvdNullSwapper +{ + + void swapBytes(uint8_t* __restrict) + { + } + void swapBytes(uint8_t* __restrict, uint32_t) + { + } + void swapBytes(uint8_t* __restrict, uint32_t, uint32_t) + { + } +}; +// Anything that doesn't need swapping gets the null swapper +template <uint8_t TByteCount> +struct PvdByteSwapper<TByteCount, false> : public PvdNullSwapper +{ +}; +// A 1 byte byte swapper can't
really do anything. +template <> +struct PvdByteSwapper<1, true> : public PvdNullSwapper +{ +}; +/** In-place byte swaps for scalar values; the one-byte overloads are no-ops. */ +static inline void swapBytes(uint8_t&) +{ +} +static inline void swapBytes(int8_t&) +{ +} +static inline void swapBytes(uint16_t& inData) +{ + doSwapBytes<2>(reinterpret_cast<uint8_t*>(&inData)); +} +static inline void swapBytes(int16_t& inData) +{ + doSwapBytes<2>(reinterpret_cast<uint8_t*>(&inData)); +} +static inline void swapBytes(uint32_t& inData) +{ + doSwapBytes<4>(reinterpret_cast<uint8_t*>(&inData)); +} +static inline void swapBytes(int32_t& inData) +{ + doSwapBytes<4>(reinterpret_cast<uint8_t*>(&inData)); +} +static inline void swapBytes(float& inData) +{ + doSwapBytes<4>(reinterpret_cast<uint8_t*>(&inData)); +} +static inline void swapBytes(uint64_t& inData) +{ + doSwapBytes<8>(reinterpret_cast<uint8_t*>(&inData)); +} +static inline void swapBytes(int64_t& inData) +{ + doSwapBytes<8>(reinterpret_cast<uint8_t*>(&inData)); +} +static inline void swapBytes(double& inData) +{ + doSwapBytes<8>(reinterpret_cast<uint8_t*>(&inData)); +} +/** Returns true when the range [inStart, inStop) holds at least inLength bytes. A reversed range (inStop before inStart) is reported as too short, and the distance is compared as a 64-bit value so ranges larger than 4 GiB are not truncated. */ +static inline bool checkLength(const uint8_t* inStart, const uint8_t* inStop, uint32_t inLength) +{ + return inStop >= inStart && static_cast<uint64_t>(inStop - inStart) >= inLength; +} +} +} +#endif // PXPVDSDK_PXPVDBITS_H diff --git a/PxShared/src/pvd/src/PxPvdByteStreams.h b/PxShared/src/pvd/src/PxPvdByteStreams.h new file mode 100644 index 0000000..fff3c4f --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdByteStreams.h @@ -0,0 +1,155 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto.
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXPVDSDK_PXPVDBYTESTREAMS_H +#define PXPVDSDK_PXPVDBYTESTREAMS_H +#include "PxPvdObjectModelBaseTypes.h" + +namespace physx +{ +namespace pvdsdk +{ +/** Length of a NUL-terminated string, excluding the terminator; a NULL pointer counts as length 0. */ +static inline uint32_t strLen(const char* inStr) +{ + uint32_t len = 0; + if(inStr) + { + while(*inStr) + { + ++len; + ++inStr; + } + } + return len; +} +/** Abstract byte source. Implementations supply the raw read; the templates below build typed reads on top of it. */ +class PvdInputStream +{ + protected: + virtual ~PvdInputStream() + { + } + + public: + // Return false if you can't read the number of bytes requested + // But make an absolute best effort to read the data...
+ virtual bool read(uint8_t* buffer, uint32_t& len) = 0; + /** Typed read of numItems elements. The raw read writes the byte count back through its reference argument; this overload ignores the raw read's bool result and succeeds only when that count covers every requested item. */ + template <typename TDataType> + bool read(TDataType* buffer, uint32_t numItems) + { + uint32_t expected = numItems; + uint32_t amountToRead = numItems * sizeof(TDataType); + read(reinterpret_cast<uint8_t*>(buffer), amountToRead); + numItems = amountToRead / sizeof(TDataType); + PX_ASSERT(numItems == expected); + return expected == numItems; + } + /** Reads sizeof(TDataType) raw bytes into data. A failed read is deliberately ignored here (the asserts are commented out). */ + template <typename TDataType> + PvdInputStream& operator>>(TDataType& data) + { + uint32_t dataSize = static_cast<uint32_t>(sizeof(TDataType)); + bool success = read(reinterpret_cast<uint8_t*>(&data), dataSize); + // PX_ASSERT( success ); + // PX_ASSERT( dataSize == sizeof( data ) ); + (void)success; + return *this; + } +}; +/** Wraps a PvdInputStream and byte-swaps every value after it has been read. */ +struct ByteSwappingPvdInputStream +{ + protected: + ByteSwappingPvdInputStream& operator=(ByteSwappingPvdInputStream& other); + + public: + PvdInputStream& mStream; + ByteSwappingPvdInputStream(PvdInputStream& stream) : mStream(stream) + { + } + + template <typename TDataType> + bool read(TDataType* buffer, uint32_t& numItems) + { + bool retval = mStream.read(buffer, numItems); + for(uint32_t idx = 0; idx < numItems; ++idx) + swapBytes(buffer[idx]); + return retval; + } + + template <typename TDataType> + ByteSwappingPvdInputStream& operator>>(TDataType& data) + { + mStream >> data; + swapBytes(data); + return *this; + } +}; +/** Abstract byte sink. Implementations supply the raw write and directCopy; the templates below build typed writes on top of the raw write. */ +class PvdOutputStream +{ + protected: + virtual ~PvdOutputStream() + { + } + + public: + // Return false if you can't write the number of bytes requested + // But make an absolute best effort to write the data...
+ virtual bool write(const uint8_t* buffer, uint32_t len) = 0; + virtual bool directCopy(PvdInputStream& inStream, uint32_t len) = 0; + /** Typed write: forwards numItems * sizeof(TDataType) raw bytes to the virtual write. */ + template <typename TDataType> + bool write(const TDataType* buffer, uint32_t numItems) + { + return write(reinterpret_cast<const uint8_t*>(buffer), numItems * sizeof(TDataType)); + } + /** Writes the raw bytes of data; a failed write only trips PX_ASSERT. */ + template <typename TDataType> + PvdOutputStream& operator<<(const TDataType& data) + { + bool success = write(reinterpret_cast<const uint8_t*>(&data), sizeof(data)); + PX_ASSERT(success); + (void)success; + return *this; + } + /** Writes the characters of inString without the terminating NUL; a NULL or empty string writes nothing. */ + PvdOutputStream& operator<<(const char* inString) + { + if(inString && *inString) + { + uint32_t len(strLen(inString)); + write(inString, len); + } + return *this; + } +}; +} +} +#endif // PXPVDSDK_PXPVDBYTESTREAMS_H diff --git a/PxShared/src/pvd/src/PxPvdCommStreamEventSink.h b/PxShared/src/pvd/src/PxPvdCommStreamEventSink.h new file mode 100644 index 0000000..57e8635 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdCommStreamEventSink.h @@ -0,0 +1,55 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXPVDSDK_PXPVDCOMMSTREAMEVENTSINK_H +#define PXPVDSDK_PXPVDCOMMSTREAMEVENTSINK_H + +#include "PxPvdObjectModelBaseTypes.h" +#include "PxPvdCommStreamEvents.h" +#include "PxPvdCommStreamTypes.h" + +namespace physx +{ +namespace pvdsdk +{ +/** Helper that writes one event to a stream: first the event type tag, then the event's own fields. */ +class PvdCommStreamEventSink +{ + public: + template <typename TStreamType> + static void writeStreamEvent(const EventSerializeable& evt, PvdCommStreamEventTypes::Enum evtType, TStreamType& stream) + { + EventStreamifier<TStreamType> streamifier_concrete(stream); + PvdEventSerializer& streamifier(streamifier_concrete); + streamifier.streamify(evtType); + /* serialize() is called through a non-const reference, hence the const_cast */ const_cast<EventSerializeable&>(evt).serialize(streamifier); + } +}; + +} // pvdsdk +} // physx +#endif // PXPVDSDK_PXPVDCOMMSTREAMEVENTSINK_H diff --git a/PxShared/src/pvd/src/PxPvdCommStreamEvents.h b/PxShared/src/pvd/src/PxPvdCommStreamEvents.h new file mode 100644 index 0000000..81770da --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdCommStreamEvents.h @@ -0,0 +1,987 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#ifndef PXPVDSDK_PXPVDCOMMSTREAMEVENTS_H +#define PXPVDSDK_PXPVDCOMMSTREAMEVENTS_H + +#include "foundation/PxVec3.h" +#include "foundation/PxFlags.h" + +#include "PxPvdObjectModelBaseTypes.h" +#include "PsTime.h" + +namespace physx +{ +namespace pvdsdk +{ +/** Flag bits for CommStreamFlags; Is64BitPtr is the only flag defined. */ +struct CommStreamFlagTypes +{ + enum Enum + { + Is64BitPtr = 1 + }; +}; + +typedef PxFlags<CommStreamFlagTypes::Enum, uint32_t> CommStreamFlags; +/** Trait that marks variable-sized events through the type of its member: bool in this primary template, uint32_t in the specializations produced by DECLARE_TYPE_VARIABLE_SIZED. */ +template <typename TDataType> +struct PvdCommVariableSizedEventCheck +{ + bool variable_size_check; +}; + +// Pick out the events that are possibly very large. +// This helps us keep our buffers close to the size the user requested. +#define DECLARE_TYPE_VARIABLE_SIZED(type) \ + template <> \ + struct PvdCommVariableSizedEventCheck<type> \ + { \ + uint32_t variable_size_check; \ + }; + +struct NameHandleValue; +struct StreamPropMessageArg; +struct StringHandleEvent; +struct CreateClass; +struct DeriveClass; +struct CreateProperty; +struct CreatePropertyMessage; +struct CreateInstance; +struct SetPropertyValue; +struct BeginSetPropertyValue; +struct AppendPropertyValueData; +struct EndSetPropertyValue; +struct SetPropertyMessage; +struct BeginPropertyMessageGroup; +struct SendPropertyMessageFromGroup; +struct EndPropertyMessageGroup; +struct CreateDestroyInstanceProperty; +struct PushBackObjectRef; +struct RemoveObjectRef; +struct BeginSection; +struct EndSection; +struct SetPickable; +struct SetColor; +struct SetIsTopLevel; +struct SetCamera; +struct AddProfileZone; +struct AddProfileZoneEvent; +struct StreamEndEvent; +struct ErrorMessage; +struct OriginShift; +struct DestroyInstance; + +#define DECLARE_COMM_STREAM_EVENTS \ + \ +DECLARE_PVD_COMM_STREAM_EVENT(StringHandleEvent) \ +DECLARE_PVD_COMM_STREAM_EVENT(CreateClass) \ +DECLARE_PVD_COMM_STREAM_EVENT(DeriveClass) \ +DECLARE_PVD_COMM_STREAM_EVENT(CreateProperty) \ +DECLARE_PVD_COMM_STREAM_EVENT(CreatePropertyMessage) \ +DECLARE_PVD_COMM_STREAM_EVENT(CreateInstance) \
+DECLARE_PVD_COMM_STREAM_EVENT(SetPropertyValue) \ +DECLARE_PVD_COMM_STREAM_EVENT(BeginSetPropertyValue) \ +DECLARE_PVD_COMM_STREAM_EVENT(AppendPropertyValueData) \ +DECLARE_PVD_COMM_STREAM_EVENT(EndSetPropertyValue) \ +DECLARE_PVD_COMM_STREAM_EVENT(SetPropertyMessage) \ +DECLARE_PVD_COMM_STREAM_EVENT(BeginPropertyMessageGroup) \ +DECLARE_PVD_COMM_STREAM_EVENT(SendPropertyMessageFromGroup) \ +DECLARE_PVD_COMM_STREAM_EVENT(EndPropertyMessageGroup) \ +DECLARE_PVD_COMM_STREAM_EVENT(DestroyInstance) \ +DECLARE_PVD_COMM_STREAM_EVENT(PushBackObjectRef) \ +DECLARE_PVD_COMM_STREAM_EVENT(RemoveObjectRef) \ +DECLARE_PVD_COMM_STREAM_EVENT(BeginSection) \ +DECLARE_PVD_COMM_STREAM_EVENT(EndSection) \ +DECLARE_PVD_COMM_STREAM_EVENT(SetPickable) \ +DECLARE_PVD_COMM_STREAM_EVENT(SetColor) \ +DECLARE_PVD_COMM_STREAM_EVENT(SetIsTopLevel) \ +DECLARE_PVD_COMM_STREAM_EVENT(SetCamera) \ +DECLARE_PVD_COMM_STREAM_EVENT(AddProfileZone) \ +DECLARE_PVD_COMM_STREAM_EVENT(AddProfileZoneEvent) \ +DECLARE_PVD_COMM_STREAM_EVENT(StreamEndEvent) \ +DECLARE_PVD_COMM_STREAM_EVENT(ErrorMessage) \ +DECLARE_PVD_COMM_STREAM_EVENT_NO_COMMA(OriginShift) + +struct PvdCommStreamEventTypes +{ + enum Enum + { + Unknown = 0, +#define DECLARE_PVD_COMM_STREAM_EVENT(x) x, +#define DECLARE_PVD_COMM_STREAM_EVENT_NO_COMMA(x) x + DECLARE_COMM_STREAM_EVENTS +#undef DECLARE_PVD_COMM_STREAM_EVENT_NO_COMMA +#undef DECLARE_PVD_COMM_STREAM_EVENT + , Last + }; +}; + +template <typename TDataType> +struct DatatypeToCommEventType +{ + bool compile_error; +}; +template <PvdCommStreamEventTypes::Enum TEnumType> +struct CommEventTypeToDatatype +{ + bool compile_error; +}; + +#define DECLARE_PVD_COMM_STREAM_EVENT(x) \ + template <> \ + struct DatatypeToCommEventType<x> \ + { \ + enum Enum \ + { \ + EEventTypeMap = PvdCommStreamEventTypes::x \ + }; \ + }; \ + template <> \ + struct CommEventTypeToDatatype<PvdCommStreamEventTypes::x> \ + { \ + typedef x TEventType; \ + }; +#define DECLARE_PVD_COMM_STREAM_EVENT_NO_COMMA(x) \ + \ 
+template<> struct DatatypeToCommEventType<x> \ + { \ + enum Enum \ + { \ + EEventTypeMap = PvdCommStreamEventTypes::x \ + }; \ + }; \ + \ +template<> struct CommEventTypeToDatatype<PvdCommStreamEventTypes::x> \ + { \ + typedef x TEventType; \ + }; + +DECLARE_COMM_STREAM_EVENTS +#undef DECLARE_PVD_COMM_STREAM_EVENT_NO_COMMA +#undef DECLARE_PVD_COMM_STREAM_EVENT +/** Returns the PvdCommStreamEventTypes value mapped to TDataType by DatatypeToCommEventType. */ +template <typename TDataType> +PvdCommStreamEventTypes::Enum getCommStreamEventType() +{ + return static_cast<PvdCommStreamEventTypes::Enum>(DatatypeToCommEventType<TDataType>::EEventTypeMap); +} +/** A namespace/name pair, each stored as a StringHandle; both default to 0. */ +struct StreamNamespacedName +{ + StringHandle mNamespace; // StringHandle handles + StringHandle mName; + StreamNamespacedName(StringHandle ns = 0, StringHandle nm = 0) : mNamespace(ns), mName(nm) + { + } +}; + +class EventSerializeable; +/** Abstract field visitor for events. Implementations provide the pure virtual streamify overloads; the non-virtual overloads break composite types down into those. Enum, flag and bool values pass through a temporary integer that is assigned back afterwards - presumably so one code path can serve both reading and writing; confirm against the implementations. */ +class PvdEventSerializer +{ + protected: + virtual ~PvdEventSerializer() + { + } + + public: + virtual void streamify(uint8_t& val) = 0; + virtual void streamify(uint16_t& val) = 0; + virtual void streamify(uint32_t& val) = 0; + virtual void streamify(float& val) = 0; + virtual void streamify(uint64_t& val) = 0; + virtual void streamify(String& val) = 0; + virtual void streamify(DataRef<const uint8_t>& data) = 0; + virtual void streamify(DataRef<NameHandleValue>& data) = 0; + virtual void streamify(DataRef<StreamPropMessageArg>& data) = 0; + virtual void streamify(DataRef<StringHandle>& data) = 0; + + void streamify(StringHandle& hdl) + { + streamify(hdl.mHandle); + } + void streamify(CommStreamFlags& flags) + { + uint32_t val(flags); + streamify(val); + flags = CommStreamFlags(val); + } + + void streamify(PvdCommStreamEventTypes::Enum& val) + { + uint8_t detyped = static_cast<uint8_t>(val); + streamify(detyped); + val = static_cast<PvdCommStreamEventTypes::Enum>(detyped); + } + void streamify(PropertyType::Enum& val) + { + uint8_t detyped = static_cast<uint8_t>(val); + streamify(detyped); + val = static_cast<PropertyType::Enum>(detyped); + } + + void streamify(bool& val) +
{ + uint8_t detyped = uint8_t(val ? 1 : 0); + streamify(detyped); + val = detyped ? true : false; + } + + void streamify(StreamNamespacedName& name) + { + streamify(name.mNamespace); + streamify(name.mName); + } + + void streamify(PvdColor& color) + { + streamify(color.r); + streamify(color.g); + streamify(color.b); + streamify(color.a); + } + + void streamify(PxVec3& vec) + { + streamify(vec.x); + streamify(vec.y); + streamify(vec.z); + } + + static uint32_t measure(const EventSerializeable& evt); +}; +/** Interface for objects that stream their fields through a PvdEventSerializer. */ +class EventSerializeable +{ + protected: + virtual ~EventSerializeable() + { + } + + public: + virtual void serialize(PvdEventSerializer& serializer) = 0; +}; + +/** Numbers generated from random.org +129919156 17973702 401496246 144984007 336950759 +907025328 837150850 679717896 601529147 269478202 +*/ +struct StreamInitialization : public EventSerializeable +{ + static uint32_t getStreamId() + { + return 837150850; + } + static uint32_t getStreamVersion() + { + return 1; + } + + uint32_t mStreamId; + uint32_t mStreamVersion; + uint64_t mTimestampNumerator; + uint64_t mTimestampDenominator; + CommStreamFlags mStreamFlags; + StreamInitialization() + : mStreamId(getStreamId()) + , mStreamVersion(getStreamVersion()) + , mTimestampNumerator(physx::shdfnd::Time::getCounterFrequency().mNumerator * 10) + , mTimestampDenominator(physx::shdfnd::Time::getCounterFrequency().mDenominator) + , mStreamFlags(sizeof(void*) == 4 ?
0 : 1) + { + } + + void serialize(PvdEventSerializer& s) + { + s.streamify(mStreamId); + s.streamify(mStreamVersion); + s.streamify(mTimestampNumerator); + s.streamify(mTimestampDenominator); + s.streamify(mStreamFlags); + } +}; + +struct EventGroup : public EventSerializeable +{ + uint32_t mDataSize; // in bytes, data directly follows this header + uint32_t mNumEvents; + uint64_t mStreamId; + uint64_t mTimestamp; + + EventGroup(uint32_t dataSize = 0, uint32_t numEvents = 0, uint64_t streamId = 0, uint64_t ts = 0) + : mDataSize(dataSize), mNumEvents(numEvents), mStreamId(streamId), mTimestamp(ts) + { + } + + void serialize(PvdEventSerializer& s) + { + s.streamify(mDataSize); + s.streamify(mNumEvents); + s.streamify(mStreamId); + s.streamify(mTimestamp); + } +}; + +struct StringHandleEvent : public EventSerializeable +{ + String mString; + uint32_t mHandle; + StringHandleEvent(String str, uint32_t hdl) : mString(str), mHandle(hdl) + { + } + StringHandleEvent() + { + } + + void serialize(PvdEventSerializer& s) + { + s.streamify(mString); + s.streamify(mHandle); + } +}; + +DECLARE_TYPE_VARIABLE_SIZED(StringHandleEvent) + +typedef uint64_t Timestamp; + +struct CreateClass : public EventSerializeable +{ + StreamNamespacedName mName; + CreateClass(StreamNamespacedName nm) : mName(nm) + { + } + CreateClass() + { + } + + void serialize(PvdEventSerializer& s) + { + s.streamify(mName); + } +}; + +struct DeriveClass : public EventSerializeable +{ + StreamNamespacedName mParent; + StreamNamespacedName mChild; + + DeriveClass(StreamNamespacedName p, StreamNamespacedName c) : mParent(p), mChild(c) + { + } + DeriveClass() + { + } + + void serialize(PvdEventSerializer& s) + { + s.streamify(mParent); + s.streamify(mChild); + } +}; + +struct NameHandleValue : public EventSerializeable +{ + StringHandle mName; + uint32_t mValue; + NameHandleValue(StringHandle name, uint32_t val) : mName(name), mValue(val) + { + } + NameHandleValue() + { + } + + void serialize(PvdEventSerializer& s) + 
{ + s.streamify(mName); + s.streamify(mValue); + } +}; +/*virtual PvdError createProperty( StreamNamespacedName clsName, StringHandle name, StringHandle semantic + , StreamNamespacedName dtypeName, PropertyType::Enum propertyType + , DataRef<NamedValue> values = DataRef<NamedValue>() ) = 0; */ +struct CreateProperty : public EventSerializeable +{ + StreamNamespacedName mClass; + StringHandle mName; + StringHandle mSemantic; + StreamNamespacedName mDatatypeName; + PropertyType::Enum mPropertyType; + DataRef<NameHandleValue> mValues; + + CreateProperty(StreamNamespacedName cls, StringHandle name, StringHandle semantic, StreamNamespacedName dtypeName, + PropertyType::Enum ptype, DataRef<NameHandleValue> values) + : mClass(cls), mName(name), mSemantic(semantic), mDatatypeName(dtypeName), mPropertyType(ptype), mValues(values) + { + } + CreateProperty() + { + } + + void serialize(PvdEventSerializer& s) + { + s.streamify(mClass); + s.streamify(mName); + s.streamify(mSemantic); + s.streamify(mDatatypeName); + s.streamify(mPropertyType); + s.streamify(mValues); + } +}; + +struct StreamPropMessageArg : public EventSerializeable +{ + StringHandle mPropertyName; + StreamNamespacedName mDatatypeName; + uint32_t mMessageOffset; + uint32_t mByteSize; + StreamPropMessageArg(StringHandle pname, StreamNamespacedName dtypeName, uint32_t offset, uint32_t byteSize) + : mPropertyName(pname), mDatatypeName(dtypeName), mMessageOffset(offset), mByteSize(byteSize) + { + } + + StreamPropMessageArg() + { + } + + void serialize(PvdEventSerializer& s) + { + s.streamify(mPropertyName); + s.streamify(mDatatypeName); + s.streamify(mMessageOffset); + s.streamify(mByteSize); + } +}; + +/* + virtual PvdError createPropertyMessage( StreamNamespacedName cls, StreamNamespacedName msgName + , DataRef<PropertyMessageArg> entries, uint32_t messageSizeInBytes ) = + 0;*/ +struct CreatePropertyMessage : public EventSerializeable +{ + StreamNamespacedName mClass; + StreamNamespacedName mMessageName; + 
DataRef<StreamPropMessageArg> mMessageEntries; + uint32_t mMessageByteSize; + + CreatePropertyMessage(StreamNamespacedName cls, StreamNamespacedName msgName, DataRef<StreamPropMessageArg> propArg, + uint32_t messageByteSize) + : mClass(cls), mMessageName(msgName), mMessageEntries(propArg), mMessageByteSize(messageByteSize) + { + } + CreatePropertyMessage() + { + } + + void serialize(PvdEventSerializer& s) + { + s.streamify(mClass); + s.streamify(mMessageName); + s.streamify(mMessageEntries); + s.streamify(mMessageByteSize); + } +}; + +/**Changing immediate data on instances*/ + +// virtual PvdError createInstance( StreamNamespacedName cls, uint64_t instance ) = 0; +struct CreateInstance : public EventSerializeable +{ + StreamNamespacedName mClass; + uint64_t mInstanceId; + + CreateInstance(StreamNamespacedName cls, uint64_t streamId) : mClass(cls), mInstanceId(streamId) + { + } + CreateInstance() + { + } + + void serialize(PvdEventSerializer& s) + { + s.streamify(mClass); + s.streamify(mInstanceId); + } +}; + +// virtual PvdError setPropertyValue( uint64_t instance, StringHandle name, DataRef<const uint8_t> data, +// StreamNamespacedName incomingTypeName ) = 0; +struct SetPropertyValue : public EventSerializeable +{ + uint64_t mInstanceId; + StringHandle mPropertyName; + DataRef<const uint8_t> mData; + StreamNamespacedName mIncomingTypeName; + uint32_t mNumItems; + + SetPropertyValue(uint64_t instance, StringHandle name, DataRef<const uint8_t> data, + StreamNamespacedName incomingTypeName, uint32_t numItems) + : mInstanceId(instance), mPropertyName(name), mData(data), mIncomingTypeName(incomingTypeName), mNumItems(numItems) + { + } + + SetPropertyValue() + { + } + + void serializeBeginning(PvdEventSerializer& s) + { + s.streamify(mInstanceId); + s.streamify(mPropertyName); + s.streamify(mIncomingTypeName); + s.streamify(mNumItems); + } + + void serialize(PvdEventSerializer& s) + { + serializeBeginning(s); + s.streamify(mData); + } +}; + 
+DECLARE_TYPE_VARIABLE_SIZED(SetPropertyValue)
+
+// Opens a multi-part property write: instance, property name and incoming datatype.
+struct BeginSetPropertyValue : public EventSerializeable
+{
+    uint64_t mInstanceId;
+    StringHandle mPropertyName;
+    StreamNamespacedName mIncomingTypeName;
+
+    BeginSetPropertyValue(uint64_t instance, StringHandle name, StreamNamespacedName incomingTypeName)
+    : mInstanceId(instance), mPropertyName(name), mIncomingTypeName(incomingTypeName)
+    {
+    }
+    BeginSetPropertyValue() : mInstanceId(0)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstanceId);
+        s.streamify(mPropertyName);
+        s.streamify(mIncomingTypeName);
+    }
+};
+
+// virtual PvdError appendPropertyValueData( DataRef<const uint8_t> data ) = 0;
+struct AppendPropertyValueData : public EventSerializeable
+{
+    DataRef<const uint8_t> mData;
+    uint32_t mNumItems;
+    AppendPropertyValueData(DataRef<const uint8_t> data, uint32_t numItems) : mData(data), mNumItems(numItems)
+    {
+    }
+    AppendPropertyValueData() : mNumItems(0)
+    {
+    }
+
+    // Note the order: the data payload is streamed before the item count.
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mData);
+        s.streamify(mNumItems);
+    }
+};
+
+DECLARE_TYPE_VARIABLE_SIZED(AppendPropertyValueData)
+
+// virtual PvdError endSetPropertyValue() = 0;
+// Carries no payload.
+struct EndSetPropertyValue : public EventSerializeable
+{
+    EndSetPropertyValue()
+    {
+    }
+
+    void serialize(PvdEventSerializer&)
+    {
+    }
+};
+
+// virtual PvdError setPropertyMessage( uint64_t instance, StreamNamespacedName msgName, DataRef<const uint8_t> data ) =
+// 0;
+struct SetPropertyMessage : public EventSerializeable
+{
+    uint64_t mInstanceId;
+    StreamNamespacedName mMessageName;
+    DataRef<const uint8_t> mData;
+
+    SetPropertyMessage(uint64_t instance, StreamNamespacedName msgName, DataRef<const uint8_t> data)
+    : mInstanceId(instance), mMessageName(msgName), mData(data)
+    {
+    }
+
+    SetPropertyMessage() : mInstanceId(0)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstanceId);
+        s.streamify(mMessageName);
+        s.streamify(mData);
+    }
+};
+
+DECLARE_TYPE_VARIABLE_SIZED(SetPropertyMessage)
+
+// virtual PvdError beginPropertyMessageGroup( StreamNamespacedName msgName ) = 0;
+struct BeginPropertyMessageGroup : public EventSerializeable
+{
+    StreamNamespacedName mMsgName;
+    BeginPropertyMessageGroup(StreamNamespacedName msgName) : mMsgName(msgName)
+    {
+    }
+    BeginPropertyMessageGroup()
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mMsgName);
+    }
+};
+
+// virtual PvdError sendPropertyMessageFromGroup( uint64_t instance, DataRef<const uint8_t> data ) = 0;
+struct SendPropertyMessageFromGroup : public EventSerializeable
+{
+    uint64_t mInstance;
+    DataRef<const uint8_t> mData;
+
+    SendPropertyMessageFromGroup(uint64_t instance, DataRef<const uint8_t> data) : mInstance(instance), mData(data)
+    {
+    }
+    SendPropertyMessageFromGroup() : mInstance(0)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstance);
+        s.streamify(mData);
+    }
+};
+
+DECLARE_TYPE_VARIABLE_SIZED(SendPropertyMessageFromGroup)
+
+// virtual PvdError endPropertyMessageGroup() = 0;
+// Carries no payload.
+struct EndPropertyMessageGroup : public EventSerializeable
+{
+    EndPropertyMessageGroup()
+    {
+    }
+
+    void serialize(PvdEventSerializer&)
+    {
+    }
+};
+
+// Payload: owning instance, property name handle and the referenced object id.
+struct PushBackObjectRef : public EventSerializeable
+{
+    uint64_t mInstanceId;
+    StringHandle mProperty;
+    uint64_t mObjectRef;
+
+    PushBackObjectRef(uint64_t instId, StringHandle prop, uint64_t objRef)
+    : mInstanceId(instId), mProperty(prop), mObjectRef(objRef)
+    {
+    }
+
+    PushBackObjectRef() : mInstanceId(0), mObjectRef(0)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstanceId);
+        s.streamify(mProperty);
+        s.streamify(mObjectRef);
+    }
+};
+
+// Same layout as PushBackObjectRef.
+struct RemoveObjectRef : public EventSerializeable
+{
+    uint64_t mInstanceId;
+    StringHandle mProperty;
+    uint64_t mObjectRef;
+
+    RemoveObjectRef(uint64_t instId, StringHandle prop, uint64_t objRef)
+    : mInstanceId(instId), mProperty(prop), mObjectRef(objRef)
+    {
+    }
+
+    RemoveObjectRef() : mInstanceId(0), mObjectRef(0)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstanceId);
+        s.streamify(mProperty);
+        s.streamify(mObjectRef);
+    }
+};
+
+// virtual PvdError destroyInstance( uint64_t key ) = 0;
+struct DestroyInstance : public EventSerializeable
+{
+    uint64_t mInstanceId;
+    DestroyInstance(uint64_t instance) : mInstanceId(instance)
+    {
+    }
+    DestroyInstance() : mInstanceId(0)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstanceId);
+    }
+};
+
+// virtual PvdError beginSection( uint64_t sectionId, StringHandle name ) = 0;
+struct BeginSection : public EventSerializeable
+{
+    uint64_t mSectionId;
+    StringHandle mName;
+    Timestamp mTimestamp;
+    BeginSection(uint64_t sectionId, StringHandle name, uint64_t timestamp)
+    : mSectionId(sectionId), mName(name), mTimestamp(timestamp)
+    {
+    }
+    BeginSection() : mSectionId(0), mTimestamp(0)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mSectionId);
+        s.streamify(mName);
+        s.streamify(mTimestamp);
+    }
+};
+// virtual PvdError endSection( uint64_t sectionId, StringHandle name ) = 0;
+struct EndSection : public EventSerializeable
+{
+    uint64_t mSectionId;
+    StringHandle mName;
+    Timestamp mTimestamp;
+    EndSection(uint64_t sectionId, StringHandle name, uint64_t timestamp)
+    : mSectionId(sectionId), mName(name), mTimestamp(timestamp)
+    {
+    }
+    EndSection() : mSectionId(0), mTimestamp(0)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mSectionId);
+        s.streamify(mName);
+        s.streamify(mTimestamp);
+    }
+};
+
+// virtual void setPickable( void* instance, bool pickable ) = 0;
+struct SetPickable : public EventSerializeable
+{
+    uint64_t mInstanceId;
+    bool mPickable;
+    SetPickable(uint64_t instId, bool pick) : mInstanceId(instId), mPickable(pick)
+    {
+    }
+    // PvdEventSerializer::streamify(bool&) reads the flag before delegating, so the
+    // flag must hold a defined value even when the event is about to be overwritten.
+    SetPickable() : mInstanceId(0), mPickable(false)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstanceId);
+        s.streamify(mPickable);
+    }
+};
+// virtual void setColor( void* instance, const PvdColor& color ) = 0;
+struct SetColor : public EventSerializeable
+{
+    uint64_t mInstanceId;
+    PvdColor mColor;
+    SetColor(uint64_t instId, PvdColor color) : mInstanceId(instId), mColor(color)
+    {
+    }
+    SetColor() : mInstanceId(0)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstanceId);
+        s.streamify(mColor);
+    }
+};
+
+// Payload: instance id and its top-level flag.
+struct SetIsTopLevel : public EventSerializeable
+{
+    uint64_t mInstanceId;
+    bool mIsTopLevel;
+
+    SetIsTopLevel(uint64_t instId, bool topLevel) : mInstanceId(instId), mIsTopLevel(topLevel)
+    {
+    }
+    SetIsTopLevel() : mInstanceId(0), mIsTopLevel(false)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstanceId);
+        s.streamify(mIsTopLevel);
+    }
+};
+
+// Payload: camera name followed by position, up and target vectors.
+struct SetCamera : public EventSerializeable
+{
+    String mName;
+    PxVec3 mPosition;
+    PxVec3 mUp;
+    PxVec3 mTarget;
+    SetCamera(String name, const PxVec3& pos, const PxVec3& up, const PxVec3& target)
+    : mName(name), mPosition(pos), mUp(up), mTarget(target)
+    {
+    }
+    SetCamera() : mName(NULL)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mName);
+        s.streamify(mPosition);
+        s.streamify(mUp);
+        s.streamify(mTarget);
+    }
+};
+
+// Payload: error code, message text, source file and line.
+struct ErrorMessage : public EventSerializeable
+{
+    uint32_t mCode;
+    String mMessage;
+    String mFile;
+    uint32_t mLine;
+
+    ErrorMessage(uint32_t code, String message, String file, uint32_t line)
+    : mCode(code), mMessage(message), mFile(file), mLine(line)
+    {
+    }
+
+    ErrorMessage() : mCode(0), mMessage(NULL), mFile(NULL), mLine(0)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mCode);
+        s.streamify(mMessage);
+        s.streamify(mFile);
+        s.streamify(mLine);
+    }
+};
+
+// Payload: instance id and zone name.
+struct AddProfileZone : public EventSerializeable
+{
+    uint64_t mInstanceId;
+    String mName;
+    AddProfileZone(uint64_t iid, String nm) : mInstanceId(iid), mName(nm)
+    {
+    }
+    AddProfileZone() : mInstanceId(0), mName(NULL)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstanceId);
+        s.streamify(mName);
+    }
+};
+
+// Payload: instance id, event name, event id and the compile-time-enabled flag.
+struct AddProfileZoneEvent : public EventSerializeable
+{
+    uint64_t mInstanceId;
+    String mName;
+    uint16_t mEventId;
+    bool mCompileTimeEnabled;
+    AddProfileZoneEvent(uint64_t iid, String nm, uint16_t eid, bool cte)
+    : mInstanceId(iid), mName(nm), mEventId(eid), mCompileTimeEnabled(cte)
+    {
+    }
+    // Null the name like AddProfileZone does, and give the flag a defined value because
+    // PvdEventSerializer::streamify(bool&) reads it before delegating.
+    AddProfileZoneEvent() : mInstanceId(0), mName(NULL), mEventId(0), mCompileTimeEnabled(false)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstanceId);
+        s.streamify(mName);
+        s.streamify(mEventId);
+        s.streamify(mCompileTimeEnabled);
+    }
+};
+
+// Payload: the fixed string "StreamEnd".
+struct StreamEndEvent : public EventSerializeable
+{
+    String mName;
+    StreamEndEvent() : mName("StreamEnd")
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mName);
+    }
+};
+
+// Payload: instance id and the shift vector.
+struct OriginShift : public EventSerializeable
+{
+    uint64_t mInstanceId;
+    PxVec3 mShift;
+
+    OriginShift(uint64_t iid, const PxVec3& shift) : mInstanceId(iid), mShift(shift)
+    {
+    }
+    OriginShift() : mInstanceId(0)
+    {
+    }
+
+    void serialize(PvdEventSerializer& s)
+    {
+        s.streamify(mInstanceId);
+        s.streamify(mShift);
+    }
+};
+} // pvdsdk
+} // physx
+
+#endif // PXPVDSDK_PXPVDCOMMSTREAMEVENTS_H
diff --git a/PxShared/src/pvd/src/PxPvdCommStreamSDKEventTypes.h b/PxShared/src/pvd/src/PxPvdCommStreamSDKEventTypes.h
new file mode 100644
index 0000000..4105730
--- /dev/null
+++ b/PxShared/src/pvd/src/PxPvdCommStreamSDKEventTypes.h
@@ -0,0 +1,32 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#define THERE_IS_NO_INCLUDE_GUARD_FOR_A_REASON + +DECLARE_PVD_COMM_STREAM_SDK_EVENT(SetPauseState) + +#undef THERE_IS_NO_INCLUDE_GUARD_FOR_A_REASON diff --git a/PxShared/src/pvd/src/PxPvdCommStreamTypes.h b/PxShared/src/pvd/src/PxPvdCommStreamTypes.h new file mode 100644 index 0000000..cbfda4d --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdCommStreamTypes.h @@ -0,0 +1,262 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+
+#ifndef PXPVDSDK_PXPVDCOMMSTREAMTYPES_H
+#define PXPVDSDK_PXPVDCOMMSTREAMTYPES_H
+
+#include "foundation/PxErrorCallback.h"
+#include "pvd/PxPvdTransport.h"
+
+#include "PxPvdRenderBuffer.h"
+#include "PxPvdObjectModelBaseTypes.h"
+#include "PxPvdCommStreamEvents.h"
+#include "PxPvdDataStream.h"
+#include "PsMutex.h"
+
+namespace physx
+{
+namespace profile
+{
+class PxProfileZone;
+class PxProfileMemoryEventBuffer;
+}
+namespace pvdsdk
+{
+struct PvdErrorMessage;
+class PvdObjectModelMetaData;
+
+DEFINE_PVD_TYPE_NAME_MAP(profile::PxProfileZone, "_debugger_", "PxProfileZone")
+DEFINE_PVD_TYPE_NAME_MAP(profile::PxProfileMemoryEventBuffer, "_debugger_", "PxProfileMemoryEventBuffer")
+DEFINE_PVD_TYPE_NAME_MAP(PvdErrorMessage, "_debugger_", "PvdErrorMessage")
+// All event streams are on the 'events' property of objects of these types
+// NOTE(review): the namespace here is "_debugger" while the type maps above use
+// "_debugger_". The string is left untouched because it is part of the emitted
+// data; confirm whether the difference is intentional.
+static inline NamespacedName getMemoryEventTotalsClassName()
+{
+    return NamespacedName("_debugger", "MemoryEventTotals");
+}
+
+// Reference-counted access to the object-model meta data; lock() hands out the
+// meta data and must be paired with unlock().
+class PvdOMMetaDataProvider
+{
+  protected:
+    virtual ~PvdOMMetaDataProvider()
+    {
+    }
+
+  public:
+    virtual void addRef() = 0;
+    virtual void release() = 0;
+    virtual PvdObjectModelMetaData& lock() = 0;
+    virtual void unlock() = 0;
+    virtual bool createInstance(const NamespacedName& clsName, const void* instance) = 0;
+    virtual bool isInstanceValid(const void* instance) = 0;
+    virtual void destroyInstance(const void* instance) = 0;
+    virtual int32_t getInstanceClassType(const void* instance) = 0;
+};
+
+class PvdCommStreamInternalConnection;
+
+// Callbacks for connection teardown and instance destruction.
+class PvdConnectionListener
+{
+  protected:
+    virtual ~PvdConnectionListener()
+    {
+    }
+
+  public:
+    virtual void onDisconnect(PvdCommStreamInternalConnection& connection) = 0;
+    virtual void onInstanceDestroyed(const void* instance) = 0;
+};
+
+// Semantic strings for the built-in event stream properties.
+class PvdCommStreamEmbeddedTypes
+{
+  public:
+    static const char* getProfileEventStreamSemantic()
+    {
+        return "profile event stream";
+    }
+    static const char* getMemoryEventStreamSemantic()
+    {
+        return "memory event stream";
+    }
+    static const char* getRendererEventStreamSemantic()
+    {
+        return "render event stream";
+    }
+};
+
+class PvdCommStreamEventBufferClient;
+
+// PvdEventSerializer that writes every field into mBuffer.
+// TStreamType must provide write(const uint8_t*, uint32_t).
+template <typename TStreamType>
+struct EventStreamifier : public PvdEventSerializer
+{
+    TStreamType& mBuffer;
+    EventStreamifier(TStreamType& buf) : mBuffer(buf)
+    {
+    }
+
+    // Writes the raw bytes of a single value.
+    template <typename TDataType>
+    void write(const TDataType& type)
+    {
+        mBuffer.write(reinterpret_cast<const uint8_t*>(&type), sizeof(TDataType));
+    }
+    // Writes the raw bytes of count consecutive values.
+    template <typename TDataType>
+    void write(const TDataType* type, uint32_t count)
+    {
+        mBuffer.write(reinterpret_cast<const uint8_t*>(type), count * sizeof(TDataType));
+    }
+
+    // Byte and StringHandle ranges are written as a uint32 count followed by the raw items.
+    void writeRef(DataRef<const uint8_t> data)
+    {
+        uint32_t amount = static_cast<uint32_t>(data.size());
+        write(amount);
+        write(data.begin(), amount);
+    }
+    void writeRef(DataRef<StringHandle> data)
+    {
+        uint32_t amount = static_cast<uint32_t>(data.size());
+        write(amount);
+        write(data.begin(), amount);
+    }
+    // Any other range is written as a uint32 count followed by each item's serialize().
+    template <typename TDataType>
+    void writeRef(DataRef<TDataType> data)
+    {
+        uint32_t amount = static_cast<uint32_t>(data.size());
+        write(amount);
+        for(uint32_t idx = 0; idx < amount; ++idx)
+        {
+            // serialize() is non-const, hence the const_cast.
+            TDataType& dtype(const_cast<TDataType&>(data[idx]));
+            dtype.serialize(*this);
+        }
+    }
+
+    virtual void streamify(uint16_t& val)
+    {
+        write(val);
+    }
+    virtual void streamify(uint8_t& val)
+    {
+        write(val);
+    }
+    virtual void streamify(uint32_t& val)
+    {
+        write(val);
+    }
+    virtual void streamify(float& val)
+    {
+        write(val);
+    }
+    virtual void streamify(uint64_t& val)
+    {
+        write(val);
+    }
+    virtual void streamify(PvdDebugText& val)
+    {
+        write(val.color);
+        write(val.position);
+        write(val.size);
+        streamify(val.string);
+    }
+
+    // A string is written as a uint32 length followed by that many bytes. A null or
+    // empty string has length 0 and no payload; otherwise the length includes the
+    // terminating zero.
+    virtual void streamify(String& val)
+    {
+        uint32_t len = 0;
+        String temp = nonNull(val);
+        if(*temp)
+            len = static_cast<uint32_t>(strlen(temp) + 1);
+        write(len);
+        // Write through temp, not val, so a NULL val is never handed to the buffer.
+        write(temp, len);
+    }
+    virtual void streamify(DataRef<const uint8_t>& val)
+    {
+        writeRef(val);
+    }
+    virtual void streamify(DataRef<NameHandleValue>& val)
+    {
+        writeRef(val);
+    }
+    virtual void streamify(DataRef<StreamPropMessageArg>& val)
+    {
+        writeRef(val);
+    }
+    virtual void streamify(DataRef<StringHandle>& val)
+    {
+        writeRef(val);
+    }
+
+  private:
+    EventStreamifier& operator=(const EventStreamifier&);
+};
+
+// Stream stand-in that only accumulates the number of bytes that would be written.
+struct MeasureStream
+{
+    uint32_t mSize;
+    MeasureStream() : mSize(0)
+    {
+    }
+    template <typename TDataType>
+    void write(const TDataType& val)
+    {
+        mSize += sizeof(val);
+    }
+    template <typename TDataType>
+    void write(const TDataType*, uint32_t count)
+    {
+        mSize += sizeof(TDataType) * count;
+    }
+};
+
+struct DataStreamState
+{
+    enum Enum
+    {
+        Open,
+        SetPropertyValue,
+        PropertyMessageGroup
+    };
+};
+
+// Serializer that is given its input data via setData() and reports whether it
+// swaps bytes and whether it is still in a good state.
+class ExtendedEventSerializer : public PvdEventSerializer
+{
+  protected:
+    virtual ~ExtendedEventSerializer()
+    {
+    }
+
+  public:
+    virtual void setData(DataRef<const uint8_t> eventData) = 0;
+    // True if this serializer performs byte swapping
+    virtual bool performsSwap() = 0;
+
+    virtual bool isGood() = 0;
+
+    virtual void release() = 0;
+
+    static ExtendedEventSerializer& createInputSerializer(bool swapBytes);
+};
+
+} // pvdsdk
+} // physx
+#endif // PXPVDSDK_PXPVDCOMMSTREAMTYPES_H
diff --git a/PxShared/src/pvd/src/PxPvdDataStream.cpp b/PxShared/src/pvd/src/PxPvdDataStream.cpp
new file mode 100644
index 0000000..da13140
--- /dev/null
+++ b/PxShared/src/pvd/src/PxPvdDataStream.cpp
@@ -0,0 +1,870 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#include "PxPvdDataStream.h" +#include "PxPvdFoundation.h" +#include "PxPvdCommStreamEvents.h" +#include "PxPvdCommStreamEventSink.h" +#include "PxPvdCommStreamTypes.h" +#include "PxPvdDataStreamHelpers.h" +#include "PxPvdObjectModelInternalTypes.h" +#include "PxPvdImpl.h" +#include "PsTime.h" +#include "PsFoundation.h" +#include "foundation/PxMemory.h" + +using namespace physx; +using namespace physx::pvdsdk; +using namespace physx::shdfnd; + +namespace +{ + +struct ScopedMetaData +{ + PvdOMMetaDataProvider& mProvider; + PvdObjectModelMetaData& mMeta; + ScopedMetaData(PvdOMMetaDataProvider& provider) : mProvider(provider), mMeta(provider.lock()) + { + } + ~ScopedMetaData() + { + mProvider.unlock(); + } + PvdObjectModelMetaData* operator->() + { + return &mMeta; + } + + private: + ScopedMetaData& operator=(const ScopedMetaData&); +}; + +struct PropertyDefinitionHelper : public PvdPropertyDefinitionHelper +{ + PvdDataStream* mStream; + PvdOMMetaDataProvider& mProvider; + Array<char> mNameBuffer; + Array<uint32_t> mNameStack; + Array<NamedValue> mNamedValues; + Array<PropertyMessageArg> mPropertyMessageArgs; + + PropertyDefinitionHelper(PvdOMMetaDataProvider& provider) + : mStream(NULL) + , mProvider(provider) + , mNameBuffer("PropertyDefinitionHelper::mNameBuffer") + , mNameStack("PropertyDefinitionHelper::mNameStack") + , mNamedValues("PropertyDefinitionHelper::mNamedValues") + , mPropertyMessageArgs("PropertyDefinitionHelper::mPropertyMessageArgs") + { + } + void setStream(PvdDataStream* stream) + { + mStream = stream; + } + + inline void appendStrToBuffer(const char* str) + { + if(str == NULL) + return; + size_t strLen = strlen(str); + size_t endBufOffset = mNameBuffer.size(); + size_t resizeLen = endBufOffset; + // account for null + if(mNameBuffer.empty()) + resizeLen += 1; + else + endBufOffset -= 1; + + mNameBuffer.resize(static_cast<uint32_t>(resizeLen + strLen)); + char* endPtr = mNameBuffer.begin() + endBufOffset; + PxMemCopy(endPtr, str, 
static_cast<uint32_t>(strLen)); + } + + virtual void pushName(const char* nm, const char* appender = ".") + { + size_t nameBufLen = mNameBuffer.size(); + mNameStack.pushBack(static_cast<uint32_t>(nameBufLen)); + if(mNameBuffer.empty() == false) + appendStrToBuffer(appender); + appendStrToBuffer(nm); + mNameBuffer.back() = 0; + } + + virtual void pushBracketedName(const char* inName, const char* leftBracket = "[", const char* rightBracket = "]") + { + size_t nameBufLen = mNameBuffer.size(); + mNameStack.pushBack(static_cast<uint32_t>(nameBufLen)); + appendStrToBuffer(leftBracket); + appendStrToBuffer(inName); + appendStrToBuffer(rightBracket); + mNameBuffer.back() = 0; + } + + virtual void popName() + { + if(mNameStack.empty()) + return; + mNameBuffer.resize(static_cast<uint32_t>(mNameStack.back())); + mNameStack.popBack(); + if(mNameBuffer.empty() == false) + mNameBuffer.back() = 0; + } + + virtual const char* getTopName() + { + if(mNameBuffer.size()) + return mNameBuffer.begin(); + return ""; + } + virtual void clearNameStack() + { + mNameBuffer.clear(); + mNameStack.clear(); + } + + virtual void addNamedValue(const char* name, uint32_t value) + { + mNamedValues.pushBack(NamedValue(name, value)); + } + virtual void clearNamedValues() + { + mNamedValues.clear(); + } + + virtual DataRef<NamedValue> getNamedValues() + { + return DataRef<NamedValue>(mNamedValues.begin(), mNamedValues.size()); + } + + virtual void createProperty(const NamespacedName& clsName, const char* inSemantic, const NamespacedName& dtypeName, + PropertyType::Enum propType) + { + mStream->createProperty(clsName, getTopName(), inSemantic, dtypeName, propType, getNamedValues()); + clearNamedValues(); + } + const char* registerStr(const char* str) + { + ScopedMetaData scopedProvider(mProvider); + return scopedProvider->getStringTable().registerStr(str); + } + virtual void addPropertyMessageArg(const NamespacedName& inDatatype, uint32_t inOffset, uint32_t inSize) + { + 
mPropertyMessageArgs.pushBack(PropertyMessageArg(registerStr(getTopName()), inDatatype, inOffset, inSize)); + } + virtual void addPropertyMessage(const NamespacedName& clsName, const NamespacedName& msgName, + uint32_t inStructSizeInBytes) + { + if(mPropertyMessageArgs.empty()) + { + PX_ASSERT(false); + return; + } + mStream->createPropertyMessage( + clsName, msgName, DataRef<PropertyMessageArg>(mPropertyMessageArgs.begin(), mPropertyMessageArgs.size()), + inStructSizeInBytes); + } + virtual void clearPropertyMessageArgs() + { + mPropertyMessageArgs.clear(); + } + + private: + PropertyDefinitionHelper& operator=(const PropertyDefinitionHelper&); +}; + +class PvdMemPool +{ + // Link List + Array<uint8_t*> mMemBuffer; + uint32_t mLength; + uint32_t mBufIndex; + + // 4k for one page + static const int BUFFER_LENGTH = 4096; + PX_NOCOPY(PvdMemPool) + public: + PvdMemPool(const char* bufDataName) : mMemBuffer(bufDataName), mLength(0), mBufIndex(0) + { + grow(); + } + + ~PvdMemPool() + { + for(uint32_t i = 0; i < mMemBuffer.size(); i++) + { + PX_FREE(mMemBuffer[i]); + } + } + + void grow() + { + if(mBufIndex + 1 < mMemBuffer.size()) + { + mBufIndex++; + } + else + { + uint8_t* Buf = reinterpret_cast<uint8_t*>(PX_ALLOC(BUFFER_LENGTH, "PvdMemPool::mMemBuffer.buf")); + mMemBuffer.pushBack(Buf); + mBufIndex = mMemBuffer.size() - 1; + } + mLength = 0; + } + + void* allocate(uint32_t length) + { + if(length > uint32_t(BUFFER_LENGTH)) + return NULL; + + if(length + mLength > uint32_t(BUFFER_LENGTH)) + grow(); + + void* mem = reinterpret_cast<void*>(&mMemBuffer[mBufIndex][mLength]); + mLength += length; + return mem; + } + + void clear() + { + mLength = 0; + mBufIndex = 0; + } +}; +struct PvdOutStream : public PvdDataStream, public UserAllocated +{ + HashMap<String, uint32_t> mStringHashMap; + PvdOMMetaDataProvider& mMetaDataProvider; + Array<uint8_t> mTempBuffer; + PropertyDefinitionHelper mPropertyDefinitionHelper; + DataStreamState::Enum mStreamState; + + ClassDescription 
mSPVClass; + PropertyMessageDescription mMessageDesc; + // Set property value and SetPropertyMessage calls require + // us to write the data out to a separate buffer + // when strings are involved. + ForwardingMemoryBuffer mSPVBuffer; + uint32_t mEventCount; + uint32_t mPropertyMessageSize; + bool mConnected; + uint64_t mStreamId; + Array<PvdCommand*> mPvdCommandArray; + PvdMemPool mPvdCommandPool; + PxPvdTransport& mTransport; + + /* The stream starts in the Open state with mConnected set to true, and registers itself as the target stream of its property definition helper. */ + PvdOutStream(PxPvdTransport& transport, PvdOMMetaDataProvider& provider, uint64_t streamId) + : mStringHashMap("PvdOutStream::mStringHashMap") + , mMetaDataProvider(provider) + , mTempBuffer("PvdOutStream::mTempBuffer") + , mPropertyDefinitionHelper(mMetaDataProvider) + , mStreamState(DataStreamState::Open) + , mSPVBuffer("PvdCommStreamBufferedEventSink::mSPVBuffer") + , mEventCount(0) + , mPropertyMessageSize(0) + , mConnected(true) + , mStreamId(streamId) + , mPvdCommandArray("PvdCommStreamBufferedEventSink::mPvdCommandArray") + , mPvdCommandPool("PvdCommStreamBufferedEventSink::mPvdCommandPool") + , mTransport(transport) + { + mPropertyDefinitionHelper.setStream(this); + } + virtual ~PvdOutStream() + { + } + + virtual void release() + { + PVD_DELETE(this); + } + + /* Returns the stream handle for nm. NULL or empty strings map to handle 0. A handle already cached in mStringHashMap is returned directly; otherwise the handle comes from the meta data string table, a StringHandleEvent is sent once for it, and the (table-owned string, handle) pair is cached. */ + StringHandle toStream(String nm) + { + if(nm == NULL || *nm == 0) + return 0; + const HashMap<String, uint32_t>::Entry* entry(mStringHashMap.find(nm)); + if(entry) + return entry->second; + ScopedMetaData meta(mMetaDataProvider); + StringHandle hdl = meta->getStringTable().strToHandle(nm); + nm = meta->getStringTable().handleToStr(hdl); + handlePvdEvent(StringHandleEvent(nm, hdl)); + mStringHashMap.insert(nm, hdl); + return hdl; + } + + /* Converts both parts of a namespaced name to stream handles. */ + StreamNamespacedName toStream(const NamespacedName& nm) + { + return StreamNamespacedName(toStream(nm.mNamespace), toStream(nm.mName)); + } + + bool isClassExist(const NamespacedName& nm) + { + ScopedMetaData meta(mMetaDataProvider); + return meta->findClass(nm).hasValue(); + } + + /* Ensures the class exists in the meta data; always returns true. */ + bool createMetaClass(const NamespacedName& nm) + { + 
ScopedMetaData meta(mMetaDataProvider); + meta->getOrCreateClass(nm); + return true; + } + + /* Records the parent/child relationship in the meta data and returns the meta data's result. */ + bool deriveMetaClass(const NamespacedName& parent, const NamespacedName& child) + { + ScopedMetaData meta(mMetaDataProvider); + return meta->deriveClass(parent, child); + } + +// You will notice that some functions are compiled only under #if PX_DEBUG throughout this file. +// This is because they are only called from asserts which means they aren't +// called in release. This causes warnings when building using snc which break +// the build. +#if PX_DEBUG + + bool propertyExists(const NamespacedName& nm, String pname) + { + ScopedMetaData meta(mMetaDataProvider); + return meta->findProperty(nm, pname).hasValue(); + } + +#endif + + /* Maps true to Success and false to NetworkError. */ + PvdError boolToError(bool val) + { + if(val) + return PvdErrorType::Success; + return PvdErrorType::NetworkError; + } + + // PvdMetaDataStream + /* Creates the class in the local meta data, then sends a CreateClass event. Debug builds assert that the class does not exist yet. */ + virtual PvdError createClass(const NamespacedName& nm) + { + PX_ASSERT(mStreamState == DataStreamState::Open); +#if PX_DEBUG + PX_ASSERT(isClassExist(nm) == false); +#endif + createMetaClass(nm); + return boolToError(handlePvdEvent(CreateClass(toStream(nm)))); + } + + /* Records the derivation in the local meta data (its result is ignored), then sends a DeriveClass event. Debug builds assert that both classes exist. */ + virtual PvdError deriveClass(const NamespacedName& parent, const NamespacedName& child) + { + PX_ASSERT(mStreamState == DataStreamState::Open); +#if PX_DEBUG + PX_ASSERT(isClassExist(parent)); + PX_ASSERT(isClassExist(child)); +#endif + deriveMetaClass(parent, child); + return boolToError(handlePvdEvent(DeriveClass(toStream(parent), toStream(child)))); + } + + /* Returns mTempBuffer reinterpreted as an array of numItems default-constructed TDataType, growing the buffer when it is too small. Every call hands out the start of the same buffer, so the result is only valid until the next allocTemp call. The byte count is computed in 32 bits without an overflow check. */ + template <typename TDataType> + TDataType* allocTemp(uint32_t numItems) + { + uint32_t desiredBytes = numItems * sizeof(TDataType); + if(desiredBytes > mTempBuffer.size()) + mTempBuffer.resize(desiredBytes); + TDataType* retval = reinterpret_cast<TDataType*>(mTempBuffer.begin()); + if(numItems) + { + PVD_FOREACH(idx, numItems) new (retval + idx) TDataType(); + } + return retval; + } + +#if PX_DEBUG + + // Property datatypes need to be uniform. 
+ // At this point, the data stream cannot handle property datatypes whose class + // requires destruction (the original sentence was truncated; this is what the check below tests). + // A struct with a float member and a char member would work. + // A struct with a float member and a long member would work (more efficiently). + bool isValidPropertyDatatype(const NamespacedName& dtypeName) + { + ScopedMetaData meta(mMetaDataProvider); + ClassDescription clsDesc(meta->findClass(dtypeName)); + return clsDesc.mRequiresDestruction == false; + } + +#endif + + /* Registers the property in the local meta data and returns the datatype name to use on the stream. A String datatype is replaced by StringHandle (both the type id and the returned name); any other datatype name is returned unchanged. Dereferences the findClass results without checking them, so both classes must already exist. */ + NamespacedName createMetaProperty(const NamespacedName& clsName, String name, String semantic, + const NamespacedName& dtypeName, PropertyType::Enum propertyType) + { + ScopedMetaData meta(mMetaDataProvider); + int32_t dtypeType = meta->findClass(dtypeName)->mClassId; + NamespacedName typeName = dtypeName; + if(dtypeType == getPvdTypeForType<String>()) + { + dtypeType = getPvdTypeForType<StringHandle>(); + typeName = getPvdNamespacedNameForType<StringHandle>(); + } + Option<PropertyDescription> propOpt = + meta->createProperty(meta->findClass(clsName)->mClassId, name, semantic, dtypeType, propertyType); + PX_ASSERT(propOpt.hasValue()); + PX_UNUSED(propOpt); + return typeName; + } + + /* Defines a property on clsName both in the local meta data and on the stream. An incoming datatype named "VoidPtr" is treated as "ObjectRef". Returns ArgumentError for array properties of StringHandle, otherwise the result of sending the CreateProperty event. */ + virtual PvdError createProperty(const NamespacedName& clsName, String name, String semantic, + const NamespacedName& incomingDtypeName, PropertyType::Enum propertyType, + DataRef<NamedValue> values) + { + PX_ASSERT(mStreamState == DataStreamState::Open); +#if PX_DEBUG + PX_ASSERT(isClassExist(clsName)); + PX_ASSERT(propertyExists(clsName, name) == false); +#endif + NamespacedName dtypeName(incomingDtypeName); + if(safeStrEq(dtypeName.mName, "VoidPtr")) + dtypeName.mName = "ObjectRef"; +#if PX_DEBUG + PX_ASSERT(isClassExist(dtypeName)); + PX_ASSERT(isValidPropertyDatatype(dtypeName)); +#endif + NamespacedName typeName = createMetaProperty(clsName, name, semantic, dtypeName, propertyType); + // Can't have arrays of strings or arrays of string handles due to the difficulty + // of quickly dealing with them on the network receiving side. 
+ if(propertyType == PropertyType::Array && safeStrEq(typeName.mName, "StringHandle")) + { + PX_ASSERT(false); + return PvdErrorType::ArgumentError; + } + /* NOTE(review): createMetaProperty has already registered the property in the local meta data by the time the check above rejects it, so a rejected property stays in the meta data without ever being sent. */ + /* Convert the named values to stream handles in the shared temp buffer before building the event. */ + uint32_t numItems = values.size(); + NameHandleValue* streamValues = allocTemp<NameHandleValue>(numItems); + PVD_FOREACH(idx, numItems) + streamValues[idx] = NameHandleValue(toStream(values[idx].mName), values[idx].mValue); + CreateProperty evt(toStream(clsName), toStream(name), toStream(semantic), toStream(typeName), propertyType, + DataRef<NameHandleValue>(streamValues, numItems)); + return boolToError(handlePvdEvent(evt)); + } + + /* Registers the property message in the local meta data; returns whether the meta data produced a description. */ + bool createMetaPropertyMessage(const NamespacedName& cls, const NamespacedName& msgName, + DataRef<PropertyMessageArg> entries, uint32_t messageSizeInBytes) + { + ScopedMetaData meta(mMetaDataProvider); + return meta->createPropertyMessage(cls, msgName, entries, messageSizeInBytes).hasValue(); + } +#if PX_DEBUG + + bool messageExists(const NamespacedName& msgName) + { + ScopedMetaData meta(mMetaDataProvider); + return meta->findPropertyMessage(msgName).hasValue(); + } + +#endif + + /* Defines a property message for cls: registers it in the local meta data (the result is ignored), converts every entry to its stream form in the shared temp buffer and sends a CreatePropertyMessage event. Debug builds assert that the class exists and the message does not. */ + virtual PvdError createPropertyMessage(const NamespacedName& cls, const NamespacedName& msgName, + DataRef<PropertyMessageArg> entries, uint32_t messageSizeInBytes) + { + PX_ASSERT(mStreamState == DataStreamState::Open); +#if PX_DEBUG + PX_ASSERT(isClassExist(cls)); + PX_ASSERT(messageExists(msgName) == false); +#endif + createMetaPropertyMessage(cls, msgName, entries, messageSizeInBytes); + uint32_t numItems = entries.size(); + StreamPropMessageArg* streamValues = allocTemp<StreamPropMessageArg>(numItems); + PVD_FOREACH(idx, numItems) + streamValues[idx] = + StreamPropMessageArg(toStream(entries[idx].mPropertyName), toStream(entries[idx].mDatatypeName), + entries[idx].mMessageOffset, entries[idx].mByteSize); + CreatePropertyMessage evt(toStream(cls), toStream(msgName), + DataRef<StreamPropMessageArg>(streamValues, numItems), messageSizeInBytes); + return boolToError(handlePvdEvent(evt)); + } + + uint64_t 
toStream(const void* instance) + { + return PVD_POINTER_TO_U64(instance); + } + /* Registers the instance with the meta data provider (failure only asserts) and sends a CreateInstance event. Asserts that the instance is not already registered. */ + virtual PvdError createInstance(const NamespacedName& cls, const void* instance) + { + PX_ASSERT(isInstanceValid(instance) == false); + PX_ASSERT(mStreamState == DataStreamState::Open); + bool success = mMetaDataProvider.createInstance(cls, instance); + PX_ASSERT(success); + (void)success; + return boolToError(handlePvdEvent(CreateInstance(toStream(cls), toStream(instance)))); + } + + virtual bool isInstanceValid(const void* instance) + { + return mMetaDataProvider.isInstanceValid(instance); + } + +#if PX_DEBUG + + // If the property will fit or is already completely in memory + /* Debug-only check used by beginSetPropertyValue. Returns false when the instance's class has no property called name. For an incoming type other than VoidPtr, returns whether no marshalling is needed or marshalling is possible; for VoidPtr, the property datatype must be ObjectRef. */ + bool checkPropertyType(const void* instance, String name, const NamespacedName& incomingType) + { + int32_t instType = mMetaDataProvider.getInstanceClassType(instance); + ScopedMetaData meta(mMetaDataProvider); + Option<PropertyDescription> prop = meta->findProperty(instType, name); + if(prop.hasValue() == false) + return false; + int32_t propType = prop->mDatatype; + int32_t incomingTypeId = meta->findClass(incomingType)->mClassId; + if(incomingTypeId != getPvdTypeForType<VoidPtr>()) + { + MarshalQueryResult result = meta->checkMarshalling(incomingTypeId, propType); + bool possible = result.needsMarshalling == false || result.canMarshal; + return possible; + } + else + { + if(propType != getPvdTypeForType<ObjectRef>()) + return false; + } + return true; + } + +#endif + + /* When the datatype has pointer offsets, copies each item into mSPVBuffer followed by the NUL-terminated strings referenced by its non-VoidPtr pointer members, and returns a DataRef over mSPVBuffer; otherwise returns data unchanged. Divides by info.mByteSize, so a zero byte size is not handled here. */ + DataRef<const uint8_t> bufferPropertyValue(ClassDescriptionSizeInfo info, DataRef<const uint8_t> data) + { + uint32_t realSize = info.mByteSize; + uint32_t numItems = data.size() / realSize; + if(info.mPtrOffsets.size() != 0) + { + mSPVBuffer.clear(); + PVD_FOREACH(item, numItems) + { + const uint8_t* itemPtr = data.begin() + item * realSize; + mSPVBuffer.write(itemPtr, realSize); + PVD_FOREACH(stringIdx, info.mPtrOffsets.size()) + { + PtrOffset offset(info.mPtrOffsets[stringIdx]); + if(offset.mOffsetType == PtrOffsetType::VoidPtrOffset) + 
continue; + /* Read the string pointer stored inside the item with memCopy, substitute a non-null string for NULL, and append the string including its terminator. */ + const char* strPtr; + physx::intrinsics::memCopy(&strPtr, itemPtr + offset.mOffset, sizeof(char*)); + strPtr = nonNull(strPtr); + uint32_t len = safeStrLen(strPtr) + 1; + mSPVBuffer.write(strPtr, len); + } + } + data = DataRef<const uint8_t>(mSPVBuffer.begin(), mSPVBuffer.size()); + } + return data; + } + + /* Sends a complete property value in one SetPropertyValue event. The item count is data.size() divided by the native size of incomingTypeName (a zero native size is not handled); data is re-buffered through bufferPropertyValue when the type contains string pointers. Unlike beginSetPropertyValue, no checkPropertyType assert is made here. */ + virtual PvdError setPropertyValue(const void* instance, String name, DataRef<const uint8_t> data, + const NamespacedName& incomingTypeName) + { + + PX_ASSERT(isInstanceValid(instance)); +#if PX_DEBUG + PX_ASSERT(isClassExist(incomingTypeName)); +#endif + PX_ASSERT(mStreamState == DataStreamState::Open); + ClassDescription clsDesc; + { + ScopedMetaData meta(mMetaDataProvider); + clsDesc = meta->findClass(incomingTypeName); + } + uint32_t realSize = clsDesc.getNativeSize(); + uint32_t numItems = data.size() / realSize; + data = bufferPropertyValue(clsDesc.getNativeSizeInfo(), data); + SetPropertyValue evt(toStream(instance), toStream(name), data, toStream(incomingTypeName), numItems); + return boolToError(handlePvdEvent(evt)); + } + + // Else if the property is very large (contact reports) you can send it in chunks. 
+ /* Starts a chunked property update: switches the stream to the SetPropertyValue state, caches the class description of incomingTypeName in mSPVClass for the following appendPropertyValueData calls, and sends a BeginSetPropertyValue event. */ + virtual PvdError beginSetPropertyValue(const void* instance, String name, const NamespacedName& incomingTypeName) + { + PX_ASSERT(isInstanceValid(instance)); +#if PX_DEBUG + PX_ASSERT(isClassExist(incomingTypeName)); + PX_ASSERT(checkPropertyType(instance, name, incomingTypeName)); +#endif + PX_ASSERT(mStreamState == DataStreamState::Open); + mStreamState = DataStreamState::SetPropertyValue; + { + ScopedMetaData meta(mMetaDataProvider); + mSPVClass = meta->findClass(incomingTypeName); + } + BeginSetPropertyValue evt(toStream(instance), toStream(name), toStream(incomingTypeName)); + return boolToError(handlePvdEvent(evt)); + } + + /* Sends one chunk of the value started by beginSetPropertyValue. The item count is data.size() divided by the native size of the cached class (a zero native size is not handled). */ + virtual PvdError appendPropertyValueData(DataRef<const uint8_t> data) + { + uint32_t realSize = mSPVClass.getNativeSize(); + uint32_t numItems = data.size() / realSize; + data = bufferPropertyValue(mSPVClass.getNativeSizeInfo(), data); + PX_ASSERT(mStreamState == DataStreamState::SetPropertyValue); + return boolToError(handlePvdEvent(AppendPropertyValueData(data, numItems))); + } + /* Ends the chunked update and returns the stream to the Open state. */ + virtual PvdError endSetPropertyValue() + { + PX_ASSERT(mStreamState == DataStreamState::SetPropertyValue); + mStreamState = DataStreamState::Open; + return boolToError(handlePvdEvent(EndSetPropertyValue())); + } + +#if PX_DEBUG + + /* Debug-only: true when the instance's class is derived from the class the message was defined for. */ + bool checkPropertyMessage(const void* instance, const NamespacedName& msgName) + { + int32_t clsId = mMetaDataProvider.getInstanceClassType(instance); + ScopedMetaData meta(mMetaDataProvider); + PropertyMessageDescription desc(meta->findPropertyMessage(msgName)); + bool retval = meta->isDerivedFrom(clsId, desc.mClassId); + return retval; + } + +#endif + + /* When the message has string offsets, copies the message into mSPVBuffer followed by each referenced NUL-terminated string and returns a DataRef over mSPVBuffer; otherwise returns data unchanged. */ + DataRef<const uint8_t> bufferPropertyMessage(const PropertyMessageDescription& desc, DataRef<const uint8_t> data) + { + if(desc.mStringOffsets.size()) + { + mSPVBuffer.clear(); + mSPVBuffer.write(data.begin(), data.size()); + PVD_FOREACH(idx, desc.mStringOffsets.size()) + { + const char* strPtr; + physx::intrinsics::memCopy(&strPtr, data.begin() + desc.mStringOffsets[idx], 
sizeof(char*)); + strPtr = nonNull(strPtr); + uint32_t len = safeStrLen(strPtr) + 1; + mSPVBuffer.write(strPtr, len); + } + data = DataRef<const uint8_t>(mSPVBuffer.begin(), mSPVBuffer.end()); + } + return data; + } + + /* Sends a single property message for instance. Returns ArgumentError (after asserting) when data is smaller than the message size recorded in the meta data; larger data is accepted. The meta data scope is held for the whole call. */ + virtual PvdError setPropertyMessage(const void* instance, const NamespacedName& msgName, DataRef<const uint8_t> data) + { + ScopedMetaData meta(mMetaDataProvider); + PX_ASSERT(isInstanceValid(instance)); +#if PX_DEBUG + PX_ASSERT(messageExists(msgName)); + PX_ASSERT(checkPropertyMessage(instance, msgName)); +#endif + PropertyMessageDescription desc(meta->findPropertyMessage(msgName)); + if(data.size() < desc.mMessageByteSize) + { + PX_ASSERT(false); + return PvdErrorType::ArgumentError; + } + data = bufferPropertyMessage(desc, data); + PX_ASSERT(mStreamState == DataStreamState::Open); + return boolToError(handlePvdEvent(SetPropertyMessage(toStream(instance), toStream(msgName), data))); + } + +#if PX_DEBUG + + /* Debug-only: message groups are only allowed for messages without string offsets. */ + bool checkBeginPropertyMessageGroup(const NamespacedName& msgName) + { + ScopedMetaData meta(mMetaDataProvider); + PropertyMessageDescription desc(meta->findPropertyMessage(msgName)); + return desc.mStringOffsets.size() == 0; + } + +#endif + // If you need to send a lot of identical messages, this avoids a hashtable lookup per message. 
+ /* Starts a group of messages that all use msgName: switches the stream to the PropertyMessageGroup state, caches the message description in mMessageDesc and sends a BeginPropertyMessageGroup event. Debug builds assert that the message exists and has no string offsets. */ + virtual PvdError beginPropertyMessageGroup(const NamespacedName& msgName) + { +#if PX_DEBUG + PX_ASSERT(messageExists(msgName)); + PX_ASSERT(checkBeginPropertyMessageGroup(msgName)); +#endif + PX_ASSERT(mStreamState == DataStreamState::Open); + mStreamState = DataStreamState::PropertyMessageGroup; + ScopedMetaData meta(mMetaDataProvider); + mMessageDesc = meta->findPropertyMessage(msgName); + return boolToError(handlePvdEvent(BeginPropertyMessageGroup(toStream(msgName)))); + } + + /* Sends one message of the current group for instance. data.size() must equal mMessageDesc.mMessageByteSize exactly; any other size asserts and returns ArgumentError. (A second, unreachable "size too small" check that used to follow the equality check has been removed.) */ + virtual PvdError sendPropertyMessageFromGroup(const void* instance, DataRef<const uint8_t> data) + { + PX_ASSERT(mStreamState == DataStreamState::PropertyMessageGroup); + PX_ASSERT(isInstanceValid(instance)); +#if PX_DEBUG + PX_ASSERT(checkPropertyMessage(instance, mMessageDesc.mMessageName)); +#endif + if(mMessageDesc.mMessageByteSize != data.size()) + { + PX_ASSERT(false); + return PvdErrorType::ArgumentError; + } + data = bufferPropertyMessage(mMessageDesc, data); + return boolToError(handlePvdEvent(SendPropertyMessageFromGroup(toStream(instance), data))); + } + /* Ends the current message group and returns the stream to the Open state. */ + virtual PvdError endPropertyMessageGroup() + { + PX_ASSERT(mStreamState == DataStreamState::PropertyMessageGroup); + mStreamState = DataStreamState::Open; + return boolToError(handlePvdEvent(EndPropertyMessageGroup())); + } + /* Sends a PushBackObjectRef event for property propName of instance; both instance and data must be registered instances (asserted). */ + virtual PvdError pushBackObjectRef(const void* instance, String propName, const void* data) + { + PX_ASSERT(isInstanceValid(instance)); + PX_ASSERT(isInstanceValid(data)); + PX_ASSERT(mStreamState == DataStreamState::Open); + return boolToError(handlePvdEvent(PushBackObjectRef(toStream(instance), toStream(propName), toStream(data)))); + } + /* Sends a RemoveObjectRef event for property propName of instance; both instance and data must be registered instances (asserted). */ + virtual PvdError removeObjectRef(const void* instance, String propName, const void* data) + { + PX_ASSERT(isInstanceValid(instance)); + PX_ASSERT(isInstanceValid(data)); + PX_ASSERT(mStreamState == DataStreamState::Open); + return 
boolToError(handlePvdEvent(RemoveObjectRef(toStream(instance), toStream(propName), toStream(data)))); + } + // Instance elimination. + /* Unregisters the instance from the meta data provider, then sends a DestroyInstance event. */ + virtual PvdError destroyInstance(const void* instance) + { + PX_ASSERT(isInstanceValid(instance)); + PX_ASSERT(mStreamState == DataStreamState::Open); + mMetaDataProvider.destroyInstance(instance); + return boolToError(handlePvdEvent(DestroyInstance(toStream(instance)))); + } + + // Profiling hooks + /* beginSection and endSection send an event stamped with Time::getCurrentCounterValue() taken at the time of the call. */ + virtual PvdError beginSection(const void* instance, String name) + { + PX_ASSERT(mStreamState == DataStreamState::Open); + return boolToError(handlePvdEvent( + BeginSection(toStream(instance), toStream(name), Time::getCurrentCounterValue()))); + } + + virtual PvdError endSection(const void* instance, String name) + { + PX_ASSERT(mStreamState == DataStreamState::Open); + return boolToError(handlePvdEvent( + EndSection(toStream(instance), toStream(name), Time::getCurrentCounterValue()))); + } + + virtual PvdError originShift(const void* scene, PxVec3 shift) + { + PX_ASSERT(mStreamState == DataStreamState::Open); + return boolToError(handlePvdEvent(OriginShift(toStream(scene), shift))); + } + + virtual void addProfileZone(void* zone, const char* name) + { + handlePvdEvent(AddProfileZone(toStream(zone), name)); + } + virtual void addProfileZoneEvent(void* zone, const char* name, uint16_t eventId, bool compileTimeEnabled) + { + handlePvdEvent(AddProfileZoneEvent(toStream(zone), name, eventId, compileTimeEnabled)); + } + + // add a variable sized event + /* First serializes the event into a MeasureStream to learn its size, then locks the transport, writes an EventGroup header (size, one event, stream id, current counter value) followed by the event itself, and unlocks the transport. The results of the writes are not inspected here. */ + void addEvent(const EventSerializeable& evt, PvdCommStreamEventTypes::Enum evtType) + { + MeasureStream measure; + PvdCommStreamEventSink::writeStreamEvent(evt, evtType, measure); + EventGroup evtGroup(measure.mSize, 1, mStreamId, Time::getCurrentCounterValue()); + EventStreamifier<PxPvdTransport> streamifier(mTransport.lock()); + evtGroup.serialize(streamifier); + PvdCommStreamEventSink::writeStreamEvent(evt, evtType, mTransport); + mTransport.unlock(); + } + + void 
setIsTopLevelUIElement(const void* instance, bool topLevel) + { + addEvent(SetIsTopLevel(static_cast<uint64_t>(reinterpret_cast<size_t>(instance)), topLevel), + getCommStreamEventType<SetIsTopLevel>()); + } + + void sendErrorMessage(uint32_t code, const char* message, const char* file, uint32_t line) + { + addEvent(ErrorMessage(code, message, file, line), getCommStreamEventType<ErrorMessage>()); + } + + void updateCamera(const char* name, const PxVec3& origin, const PxVec3& up, const PxVec3& target) + { + addEvent(SetCamera(name, origin, up, target), getCommStreamEventType<SetCamera>()); + } + + /* Sends the event through addEvent and returns mConnected. The return value therefore does not reflect whether the transport write succeeded; within this struct mConnected is only ever set (to true) by the constructor. */ + template <typename TEventType> + bool handlePvdEvent(const TEventType& evt) + { + addEvent(evt, getCommStreamEventType<TEventType>()); + return mConnected; + } + + /* Returns the shared property definition helper after clearing whatever it had buffered. */ + virtual PvdPropertyDefinitionHelper& getPropertyDefinitionHelper() + { + mPropertyDefinitionHelper.clearBufferedData(); + return mPropertyDefinitionHelper; + } + + virtual bool isConnected() + { + return mConnected; + } + + /* Returns memory from the command pool; the pool returns NULL for requests larger than one of its pages. */ + virtual void* allocateMemForCmd(uint32_t length) + { + return mPvdCommandPool.allocate(length); + } + + virtual void pushPvdCommand(PvdCommand& cmd) + { + mPvdCommandArray.pushBack(&cmd); + } + + /* Runs every queued non-null command in order and explicitly invokes its destructor afterwards, then empties the queue and rewinds the command pool so its memory can be reused. */ + virtual void flushPvdCommand() + { + uint32_t cmdQueueSize = mPvdCommandArray.size(); + for(uint32_t i = 0; i < cmdQueueSize; i++) + { + if(mPvdCommandArray[i]) + { + // if(mPvdCommandArray[i]->canRun(*this)) + mPvdCommandArray[i]->run(*this); + mPvdCommandArray[i]->~PvdCommand(); + } + } + mPvdCommandArray.clear(); + mPvdCommandPool.clear(); + } + + PX_NOCOPY(PvdOutStream) +}; +} + +/* Creates a PvdOutStream bound to the transport, meta data provider and next stream id of the given pvd (which is cast to PvdImpl). Reports an invalid-parameter error and returns NULL when pvd is NULL. The pointer returned by getTransport() is dereferenced without a NULL check. */ +PvdDataStream* PvdDataStream::create(PxPvd* pvd) +{ + if(pvd == NULL) + { + getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PvdDataStream::create - pvd must be non-NULL!"); + return NULL; + } + + PvdImpl* pvdImpl = static_cast<PvdImpl*>(pvd); + return PVD_NEW(PvdOutStream)(*pvdImpl->getTransport(), pvdImpl->getMetaDataProvider(), pvdImpl->getNextStreamId()); +} diff --git 
a/PxShared/src/pvd/src/PxPvdDefaultFileTransport.cpp b/PxShared/src/pvd/src/PxPvdDefaultFileTransport.cpp new file mode 100644 index 0000000..e3499a6 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdDefaultFileTransport.cpp @@ -0,0 +1,123 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
All rights reserved. + +#include "pvd/PxPvdTransport.h" +#include "foundation/PxAssert.h" + +#include "PxPvdDefaultFileTransport.h" + +namespace physx +{ +namespace pvdsdk +{ + +/* Creates the PsFileBuffer for `name` in write-only mode. The transport reports itself as connected only after connect() has been called. */ +PvdDefaultFileTransport::PvdDefaultFileTransport(const char* name) : mConnected(false), mWrittenData(0), mLocked(false) +{ + mFileBuffer = PX_NEW(PsFileBuffer)(name, PxFileBuf::OPEN_WRITE_ONLY); +} + +/* Does not free mFileBuffer; release() is the cleanup path. */ +PvdDefaultFileTransport::~PvdDefaultFileTransport() +{ +} + +/* Connected means the file buffer reports itself as open. */ +bool PvdDefaultFileTransport::connect() +{ + PX_ASSERT(mFileBuffer); + mConnected = mFileBuffer->isOpen(); + return mConnected; +} + +/* Only clears the connected flag; the file is closed in release(). */ +void PvdDefaultFileTransport::disconnect() +{ + mConnected = false; +} + +bool PvdDefaultFileTransport::isConnected() +{ + return mConnected; +} + +/* Must be called between lock() and unlock() (asserted through mLocked). When connected, writes to the file, adds the number of bytes actually written to mWrittenData and returns whether all bytes were written; returns false when not connected. */ +bool PvdDefaultFileTransport::write(const uint8_t* inBytes, uint32_t inLength) +{ + PX_ASSERT(mLocked); + PX_ASSERT(mFileBuffer); + if (mConnected) + { + uint32_t len = mFileBuffer->write(inBytes, inLength); + mWrittenData += len; + return len == inLength; + } + else + return false; +} + +/* Takes the mutex and sets the debug flag mLocked; asserts that the flag was not already set. */ +PxPvdTransport& PvdDefaultFileTransport::lock() +{ + mMutex.lock(); + PX_ASSERT(!mLocked); + mLocked = true; + return *this; +} + +void PvdDefaultFileTransport::unlock() +{ + PX_ASSERT(mLocked); + mLocked = false; + mMutex.unlock(); +} + +/* Intentionally empty in this implementation. */ +void PvdDefaultFileTransport::flush() +{ +} + +uint64_t PvdDefaultFileTransport::getWrittenDataSize() +{ + return mWrittenData; +} + +/* Closes and deletes the file buffer, then deletes this transport object. */ +void PvdDefaultFileTransport::release() +{ + if (mFileBuffer) + { + mFileBuffer->close(); + delete mFileBuffer; + } + mFileBuffer = NULL; + PX_DELETE(this); +} + +} // namespace pvdsdk + +PxPvdTransport* PxDefaultPvdFileTransportCreate(const char* name) +{ + return PX_NEW(pvdsdk::PvdDefaultFileTransport)(name); +} + +} // namespace physx + diff --git a/PxShared/src/pvd/src/PxPvdDefaultFileTransport.h b/PxShared/src/pvd/src/PxPvdDefaultFileTransport.h new file mode 100644 index 0000000..9f4166f --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdDefaultFileTransport.h @@ -0,0 +1,77 @@ +// This code contains NVIDIA 
Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PXPVDSDK_PXPVDDEFAULTFILETRANSPORT_H +#define PXPVDSDK_PXPVDDEFAULTFILETRANSPORT_H + +#include "pvd/PxPvdTransport.h" + +#include "PsUserAllocated.h" +#include "PsFileBuffer.h" +#include "PsMutex.h" + +namespace physx +{ +namespace pvdsdk +{ + +/* PxPvdTransport implementation that writes the stream to a file through a PsFileBuffer. Non-copyable. write() is expected to be called between lock() and unlock(); instances are destroyed through release(). */ +class PvdDefaultFileTransport : public physx::PxPvdTransport, public physx::shdfnd::UserAllocated +{ + PX_NOCOPY(PvdDefaultFileTransport) + public: + PvdDefaultFileTransport(const char* name); + virtual ~PvdDefaultFileTransport(); + + virtual bool connect(); + virtual void disconnect(); + virtual bool isConnected(); + + virtual bool write(const uint8_t* inBytes, uint32_t inLength); + + virtual PxPvdTransport& lock(); + virtual void unlock(); + + virtual void flush(); + + virtual uint64_t getWrittenDataSize(); + + virtual void release(); + + private: + physx::PsFileBuffer* mFileBuffer; + bool mConnected; + uint64_t mWrittenData; + physx::shdfnd::Mutex mMutex; + bool mLocked; // for debug, remove it when finished +}; + +} // pvdsdk +} // physx + +#endif // PXPVDSDK_PXPVDDEFAULTFILETRANSPORT_H diff --git a/PxShared/src/pvd/src/PxPvdDefaultSocketTransport.cpp b/PxShared/src/pvd/src/PxPvdDefaultSocketTransport.cpp new file mode 100644 index 0000000..48b94b1 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdDefaultSocketTransport.cpp @@ -0,0 +1,136 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "pvd/PxPvdTransport.h" + +#include "PxPvdDefaultSocketTransport.h" + +namespace physx +{ +namespace pvdsdk +{ +/* Stores the connection parameters; no connection is attempted until connect(). The host pointer is stored as given (the string is not copied), so it must stay valid for as long as connect() may be called. Every scalar member, including the otherwise unused mlocked flag, is given a defined initial value. */ +PvdDefaultSocketTransport::PvdDefaultSocketTransport(const char* host, int port, unsigned int timeoutInMilliseconds) +: mHost(host), mPort(uint16_t(port)), mTimeout(timeoutInMilliseconds), mConnected(false), mWrittenData(0), mlocked(false) +{ +} + +PvdDefaultSocketTransport::~PvdDefaultSocketTransport() +{ +} + +/* Returns true immediately when already connected; otherwise tries to connect the socket with the stored host, port and timeout and, on success, switches the socket to blocking mode. */ +bool PvdDefaultSocketTransport::connect() +{ + if(mConnected) + return true; + + if(mSocket.connect(mHost, mPort, mTimeout)) + { + mSocket.setBlocking(true); + mConnected = true; + } + return mConnected; +} + +/* Flushes and disconnects the socket and clears the connected flag. */ +void PvdDefaultSocketTransport::disconnect() +{ + mSocket.flush(); + mSocket.disconnect(); + mConnected = false; +} + +/* Reports the socket's own state, which is not necessarily the same as the mConnected flag that write() checks. */ +bool PvdDefaultSocketTransport::isConnected() +{ + return mSocket.isConnected(); +} + +/* Writes all of inBytes to the socket, looping over partial writes. Returns false when not connected or when a write call transfers no bytes; a zero-length request succeeds immediately. */ +bool PvdDefaultSocketTransport::write(const uint8_t* inBytes, uint32_t inLength) +{ + if(mConnected) + { + if(inLength == 0) + return true; + + uint32_t amountWritten = 0; + uint32_t totalWritten = 0; + do + { + // Sockets don't have to write as much as requested, so we need + // to wrap this call in a do/while loop. + // If they don't write any bytes then we consider them disconnected. 
+ amountWritten = mSocket.write(inBytes, inLength); + inLength -= amountWritten; + inBytes += amountWritten; + totalWritten += amountWritten; + } while(inLength && amountWritten); + + /* Count the bytes that really went out before reporting failure, so that getWrittenDataSize() stays accurate when a write stops part-way (the file transport counts partial writes the same way). */ + mWrittenData += totalWritten; + + if(amountWritten == 0) + return false; + + return true; + } + else + return false; +} + +/* lock() and unlock() only take and release the mutex. */ +PxPvdTransport& PvdDefaultSocketTransport::lock() +{ + mMutex.lock(); + return *this; +} + +void PvdDefaultSocketTransport::unlock() +{ + mMutex.unlock(); +} + +void PvdDefaultSocketTransport::flush() +{ + mSocket.flush(); +} + +/* Total number of bytes handed to the socket so far. */ +uint64_t PvdDefaultSocketTransport::getWrittenDataSize() +{ + return mWrittenData; +} + +/* Deletes this transport; the socket is not explicitly disconnected here. */ +void PvdDefaultSocketTransport::release() +{ + PX_DELETE(this); +} + +} // namespace pvdsdk + +PxPvdTransport* PxDefaultPvdSocketTransportCreate(const char* host, int port, unsigned int timeoutInMilliseconds) +{ + return PX_NEW(pvdsdk::PvdDefaultSocketTransport)(host, port, timeoutInMilliseconds); +} + +} // namespace physx diff --git a/PxShared/src/pvd/src/PxPvdDefaultSocketTransport.h b/PxShared/src/pvd/src/PxPvdDefaultSocketTransport.h new file mode 100644 index 0000000..b02b934 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdDefaultSocketTransport.h @@ -0,0 +1,79 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PXPVDSDK_PXPVDDEFAULTSOCKETTRANSPORT_H +#define PXPVDSDK_PXPVDDEFAULTSOCKETTRANSPORT_H + +#include "pvd/PxPvdTransport.h" + +#include "PsUserAllocated.h" +#include "PsSocket.h" +#include "PsMutex.h" + +namespace physx +{ +namespace pvdsdk +{ +class PvdDefaultSocketTransport : public PxPvdTransport, public shdfnd::UserAllocated +{ + PX_NOCOPY(PvdDefaultSocketTransport) + public: + PvdDefaultSocketTransport(const char* host, int port, unsigned int timeoutInMilliseconds); + virtual ~PvdDefaultSocketTransport(); + + virtual bool connect(); + virtual void disconnect(); + virtual bool isConnected(); + + virtual bool write(const uint8_t* inBytes, uint32_t inLength); + + virtual void flush(); + + virtual PxPvdTransport& lock(); + virtual void unlock(); + + virtual uint64_t getWrittenDataSize(); + + virtual void release(); + + private: + shdfnd::Socket mSocket; + const char* mHost; + uint16_t mPort; + unsigned int mTimeout; + bool mConnected; + uint64_t mWrittenData; + shdfnd::Mutex mMutex; + bool mlocked; +}; + +} // pvdsdk +} // physx + +#endif // PXPVDSDK_PXPVDDEFAULTSOCKETTRANSPORT_H diff --git a/PxShared/src/pvd/src/PxPvdFoundation.h b/PxShared/src/pvd/src/PxPvdFoundation.h new file mode 100644 index 0000000..90fc77f --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdFoundation.h @@ -0,0 +1,504 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#ifndef PXPVDSDK_PXPVDFOUNDATION_H +#define PXPVDSDK_PXPVDFOUNDATION_H + +#include "foundation/PxVec3.h" +#include "foundation/PxTransform.h" +#include "foundation/PxBounds3.h" + +#include "PsArray.h" +#include "PsHashMap.h" +#include "PsHashSet.h" +#include "PsPool.h" +#include "PsString.h" + +#include "PxPvdObjectModelBaseTypes.h" + +namespace physx +{ +namespace pvdsdk +{ + +extern PxAllocatorCallback* gPvdAllocatorCallback; + +class ForwardingAllocator : public PxAllocatorCallback +{ + void* allocate(size_t size, const char* typeName, const char* filename, int line) + { + return shdfnd::getAllocator().allocate(size, typeName, filename, line); + } + void deallocate(void* ptr) + { + shdfnd::getAllocator().deallocate(ptr); + } +}; + +class RawMemoryBuffer +{ + uint8_t* mBegin; + uint8_t* mEnd; + uint8_t* mCapacityEnd; + const char* mBufDataName; + + public: + RawMemoryBuffer(const char* name) : mBegin(0), mEnd(0), mCapacityEnd(0),mBufDataName(name) + { + PX_UNUSED(mBufDataName); + } + ~RawMemoryBuffer() + { + if(mBegin) + PX_FREE(mBegin); + } + uint32_t size() const + { + return static_cast<uint32_t>(mEnd - mBegin); + } + uint32_t capacity() const + { + return static_cast<uint32_t>(mCapacityEnd - mBegin); + } + uint8_t* begin() + { + return mBegin; + } + uint8_t* end() + { + return mEnd; + } + const uint8_t* begin() const + { + return mBegin; + } + const uint8_t* end() const + { + return mEnd; + } + void clear() + { + mEnd = mBegin; + } + const char* cStr() + { + if(mEnd && (*mEnd != 0)) + write(0); + return reinterpret_cast<const char*>(mBegin); + } + uint32_t write(uint8_t inValue) + { + *growBuf(1) = inValue; + return 1; + } + + template <typename TDataType> + uint32_t write(const TDataType& inValue) + { + const uint8_t* __restrict readPtr = reinterpret_cast<const uint8_t*>(&inValue); + uint8_t* __restrict writePtr = growBuf(sizeof(TDataType)); + for(uint32_t idx = 0; idx < sizeof(TDataType); ++idx) + writePtr[idx] = readPtr[idx]; + return sizeof(TDataType); 
+ } + + template <typename TDataType> + uint32_t write(const TDataType* inValue, uint32_t inLength) + { + uint32_t writeSize = inLength * sizeof(TDataType); + if(inValue && inLength) + { + physx::intrinsics::memCopy(growBuf(writeSize), inValue, writeSize); + } + if(inLength && !inValue) + { + PX_ASSERT(false); + // You can't not write something, because that will cause + // the receiving end to crash. + for(uint32_t idx = 0; idx < writeSize; ++idx) + write(0); + } + return writeSize; + } + + uint8_t* growBuf(uint32_t inAmount) + { + uint32_t offset = size(); + uint32_t newSize = offset + inAmount; + reserve(newSize); + mEnd += inAmount; + return mBegin + offset; + } + void writeZeros(uint32_t inAmount) + { + uint32_t offset = size(); + growBuf(inAmount); + physx::intrinsics::memZero(begin() + offset, inAmount); + } + void reserve(uint32_t newSize) + { + uint32_t currentSize = size(); + if(newSize && newSize >= capacity()) + { + uint32_t newDataSize = newSize > 4096 ? newSize + (newSize >> 2) : newSize*2; + uint8_t* newData = static_cast<uint8_t*>(PX_ALLOC(newDataSize, mBufDataName)); + if(mBegin) + { + physx::intrinsics::memCopy(newData, mBegin, currentSize); + PX_FREE(mBegin); + } + mBegin = newData; + mEnd = mBegin + currentSize; + mCapacityEnd = mBegin + newDataSize; + } + } +}; + +struct ForwardingMemoryBuffer : public RawMemoryBuffer +{ + ForwardingMemoryBuffer(const char* bufDataName) : RawMemoryBuffer(bufDataName) + { + } + + ForwardingMemoryBuffer& operator<<(const char* inString) + { + if(inString && *inString) + { + uint32_t len = static_cast<uint32_t>(strlen(inString)); + write(inString, len); + } + return *this; + } + + template <typename TDataType> + inline ForwardingMemoryBuffer& toStream(const char* inFormat, const TDataType inData) + { + char buffer[128] = { 0 }; + shdfnd::snprintf(buffer, 128, inFormat, inData); + *this << buffer; + return *this; + } + + inline ForwardingMemoryBuffer& operator<<(bool inData) + { + *this << (inData ? 
"true" : "false"); + return *this; + } + inline ForwardingMemoryBuffer& operator<<(int32_t inData) + { + return toStream("%d", inData); + } + inline ForwardingMemoryBuffer& operator<<(uint16_t inData) + { + return toStream("%u", uint32_t(inData)); + } + inline ForwardingMemoryBuffer& operator<<(uint8_t inData) + { + return toStream("%u", uint32_t(inData)); + } + inline ForwardingMemoryBuffer& operator<<(char inData) + { + return toStream("%c", inData); + } + inline ForwardingMemoryBuffer& operator<<(uint32_t inData) + { + return toStream("%u", inData); + } + inline ForwardingMemoryBuffer& operator<<(uint64_t inData) + { + return toStream("%I64u", inData); + } + inline ForwardingMemoryBuffer& operator<<(int64_t inData) + { + return toStream("%I64d", inData); + } + inline ForwardingMemoryBuffer& operator<<(const void* inData) + { + return *this << static_cast<uint64_t>(reinterpret_cast<size_t>(inData)); + } + inline ForwardingMemoryBuffer& operator<<(float inData) + { + return toStream("%g", double(inData)); + } + inline ForwardingMemoryBuffer& operator<<(double inData) + { + return toStream("%g", inData); + } + inline ForwardingMemoryBuffer& operator<<(const PxVec3& inData) + { + *this << inData[0]; + *this << " "; + *this << inData[1]; + *this << " "; + *this << inData[2]; + return *this; + } + + inline ForwardingMemoryBuffer& operator<<(const PxQuat& inData) + { + *this << inData.x; + *this << " "; + *this << inData.y; + *this << " "; + *this << inData.z; + *this << " "; + *this << inData.w; + return *this; + } + + inline ForwardingMemoryBuffer& operator<<(const PxTransform& inData) + { + *this << inData.q; + *this << " "; + *this << inData.p; + return *this; + } + + inline ForwardingMemoryBuffer& operator<<(const PxBounds3& inData) + { + *this << inData.minimum; + *this << " "; + *this << inData.maximum; + return *this; + } + +}; + +template <typename TObjectType, typename TGetSetIndexOp, typename TSetSetIndexOp> +class InvasiveSet +{ + 
shdfnd::Array<TObjectType*> mSet; + + InvasiveSet(const InvasiveSet& other); + InvasiveSet& operator=(const InvasiveSet& other); + + public: + InvasiveSet(const char* allocName) : mSet(allocName) + { + } + + bool insert(TObjectType& inObject) + { + uint32_t currentIdx = TGetSetIndexOp()(inObject); + if(currentIdx == UINT32_MAX) + { + TSetSetIndexOp()(inObject, mSet.size()); + mSet.pushBack(&inObject); + return true; + } + return false; + } + + bool remove(TObjectType& inObject) + { + uint32_t currentIdx = TGetSetIndexOp()(inObject); + if(currentIdx != UINT32_MAX) + { + TObjectType* theEnd = mSet.back(); + TObjectType* theObj = &inObject; + if(theEnd != theObj) + { + TSetSetIndexOp()(*theEnd, currentIdx); + mSet[currentIdx] = theEnd; + } + mSet.popBack(); + TSetSetIndexOp()(inObject, UINT32_MAX); + return true; + } + return false; + } + + bool contains(TObjectType& inObject) + { + return TGetSetIndexOp()(inObject) != UINT32_MAX; + } + + void clear() + { + for(uint32_t idx = 0; idx < mSet.size(); ++idx) + TSetSetIndexOp()(*(mSet[idx]), UINT32_MAX); + mSet.clear(); + } + + TObjectType* operator[](uint32_t idx) + { + return mSet[idx]; + } + const TObjectType* operator[](uint32_t idx) const + { + return mSet[idx]; + } + uint32_t size() const + { + return mSet.size(); + } + TObjectType** begin() + { + return mSet.begin(); + } + TObjectType** end() + { + return mSet.end(); + } + const TObjectType** begin() const + { + return mSet.begin(); + } + const TObjectType** end() const + { + return mSet.end(); + } + const TObjectType* back() const + { + return mSet.back(); + } + TObjectType* back() + { + return mSet.back(); + } +}; + +template <typename TDataType> +inline void* PvdAllocate(const char* typeName, const char* file, int line) +{ + PX_ASSERT(gPvdAllocatorCallback); + return gPvdAllocatorCallback->allocate(sizeof(TDataType), typeName, file, line); +} + +template <typename TDataType> +inline void PvdDeleteAndDeallocate(TDataType* inDType) +{ + 
PX_ASSERT(gPvdAllocatorCallback); + if(inDType) + { + inDType->~TDataType(); + gPvdAllocatorCallback->deallocate(inDType); + } +} +} +} + +#define PVD_NEW(dtype) new (PvdAllocate<dtype>(#dtype, __FILE__, __LINE__)) dtype +#define PVD_DELETE(obj) PvdDeleteAndDeallocate(obj); +//#define PVD_NEW(dtype) PX_NEW(dtype) +//#define PVD_DELETE(obj) PX_DELETE(obj) +#define PVD_FOREACH(varname, stop) for(uint32_t varname = 0; varname < stop; ++varname) + +namespace physx +{ +namespace pvdsdk +{ + +template <typename TKeyType, typename TValueType, typename THashType, typename TBufType, typename TOperator> +uint32_t getMapKeysOp(shdfnd::HashMap<TKeyType, TValueType, THashType>& map, TBufType* buffer, uint32_t bufSize, + uint32_t startIdx, TOperator op) +{ + uint32_t numItems = static_cast<uint32_t>(map.size()); + if(numItems == 0 || bufSize == 0) + return 0; + + startIdx = PxMin(numItems - 1, startIdx); + uint32_t retval = 0; + for(typename shdfnd::HashMap<TKeyType, TValueType, THashType>::Iterator iter = map.getIterator(); + iter.done() == false && bufSize; ++iter) + { + if(startIdx) + --startIdx; + else + { + buffer[retval] = op(iter->first); + --bufSize; + ++retval; + } + } + return retval; +} + +struct IdOp +{ + template <typename TDataType> + TDataType operator()(const TDataType& item) + { + return item; + } +}; + +template <typename TKeyType, typename TValueType, typename THashType> +uint32_t getMapKeys(shdfnd::HashMap<TKeyType, TValueType, THashType>& map, TKeyType* buffer, uint32_t bufSize, uint32_t startIdx) +{ + return getMapKeysOp(map, buffer, bufSize, startIdx, IdOp()); +} + +struct DerefOp +{ + template <typename TDataType> + TDataType operator()(const TDataType* item) + { + return *item; + } +}; + +template <typename TKeyType, typename TValueType, typename TBufType, typename TOp> +uint32_t getMapValues(shdfnd::HashMap<TKeyType, TValueType>& map, TBufType* buffer, uint32_t bufSize, uint32_t startIdx, TOp op) +{ + uint32_t numItems = 
static_cast<uint32_t>(map.size()); + if(numItems == 0 || bufSize == 0) + return 0; + + startIdx = PxMin(numItems - 1, startIdx); + uint32_t retval = 0; + for(typename shdfnd::HashMap<TKeyType, TValueType>::Iterator iter = map.getIterator(); iter.done() == false && bufSize; ++iter) + { + if(startIdx) + --startIdx; + else + { + buffer[retval] = op(iter->second); + --bufSize; + ++retval; + } + } + return retval; +} + +template <typename TValueType, typename TBufType> +uint32_t getArrayEntries(shdfnd::Array<TValueType>& data, TBufType* buffer, uint32_t bufSize, uint32_t startIdx) +{ + uint32_t numItems = static_cast<uint32_t>(data.size()); + if(numItems == 0 || bufSize == 0) + return 0; + + startIdx = PxMin(numItems - 1, startIdx); + uint32_t available = PxMin(numItems - startIdx, bufSize); + PVD_FOREACH(idx, available) + buffer[idx] = data[idx + startIdx]; + return available; +} +#define PVD_POINTER_TO_U64(ptr) static_cast<uint64_t>(reinterpret_cast<size_t>(ptr)) +} +} +#endif // PXPVDSDK_PXPVDFOUNDATION_H diff --git a/PxShared/src/pvd/src/PxPvdImpl.cpp b/PxShared/src/pvd/src/PxPvdImpl.cpp new file mode 100644 index 0000000..9fa82a0 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdImpl.cpp @@ -0,0 +1,405 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "pvd/PxPvdTransport.h" + +#include "PxPvdImpl.h" +#include "PxPvdFoundation.h" +#include "PxPvdClient.h" +#include "PxPvdMemClient.h" +#include "PxPvdProfileZoneClient.h" +#include "PxPvdCommStreamTypes.h" +#include "PxProfileZoneManager.h" +#include "PxProfileZone.h" + +#include "PsFoundation.h" + +#if PX_NVTX +#include "nvToolsExt.h" +#endif + +namespace +{ + const char* gSdkName = "PhysXSDK"; +} + +namespace physx +{ +namespace pvdsdk +{ + +class CmEventNameProvider : public physx::profile::PxProfileNameProvider +{ +public: + physx::profile::PxProfileNames getProfileNames() const + { + physx::profile::PxProfileNames ret; + ret.eventCount = 0; + return ret; + } +}; + +CmEventNameProvider gProfileNameProvider; + +void initializeModelTypes(PvdDataStream& stream) +{ + stream.createClass<profile::PxProfileZone>(); + stream.createProperty<profile::PxProfileZone, uint8_t>( + "events", PvdCommStreamEmbeddedTypes::getProfileEventStreamSemantic(), PropertyType::Array); + + stream.createClass<profile::PxProfileMemoryEventBuffer>(); + stream.createProperty<profile::PxProfileMemoryEventBuffer, uint8_t>( + "events", PvdCommStreamEmbeddedTypes::getMemoryEventStreamSemantic(), PropertyType::Array); + + stream.createClass<PvdUserRenderer>(); + stream.createProperty<PvdUserRenderer, uint8_t>( + "events", PvdCommStreamEmbeddedTypes::getRendererEventStreamSemantic(), PropertyType::Array); +} + +PvdImpl* PvdImpl::sInstance = NULL; +uint32_t PvdImpl::sRefCount = 0; + +PvdImpl::PvdImpl() +: mPvdTransport(NULL) +, mSharedMetaProvider(NULL) +, mMemClient(NULL) +, mIsConnected(false) +, mIsNVTXSupportEnabled(true) +, mNVTXContext(0) +, mNextStreamId(1) +, mProfileClient(NULL) +, mProfileZone(NULL) +{ + mProfileZoneManager = &physx::profile::PxProfileZoneManager::createProfileZoneManager(&physx::shdfnd::getAllocator()); + mProfileClient = PVD_NEW(PvdProfileZoneClient)(*this); +} + +PvdImpl::~PvdImpl() +{ + if((mFlags & PxPvdInstrumentationFlag::ePROFILE) ) + { + 
PxSetProfilerCallback(NULL); + } + + disconnect(); + + if ( mProfileZoneManager ) + { + mProfileZoneManager->release(); + mProfileZoneManager = NULL; + } + + PVD_DELETE(mProfileClient); + mProfileClient = NULL; +} + +bool PvdImpl::connect(PxPvdTransport& transport, PxPvdInstrumentationFlags flags) +{ + if(mIsConnected) + { + physx::shdfnd::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PxPvd::connect - recall connect! Should call disconnect before re-connect."); + return false; + } + + mFlags = flags; + mPvdTransport = &transport; + + mIsConnected = mPvdTransport->connect(); + + if(mIsConnected) + { + mSharedMetaProvider = PVD_NEW(MetaDataProvider); + sendTransportInitialization(); + + PvdDataStream* stream = PvdDataStream::create(this); + initializeModelTypes(*stream); + stream->release(); + + if(mFlags & PxPvdInstrumentationFlag::eMEMORY) + { + mMemClient = PVD_NEW(PvdMemClient)(*this); + mPvdClients.pushBack(mMemClient); + } + + if((mFlags & PxPvdInstrumentationFlag::ePROFILE) && mProfileZoneManager) + { + mPvdClients.pushBack(mProfileClient); + mProfileZone = &physx::profile::PxProfileZone::createProfileZone(&physx::shdfnd::getAllocator(),gSdkName,gProfileNameProvider.getProfileNames()); + } + + for(uint32_t i = 0; i < mPvdClients.size(); i++) + mPvdClients[i]->onPvdConnected(); + + if (mProfileZone) + { + mProfileZoneManager->addProfileZoneHandler(*mProfileClient); + mProfileZoneManager->addProfileZone( *mProfileZone ); + } + + if ((mFlags & PxPvdInstrumentationFlag::ePROFILE)) + { + PxSetProfilerCallback(this); + } + } + return mIsConnected; +} + +void PvdImpl::disconnect() +{ + if(mProfileZone) + { + mProfileZoneManager->removeProfileZoneHandler(*mProfileClient); + mProfileZoneManager->removeProfileZone( *mProfileZone ); + mProfileZone->release(); + mProfileZone=NULL; + removeClient(mProfileClient); + } + + if(mIsConnected) + { + for(uint32_t i = 0; i < mPvdClients.size(); i++) + mPvdClients[i]->onPvdDisconnected(); + + 
if(mMemClient) + { + removeClient(mMemClient); + PvdMemClient* tmp = mMemClient; //avoid tracking deallocation itsself + mMemClient = NULL; + PVD_DELETE(tmp); + } + + mSharedMetaProvider->release(); + mPvdTransport->disconnect(); + mObjectRegistrar.clear(); + mIsConnected = false; + } +} + +void PvdImpl::flush() +{ + for(uint32_t i = 0; i < mPvdClients.size(); i++) + mPvdClients[i]->flush(); + if ( mProfileZone ) + { + mProfileZone->flushEventIdNameMap(); + mProfileZone->flushProfileEvents(); + } +} + +bool PvdImpl::isConnected(bool useCachedStatus) +{ + if(mPvdTransport) + return useCachedStatus ? mIsConnected : mPvdTransport->isConnected(); + else + return false; +} + +PxPvdTransport* PvdImpl::getTransport() +{ + return mPvdTransport; +} + +PxPvdInstrumentationFlags PvdImpl::getInstrumentationFlags() +{ + return mFlags; +} + +void PvdImpl::sendTransportInitialization() +{ + StreamInitialization init; + EventStreamifier<PxPvdTransport> stream(mPvdTransport->lock()); + init.serialize(stream); + mPvdTransport->unlock(); +} + +void PvdImpl::addClient(PvdClient* client) +{ + PX_ASSERT(client); + for(uint32_t i = 0; i < mPvdClients.size(); i++) + { + if(client == mPvdClients[i]) + return; + } + mPvdClients.pushBack(client); + if(mIsConnected) + { + client->onPvdConnected(); + } +} + +void PvdImpl::removeClient(PvdClient* client) +{ + for(uint32_t i = 0; i < mPvdClients.size(); i++) + { + if(client == mPvdClients[i]) + { + client->onPvdDisconnected(); + mPvdClients.remove(i); + } + } +} + +void PvdImpl::onAllocation(size_t inSize, const char* inType, const char* inFile, int inLine, void* inAddr) +{ + if(mMemClient) + mMemClient->onAllocation(inSize, inType, inFile, inLine, inAddr); +} + +void PvdImpl::onDeallocation(void* inAddr) +{ + if(mMemClient) + mMemClient->onDeallocation(inAddr); +} + +PvdOMMetaDataProvider& PvdImpl::getMetaDataProvider() +{ + return *mSharedMetaProvider; +} + +bool PvdImpl::registerObject(const void* inItem) +{ + return 
mObjectRegistrar.addItem(inItem); +} + + +bool PvdImpl::unRegisterObject(const void* inItem) +{ + return mObjectRegistrar.decItem(inItem); +} + +uint64_t PvdImpl::getNextStreamId() +{ + uint64_t retval = ++mNextStreamId; + return retval; +} + +bool PvdImpl::initialize() +{ + if(0 == sRefCount) + { + sInstance = PVD_NEW(PvdImpl)(); + } + ++sRefCount; + return !!sInstance; +} + +void PvdImpl::release() +{ + if(sRefCount > 0) + { + if(--sRefCount) + return; + + PVD_DELETE(sInstance); + sInstance = NULL; + } +} + +PvdImpl* PvdImpl::getInstance() +{ + return sInstance; +} + + +/************************************************************************************************************************** +Instrumented profiling events +***************************************************************************************************************************/ + +static const uint32_t CrossThreadId = 99999789; + +void* PvdImpl::zoneStart(const char* eventName, bool detached, uint64_t contextId) +{ + if(mProfileZone) + { + const uint16_t id = mProfileZone->getEventIdForName(eventName); + if(detached) + mProfileZone->startEvent(id, contextId, CrossThreadId); + else + mProfileZone->startEvent(id, contextId); + } +#if PX_NVTX + if(mIsNVTXSupportEnabled) + { + if(detached) + { + // TODO : Need to use the nvtxRangeStart API for cross thread events + nvtxEventAttributes_t eventAttrib; + memset(&eventAttrib, 0, sizeof(eventAttrib)); + eventAttrib.version = NVTX_VERSION; + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + eventAttrib.colorType = NVTX_COLOR_ARGB; + eventAttrib.color = 0xFF00FF00; + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + eventAttrib.message.ascii = eventName; + nvtxMarkEx(&eventAttrib); + } + else + { + nvtxRangePush(eventName); + } + } +#endif + return NULL; +} + +void PvdImpl::zoneEnd(void* /*profilerData*/, const char* eventName, bool detached, uint64_t contextId) +{ + if(mProfileZone) + { + const uint16_t id = 
mProfileZone->getEventIdForName(eventName); + if(detached) + mProfileZone->stopEvent(id, contextId, CrossThreadId); + else + mProfileZone->stopEvent(id, contextId); + } +#if PX_NVTX + if(mIsNVTXSupportEnabled) + { + if(detached) + { + nvtxEventAttributes_t eventAttrib; + memset(&eventAttrib, 0, sizeof(eventAttrib)); + eventAttrib.version = NVTX_VERSION; + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + eventAttrib.colorType = NVTX_COLOR_ARGB; + eventAttrib.color = 0xFFFF0000; + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + eventAttrib.message.ascii = eventName; + nvtxMarkEx(&eventAttrib); + } + else + { + nvtxRangePop(); + } + } +#endif +} +} // pvd + +} // physx diff --git a/PxShared/src/pvd/src/PxPvdImpl.h b/PxShared/src/pvd/src/PxPvdImpl.h new file mode 100644 index 0000000..64d4e16 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdImpl.h @@ -0,0 +1,221 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PXPVDSDK_PXPVDIMPL_H +#define PXPVDSDK_PXPVDIMPL_H + +#include "foundation/PxProfiler.h" + +#include "PsAllocator.h" +#include "PsPvd.h" +#include "PsArray.h" +#include "PsMutex.h" +#include "PxPvdCommStreamTypes.h" +#include "PxPvdFoundation.h" +#include "PxPvdObjectModelMetaData.h" +#include "PxPvdObjectRegistrar.h" + +namespace physx +{ + +namespace profile +{ + class PxProfileZoneManager; +} + +namespace pvdsdk +{ +class PvdMemClient; +class PvdProfileZoneClient; + +struct MetaDataProvider : public PvdOMMetaDataProvider, public shdfnd::UserAllocated +{ + typedef shdfnd::Mutex::ScopedLock TScopedLockType; + typedef shdfnd::HashMap<const void*, int32_t> TInstTypeMap; + PvdObjectModelMetaData& mMetaData; + shdfnd::Mutex mMutex; + uint32_t mRefCount; + TInstTypeMap mTypeMap; + + MetaDataProvider() + : mMetaData(PvdObjectModelMetaData::create()), mRefCount(0), mTypeMap("MetaDataProvider::mTypeMap") + { + mMetaData.addRef(); + } + virtual ~MetaDataProvider() + { + mMetaData.release(); + } + + virtual void addRef() + { + TScopedLockType locker(mMutex); + ++mRefCount; + } + virtual void release() + { + { + TScopedLockType locker(mMutex); + 
if(mRefCount) + --mRefCount; + } + if(!mRefCount) + PVD_DELETE(this); + } + virtual PvdObjectModelMetaData& lock() + { + mMutex.lock(); + return mMetaData; + } + virtual void unlock() + { + mMutex.unlock(); + } + + virtual bool createInstance(const NamespacedName& clsName, const void* instance) + { + TScopedLockType locker(mMutex); + Option<ClassDescription> cls(mMetaData.findClass(clsName)); + if(cls.hasValue() == false) + return false; + int32_t instType = cls->mClassId; + mTypeMap.insert(instance, instType); + return true; + } + virtual bool isInstanceValid(const void* instance) + { + TScopedLockType locker(mMutex); + ClassDescription classDesc; + bool retval = mTypeMap.find(instance) != NULL; +#if PX_DEBUG + if(retval) + classDesc = mMetaData.getClass(mTypeMap.find(instance)->second); +#endif + return retval; + } + virtual void destroyInstance(const void* instance) + { + { + TScopedLockType locker(mMutex); + mTypeMap.erase(instance); + } + } + virtual int32_t getInstanceClassType(const void* instance) + { + TScopedLockType locker(mMutex); + const TInstTypeMap::Entry* entry = mTypeMap.find(instance); + if(entry) + return entry->second; + return -1; + } + + private: + MetaDataProvider& operator=(const MetaDataProvider&); + MetaDataProvider(const MetaDataProvider&); +}; + +////////////////////////////////////////////////////////////////////////// +/*! +PvdImpl is the realization of PxPvd. +It implements the interface methods and provides richer functionality for advanced users or internal clients (such as +PhysX or APEX), including handler notification for clients. 
+*/ +////////////////////////////////////////////////////////////////////////// +class PvdImpl : public PsPvd, public shdfnd::UserAllocated +{ + PX_NOCOPY(PvdImpl) + + typedef shdfnd::Mutex::ScopedLock TScopedLockType; + typedef void (PvdImpl::*TAllocationHandler)(size_t size, const char* typeName, const char* filename, int line, + void* allocatedMemory); + typedef void (PvdImpl::*TDeallocationHandler)(void* allocatedMemory); + + public: + PvdImpl(); + virtual ~PvdImpl(); + void release(); + + bool connect(PxPvdTransport& transport, PxPvdInstrumentationFlags flags); + void disconnect(); + bool isConnected(bool useCachedStatus = true); + void flush(); + + PxPvdTransport* getTransport(); + PxPvdInstrumentationFlags getInstrumentationFlags(); + + void addClient(PvdClient* client); + void removeClient(PvdClient* client); + + PvdOMMetaDataProvider& getMetaDataProvider(); + + bool registerObject(const void* inItem); + bool unRegisterObject(const void* inItem); + + //AllocationListener + void onAllocation(size_t size, const char* typeName, const char* filename, int line, void* allocatedMemory); + void onDeallocation(void* addr); + + uint64_t getNextStreamId(); + + static bool initialize(); + static PvdImpl* getInstance(); + + // Profiling + + virtual void* zoneStart(const char* eventName, bool detached, uint64_t contextId); + + virtual void zoneEnd(void* profilerData, const char *eventName, bool detached, uint64_t contextId); + + private: + void sendTransportInitialization(); + + PxPvdTransport* mPvdTransport; + physx::shdfnd::Array<PvdClient*> mPvdClients; + + MetaDataProvider* mSharedMetaProvider; // shared between clients + ObjectRegistrar mObjectRegistrar; + + PvdMemClient* mMemClient; + + PxPvdInstrumentationFlags mFlags; + bool mIsConnected; + bool mIsNVTXSupportEnabled; + uint32_t mNVTXContext; + uint64_t mNextStreamId; + physx::profile::PxProfileZoneManager*mProfileZoneManager; + PvdProfileZoneClient* mProfileClient; + physx::profile::PxProfileZone* mProfileZone; 
+ static PvdImpl* sInstance; + static uint32_t sRefCount; +}; + +} // namespace pvdsdk +} + +#endif // PXPVDSDK_PXPVDIMPL_H diff --git a/PxShared/src/pvd/src/PxPvdInternalByteStreams.h b/PxShared/src/pvd/src/PxPvdInternalByteStreams.h new file mode 100644 index 0000000..1fd5ddd --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdInternalByteStreams.h @@ -0,0 +1,147 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#ifndef PXPVDSDK_PXPVDINTERNALBYTESTREAMS_H +#define PXPVDSDK_PXPVDINTERNALBYTESTREAMS_H + +#include "PxPvdByteStreams.h" +#include "PxPvdFoundation.h" + +namespace physx +{ +namespace pvdsdk +{ +struct MemPvdOutputStream : public PvdOutputStream +{ + ForwardingMemoryBuffer mBuffer; + MemPvdOutputStream(const char* memName) : mBuffer(memName) + { + } + + virtual bool write(const uint8_t* buffer, uint32_t len) + { + mBuffer.write(buffer, len); + return true; + } + + virtual bool directCopy(PvdInputStream& inStream, uint32_t len) + { + uint32_t offset = mBuffer.size(); + mBuffer.growBuf(len); + uint32_t readLen = len; + inStream.read(mBuffer.begin() + offset, readLen); + if(readLen != len) + physx::intrinsics::memZero(mBuffer.begin() + offset, len); + return readLen == len; + } + + const uint8_t* begin() const + { + return mBuffer.begin(); + } + uint32_t size() const + { + return mBuffer.size(); + } + void clear() + { + mBuffer.clear(); + } + DataRef<const uint8_t> toRef() const + { + return DataRef<const uint8_t>(mBuffer.begin(), mBuffer.end()); + } +}; + +struct MemPvdInputStream : public PvdInputStream +{ + const uint8_t* mBegin; + const uint8_t* mEnd; + bool mGood; + + MemPvdInputStream(const MemPvdOutputStream& stream) : mGood(true) + { + mBegin = stream.mBuffer.begin(); + mEnd = stream.mBuffer.end(); + } + + MemPvdInputStream(const uint8_t* beg = NULL, const uint8_t* end = NULL) + { + mBegin = beg; + mEnd = end; + mGood = true; + } + + uint32_t size() const + { + return mGood ? 
static_cast<uint32_t>(mEnd - mBegin) : 0; + } + bool isGood() const + { + return mGood; + } + + void setup(uint8_t* start, uint8_t* stop) + { + mBegin = start; + mEnd = stop; + } + + void nocopyRead(uint8_t*& buffer, uint32_t& len) + { + if(len == 0 || mGood == false) + { + len = 0; + buffer = NULL; + return; + } + uint32_t original = len; + len = PxMin(len, size()); + if(mGood && len != original) + mGood = false; + buffer = const_cast<uint8_t*>(mBegin); + mBegin += len; + } + + virtual bool read(uint8_t* buffer, uint32_t& len) + { + if(len == 0) + return true; + uint32_t original = len; + len = PxMin(len, size()); + + physx::intrinsics::memCopy(buffer, mBegin, len); + mBegin += len; + if(len < original) + physx::intrinsics::memZero(buffer + len, original - len); + mGood = mGood && len == original; + return mGood; + } +}; +} +} +#endif // PXPVDSDK_PXPVDINTERNALBYTESTREAMS_H diff --git a/PxShared/src/pvd/src/PxPvdMarshalling.h b/PxShared/src/pvd/src/PxPvdMarshalling.h new file mode 100644 index 0000000..0aeaee4 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdMarshalling.h @@ -0,0 +1,220 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. 
+// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXPVDSDK_PXPVDMARSHALLING_H +#define PXPVDSDK_PXPVDMARSHALLING_H + +#include "foundation/PxIntrinsics.h" + +#include "PxPvdObjectModelBaseTypes.h" +#include "PxPvdBits.h" + +namespace physx +{ +namespace pvdsdk +{ + +// Define marshalling + +template <typename TSmallerType, typename TLargerType> +struct PvdMarshalling +{ + bool canMarshal; + PvdMarshalling() : canMarshal(false) + { + } +}; + +template <typename smtype, typename lgtype> +static inline void marshalSingleT(const uint8_t* srcData, uint8_t* destData) +{ + smtype incoming; + + physx::intrinsics::memCopy(&incoming, srcData, sizeof(smtype)); + lgtype outgoing = static_cast<lgtype>(incoming); + physx::intrinsics::memCopy(destData, &outgoing, sizeof(lgtype)); +} + +template <typename smtype, typename lgtype> +static inline void marshalBlockT(const uint8_t* srcData, uint8_t* destData, uint32_t numBytes) +{ + for(const uint8_t* item = srcData, *end = srcData + numBytes; item < end; + item += sizeof(smtype), destData += sizeof(lgtype)) + marshalSingleT<smtype, lgtype>(item, destData); +} + +#define PVD_TYPE_MARSHALLER(smtype, lgtype) \ + template <> \ + struct PvdMarshalling<smtype, lgtype> \ + { \ + uint32_t canMarshal; \ + static void 
marshalSingle(const uint8_t* srcData, uint8_t* destData) \ + { \ + marshalSingleT<smtype, lgtype>(srcData, destData); \ + } \ + static void marshalBlock(const uint8_t* srcData, uint8_t* destData, uint32_t numBytes) \ + { \ + marshalBlockT<smtype, lgtype>(srcData, destData, numBytes); \ + } \ + }; + +// define marshalling tables. +PVD_TYPE_MARSHALLER(int8_t, int16_t) +PVD_TYPE_MARSHALLER(int8_t, uint16_t) +PVD_TYPE_MARSHALLER(int8_t, int32_t) +PVD_TYPE_MARSHALLER(int8_t, uint32_t) +PVD_TYPE_MARSHALLER(int8_t, int64_t) +PVD_TYPE_MARSHALLER(int8_t, uint64_t) +PVD_TYPE_MARSHALLER(int8_t, PvdF32) +PVD_TYPE_MARSHALLER(int8_t, PvdF64) + +PVD_TYPE_MARSHALLER(uint8_t, int16_t) +PVD_TYPE_MARSHALLER(uint8_t, uint16_t) +PVD_TYPE_MARSHALLER(uint8_t, int32_t) +PVD_TYPE_MARSHALLER(uint8_t, uint32_t) +PVD_TYPE_MARSHALLER(uint8_t, int64_t) +PVD_TYPE_MARSHALLER(uint8_t, uint64_t) +PVD_TYPE_MARSHALLER(uint8_t, PvdF32) +PVD_TYPE_MARSHALLER(uint8_t, PvdF64) + +PVD_TYPE_MARSHALLER(int16_t, int32_t) +PVD_TYPE_MARSHALLER(int16_t, uint32_t) +PVD_TYPE_MARSHALLER(int16_t, int64_t) +PVD_TYPE_MARSHALLER(int16_t, uint64_t) +PVD_TYPE_MARSHALLER(int16_t, PvdF32) +PVD_TYPE_MARSHALLER(int16_t, PvdF64) + +PVD_TYPE_MARSHALLER(uint16_t, int32_t) +PVD_TYPE_MARSHALLER(uint16_t, uint32_t) +PVD_TYPE_MARSHALLER(uint16_t, int64_t) +PVD_TYPE_MARSHALLER(uint16_t, uint64_t) +PVD_TYPE_MARSHALLER(uint16_t, PvdF32) +PVD_TYPE_MARSHALLER(uint16_t, PvdF64) + +PVD_TYPE_MARSHALLER(int32_t, int64_t) +PVD_TYPE_MARSHALLER(int32_t, uint64_t) +PVD_TYPE_MARSHALLER(int32_t, PvdF64) +PVD_TYPE_MARSHALLER(int32_t, PvdF32) + +PVD_TYPE_MARSHALLER(uint32_t, int64_t) +PVD_TYPE_MARSHALLER(uint32_t, uint64_t) +PVD_TYPE_MARSHALLER(uint32_t, PvdF64) +PVD_TYPE_MARSHALLER(uint32_t, PvdF32) + +PVD_TYPE_MARSHALLER(PvdF32, PvdF64) +PVD_TYPE_MARSHALLER(PvdF32, uint32_t) +PVD_TYPE_MARSHALLER(PvdF32, int32_t) + +PVD_TYPE_MARSHALLER(uint64_t, PvdF64) +PVD_TYPE_MARSHALLER(int64_t, PvdF64) +PVD_TYPE_MARSHALLER(PvdF64, uint64_t) 
+PVD_TYPE_MARSHALLER(PvdF64, int64_t) + +template <typename TMarshaller> +static inline bool getMarshalOperators(TSingleMarshaller&, TBlockMarshaller&, TMarshaller&, bool) +{ + return false; +} + +template <typename TMarshaller> +static inline bool getMarshalOperators(TSingleMarshaller& single, TBlockMarshaller& block, TMarshaller&, uint32_t) +{ + single = TMarshaller::marshalSingle; + block = TMarshaller::marshalBlock; + return true; +} + +template <typename smtype, typename lgtype> +static inline bool getMarshalOperators(TSingleMarshaller& single, TBlockMarshaller& block) +{ + single = NULL; + block = NULL; + PvdMarshalling<smtype, lgtype> marshaller = PvdMarshalling<smtype, lgtype>(); + return getMarshalOperators(single, block, marshaller, marshaller.canMarshal); +} + +template <typename smtype> +static inline bool getMarshalOperators(TSingleMarshaller& single, TBlockMarshaller& block, int32_t lgtypeId) +{ + switch(lgtypeId) + { + case PvdBaseType::PvdI8: // int8_t: + return getMarshalOperators<smtype, int8_t>(single, block); + case PvdBaseType::PvdU8: // uint8_t: + return getMarshalOperators<smtype, uint8_t>(single, block); + case PvdBaseType::PvdI16: // int16_t: + return getMarshalOperators<smtype, int16_t>(single, block); + case PvdBaseType::PvdU16: // uint16_t: + return getMarshalOperators<smtype, uint16_t>(single, block); + case PvdBaseType::PvdI32: // int32_t: + return getMarshalOperators<smtype, int32_t>(single, block); + case PvdBaseType::PvdU32: // uint32_t: + return getMarshalOperators<smtype, uint32_t>(single, block); + case PvdBaseType::PvdI64: // int64_t: + return getMarshalOperators<smtype, int64_t>(single, block); + case PvdBaseType::PvdU64: // uint64_t: + return getMarshalOperators<smtype, uint64_t>(single, block); + case PvdBaseType::PvdF32: + return getMarshalOperators<smtype, PvdF32>(single, block); + case PvdBaseType::PvdF64: + return getMarshalOperators<smtype, PvdF64>(single, block); + } + return false; +} + +static inline bool 
getMarshalOperators(TSingleMarshaller& single, TBlockMarshaller& block, int32_t smtypeId, + int32_t lgtypeId) +{ + switch(smtypeId) + { + case PvdBaseType::PvdI8: // int8_t: + return getMarshalOperators<int8_t>(single, block, lgtypeId); + case PvdBaseType::PvdU8: // uint8_t: + return getMarshalOperators<uint8_t>(single, block, lgtypeId); + case PvdBaseType::PvdI16: // int16_t: + return getMarshalOperators<int16_t>(single, block, lgtypeId); + case PvdBaseType::PvdU16: // uint16_t: + return getMarshalOperators<uint16_t>(single, block, lgtypeId); + case PvdBaseType::PvdI32: // int32_t: + return getMarshalOperators<int32_t>(single, block, lgtypeId); + case PvdBaseType::PvdU32: // uint32_t: + return getMarshalOperators<uint32_t>(single, block, lgtypeId); + case PvdBaseType::PvdI64: // int64_t: + return getMarshalOperators<int64_t>(single, block, lgtypeId); + case PvdBaseType::PvdU64: // uint64_t: + return getMarshalOperators<uint64_t>(single, block, lgtypeId); + case PvdBaseType::PvdF32: + return getMarshalOperators<PvdF32>(single, block, lgtypeId); + case PvdBaseType::PvdF64: + return getMarshalOperators<PvdF64>(single, block, lgtypeId); + } + return false; +} +} +} + +#endif // PXPVDSDK_PXPVDMARSHALLING_H diff --git a/PxShared/src/pvd/src/PxPvdMemClient.cpp b/PxShared/src/pvd/src/PxPvdMemClient.cpp new file mode 100644 index 0000000..05b7899 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdMemClient.cpp @@ -0,0 +1,134 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "pvd/PxPvdTransport.h" +#include "foundation/PxProfiler.h" + +#include "PxPvdImpl.h" +#include "PxPvdMemClient.h" +#include "PxProfileMemory.h" + +namespace physx +{ +namespace pvdsdk +{ + +PvdMemClient::PvdMemClient(PvdImpl& pvd) +: mSDKPvd(pvd) +, mPvdDataStream(NULL) +, mIsConnected(false) +, mMemEventBuffer(profile::PxProfileMemoryEventBuffer::createMemoryEventBuffer(*gPvdAllocatorCallback)) +{ +} + +PvdMemClient::~PvdMemClient() +{ + mSDKPvd.removeClient(this); + if(mMemEventBuffer.hasClients()) + mPvdDataStream->destroyInstance(&mMemEventBuffer); + mMemEventBuffer.release(); +} + +PvdDataStream* PvdMemClient::getDataStream() +{ + return mPvdDataStream; +} + +PvdUserRenderer* PvdMemClient::getUserRender() +{ + PX_ASSERT(0); + return NULL; +} + +void PvdMemClient::setObjectRegistrar(ObjectRegistrar*) +{ +} + +bool PvdMemClient::isConnected() const +{ + return mIsConnected; +} + +void PvdMemClient::onPvdConnected() +{ + if(mIsConnected) + return; + mIsConnected = true; + + mPvdDataStream = PvdDataStream::create(&mSDKPvd); + mPvdDataStream->createInstance(&mMemEventBuffer); + mMemEventBuffer.addClient(*this); +} + +void PvdMemClient::onPvdDisconnected() +{ + if(!mIsConnected) + return; + mIsConnected = false; + + flush(); + + mMemEventBuffer.removeClient(*this); + mPvdDataStream->release(); + mPvdDataStream = NULL; +} + +void PvdMemClient::onAllocation(size_t inSize, const char* inType, const char* inFile, int inLine, void* inAddr) +{ + mMutex.lock(); + mMemEventBuffer.onAllocation(inSize, inType, inFile, inLine, inAddr); + mMutex.unlock(); +} + +void PvdMemClient::onDeallocation(void* inAddr) +{ + mMutex.lock(); + mMemEventBuffer.onDeallocation(inAddr); + mMutex.unlock(); +} + +void PvdMemClient::flush() +{ + mMutex.lock(); + mMemEventBuffer.flushProfileEvents(); + mMutex.unlock(); +} + +void PvdMemClient::handleBufferFlush(const uint8_t* inData, uint32_t inLength) +{ + if(mPvdDataStream) + mPvdDataStream->setPropertyValue(&mMemEventBuffer, "events", 
inData, inLength); +} + +void PvdMemClient::handleClientRemoved() +{ +} + +} // pvd +} // physx diff --git a/PxShared/src/pvd/src/PxPvdMemClient.h b/PxShared/src/pvd/src/PxPvdMemClient.h new file mode 100644 index 0000000..37ac4ff --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdMemClient.h @@ -0,0 +1,85 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. 
All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PXPVDSDK_PXPVDMEMCLIENT_H +#define PXPVDSDK_PXPVDMEMCLIENT_H + +#include "PxPvdClient.h" +#include "PsHashMap.h" +#include "PsMutex.h" +#include "PsBroadcast.h" +#include "PxProfileEventBufferClient.h" +#include "PxProfileMemory.h" + +namespace physx +{ +class PvdDataStream; + +namespace pvdsdk +{ +class PvdImpl; +class PvdMemClient : public PvdClient, + public profile::PxProfileEventBufferClient, + public shdfnd::UserAllocated +{ + PX_NOCOPY(PvdMemClient) + public: + PvdMemClient(PvdImpl& pvd); + virtual ~PvdMemClient(); + + bool isConnected() const; + void onPvdConnected(); + void onPvdDisconnected(); + void flush(); + + PvdDataStream* getDataStream(); + PvdUserRenderer* getUserRender(); + void setObjectRegistrar(ObjectRegistrar*); + void sendMemEvents(); + + // memory event + void onAllocation(size_t size, const char* typeName, const char* filename, int line, void* allocatedMemory); + void onDeallocation(void* addr); + + private: + PvdImpl& mSDKPvd; + PvdDataStream* mPvdDataStream; + bool mIsConnected; + + // mem profile + shdfnd::Mutex mMutex; // mem onallocation can called from different threads + profile::PxProfileMemoryEventBuffer& mMemEventBuffer; + void handleBufferFlush(const uint8_t* inData, uint32_t inLength); + void handleClientRemoved(); +}; + +} // namespace pvdsdk +} // namespace physx + +#endif // PXPVDSDK_PXPVDMEMCLIENT_H diff --git a/PxShared/src/pvd/src/PxPvdObjectModel.h b/PxShared/src/pvd/src/PxPvdObjectModel.h new file mode 100644 index 0000000..f4858df --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdObjectModel.h @@ -0,0 +1,437 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#ifndef PXPVDSDK_PXPVDOBJECTMODEL_H +#define PXPVDSDK_PXPVDOBJECTMODEL_H + +#include "PsBasicTemplates.h" +#include "PxPvdObjectModelMetaData.h" + +namespace physx +{ +namespace pvdsdk +{ + +#if PX_VC == 11 || PX_VC == 12 || PX_VC == 14 +#pragma warning(push) +#pragma warning(disable : 4435) // 'class1' : Object layout under /vd2 will change due to virtual base 'class2' +#endif + +class PvdInputStream; +class PvdOutputStream; + +struct InstanceDescription +{ + int32_t mId; + int32_t mClassId; + void* mInstPtr; + bool mAlive; + + InstanceDescription(int32_t id, int32_t classId, void* inst, bool alive) + : mId(id), mClassId(classId), mInstPtr(inst), mAlive(alive) + { + } + InstanceDescription() : mId(-1), mClassId(-1), mInstPtr(NULL), mAlive(false) + { + } + operator void*() + { + PX_ASSERT(mAlive); + if(mAlive) + return mInstPtr; + return NULL; + } + operator int32_t() + { + return mId; + } +}; + +typedef physx::shdfnd::Pair<int32_t, int32_t> InstancePropertyPair; + +class PvdObjectModelBase +{ + protected: + virtual ~PvdObjectModelBase() + { + } + + public: + virtual void addRef() = 0; + virtual void release() = 0; + virtual void* idToPtr(int32_t instId) const = 0; + virtual int32_t ptrToId(void* instPtr) const = 0; + virtual InstanceDescription idToDescriptor(int32_t instId) const = 0; + virtual InstanceDescription ptrToDescriptor(void* instPtr) const = 0; + virtual Option<ClassDescription> getClassOf(void* instId) const = 0; + virtual const PvdObjectModelMetaData& getMetaData() const = 0; +}; + +class PvdObjectModelMutator : public virtual PvdObjectModelBase +{ + protected: + virtual ~PvdObjectModelMutator() + { + } + + public: + // if the instance is alive, this destroyes any arrays and sets the instance back to its initial state. + virtual InstanceDescription createInstance(int32_t clsId, int32_t instId) = 0; + virtual InstanceDescription createInstance(int32_t clsId) = 0; + // Instances that are pinned are not removed from the system, ever. 
+ // This means that createInstance, pinInstance, deleteInstance + // can be called in this order and you can still call getClassOf, etc. on the instances. + // The instances will never be removed from memory if they are pinned, so use at your + // careful discretion. + virtual void pinInstance(void* instId) = 0; + virtual void unPinInstance(void* instId) = 0; + // when doing capture, should update all events in a section at once, otherwis there possible parse data + // incompltely. + virtual void recordCompletedInstances() = 0; + + virtual void destroyInstance(void* instId) = 0; + virtual int32_t getNextInstanceHandleValue() const = 0; + // reserve a set of instance handle values by getting the current, adding an amount to it + // and setting the value. You can never set the value lower than it already is, it only climbs. + virtual void setNextInstanceHandleValue(int32_t hdlValue) = 0; + // If incoming type is provided, then we may be able to marshal simple types + // This works for arrays, it just completely replaces the entire array. + // Because if this, it is an error of the property identifier + virtual bool setPropertyValue(void* instId, int32_t propId, const uint8_t* data, uint32_t dataLen, + int32_t incomingType) = 0; + // Set a set of properties defined by a property message + virtual bool setPropertyMessage(void* instId, int32_t msgId, const uint8_t* data, uint32_t dataLen) = 0; + // insert an element(s) into array index. If index > numElements, element(s) is(are) appended. + virtual bool insertArrayElement(void* instId, int32_t propId, int32_t index, const uint8_t* data, uint32_t dataLen, + int32_t incomingType = -1) = 0; + virtual bool removeArrayElement(void* instId, int32_t propId, int32_t index) = 0; + // Add this array element to end end if it doesn't already exist in the array. + // The option is false if there was an error with the function call. 
+ // The integer has no value if nothing was added, else it tells you the index + // where the item was added. Comparison is done using memcmp. + virtual Option<int32_t> pushBackArrayElementIf(void* instId, int32_t propId, const uint8_t* data, uint32_t dataLen, + int32_t incomingType = -1) = 0; + // Remove an array element if it exists in the array. + // The option is false if there was an error with the function call. + // the integer has no value if the item wasn't found, else it tells you the index where + // the item resided. Comparison is memcmp. + virtual Option<int32_t> removeArrayElementIf(void* instId, int32_t propId, const uint8_t* data, uint32_t dataLen, + int32_t incomingType = -1) = 0; + virtual bool setArrayElementValue(void* instId, int32_t propId, int32_t propIdx, const uint8_t* data, + uint32_t dataLen, int32_t incomingType) = 0; + + virtual void originShift(void* instId, PxVec3 shift) = 0; + + InstanceDescription createInstance(const NamespacedName& name) + { + return createInstance(getMetaData().findClass(name)->mClassId); + } + template <typename TDataType> + bool setPropertyValue(void* instId, const char* propName, const TDataType* dtype, uint32_t count) + { + ClassDescription cls(getClassOf(instId)); + Option<PropertyDescription> descOpt(getMetaData().findProperty(cls.mClassId, propName)); + if(!descOpt.hasValue()) + { + PX_ASSERT(false); + return false; + } + const PropertyDescription& prop(descOpt); + Option<ClassDescription> incomingCls(getMetaData().findClass(getPvdNamespacedNameForType<TDataType>())); + if(incomingCls.hasValue()) + return setPropertyValue(instId, prop.mPropertyId, reinterpret_cast<const uint8_t*>(dtype), + sizeof(*dtype) * count, incomingCls.getValue().mClassId); + return false; + } + + // Simplest possible setPropertyValue + template <typename TDataType> + bool setPropertyValue(void* instId, const char* propName, const TDataType& dtype) + { + return setPropertyValue(instId, propName, &dtype, 1); + } + + template 
<typename TDataType> + bool setPropertyMessage(void* instId, const TDataType& msg) + { + Option<PropertyMessageDescription> msgId = + getMetaData().findPropertyMessage(getPvdNamespacedNameForType<TDataType>()); + if(msgId.hasValue() == false) + return false; + return setPropertyMessage(instId, msgId.getValue().mMessageId, reinterpret_cast<const uint8_t*>(&msg), + sizeof(msg)); + } + template <typename TDataType> + bool insertArrayElement(void* instId, const char* propName, int32_t idx, const TDataType& dtype) + { + ClassDescription cls(getClassOf(instId)); + Option<PropertyDescription> descOpt(getMetaData().findProperty(cls.mClassId, propName)); + if(!descOpt.hasValue()) + { + PX_ASSERT(false); + return false; + } + const PropertyDescription& prop(descOpt); + Option<ClassDescription> incomingCls(getMetaData().findClass(getPvdNamespacedNameForType<TDataType>())); + if(incomingCls.hasValue()) + { + return insertArrayElement(instId, prop.mPropertyId, idx, reinterpret_cast<const uint8_t*>(&dtype), + sizeof(dtype), incomingCls.getValue().mClassId); + } + return false; + } + + bool removeArrayElement(void* instId, const char* propName, int32_t idx) + { + ClassDescription cls(getClassOf(instId)); + Option<PropertyDescription> descOpt(getMetaData().findProperty(cls.mClassId, propName)); + if(!descOpt.hasValue()) + { + PX_ASSERT(false); + return false; + } + const PropertyDescription& prop(descOpt); + return removeArrayElement(instId, prop.mPropertyId, idx); + } + template <typename TDataType> + Option<int32_t> pushBackArrayElementIf(void* instId, const char* pname, const TDataType& item) + { + ClassDescription cls(getClassOf(instId)); + Option<PropertyDescription> descOpt(getMetaData().findProperty(cls.mClassId, pname)); + if(!descOpt.hasValue()) + { + PX_ASSERT(false); + return None(); + } + const PropertyDescription& prop(descOpt); + Option<ClassDescription> incomingCls(getMetaData().findClass(getPvdNamespacedNameForType<TDataType>())); + if(incomingCls.hasValue() && 
(incomingCls.getValue().mClassId == prop.mDatatype)) + { + return pushBackArrayElementIf(instId, prop.mPropertyId, reinterpret_cast<const uint8_t*>(&item), + sizeof(item), incomingCls.getValue().mClassId); + } + return None(); + } + template <typename TDataType> + Option<int32_t> removeArrayElementIf(void* instId, const char* propId, const TDataType& item) + { + ClassDescription cls(getClassOf(instId)); + Option<PropertyDescription> descOpt(getMetaData().findProperty(cls.mClassId, propId)); + if(!descOpt.hasValue()) + { + PX_ASSERT(false); + return None(); + } + const PropertyDescription& prop(descOpt); + Option<ClassDescription> incomingCls(getMetaData().findClass(getPvdNamespacedNameForType<TDataType>())); + if(incomingCls.hasValue() && (incomingCls.getValue().mClassId == prop.mDatatype)) + { + return removeArrayElementIf(instId, prop.mPropertyId, reinterpret_cast<const uint8_t*>(&item), sizeof(item), + incomingCls.getValue().mClassId); + } + return None(); + } + template <typename TDataType> + bool setArrayElementValue(void* instId, const char* propName, int32_t propIdx, TDataType& item) + { + ClassDescription cls(getClassOf(instId)); + Option<PropertyDescription> descOpt(getMetaData().findProperty(cls.mClassId, propName)); + if(!descOpt.hasValue()) + { + PX_ASSERT(false); + return false; + } + const PropertyDescription& prop(descOpt); + Option<ClassDescription> incomingCls(getMetaData().findClass(getPvdNamespacedNameForType<TDataType>())); + if(incomingCls.hasValue() && (incomingCls.getValue().mClassId == prop.mDatatype)) + return setArrayElementValue(instId, prop.mPropertyId, propIdx, reinterpret_cast<const uint8_t*>(&item), + sizeof(item), incomingCls.getValue().mClassId); + PX_ASSERT(false); + return false; + } +}; + +class PvdObjectModelReader : public virtual PvdObjectModelBase +{ + protected: + virtual ~PvdObjectModelReader() + { + } + + public: + // Return the byte size of a possible nested property + virtual uint32_t getPropertyByteSize(void* instId, 
int32_t propId) = 0; + uint32_t getPropertyByteSize(void* instId, String propName) + { + int32_t propId = getMetaData().findProperty(getClassOf(instId)->mClassId, propName)->mPropertyId; + return getPropertyByteSize(instId, propId); + } + // Return the value of a possible nested property + virtual uint32_t getPropertyValue(void* instId, int32_t propId, uint8_t* outData, uint32_t outDataLen) = 0; + // Get the actual raw database memory. This is subject to change drastically if the object gets deleted. + virtual DataRef<uint8_t> getRawPropertyValue(void* instId, int32_t propId) = 0; + + DataRef<uint8_t> getRawPropertyValue(void* instId, const char* propName) + { + ClassDescription cls(getClassOf(instId)); + Option<PropertyDescription> descOpt(getMetaData().findProperty(cls.mClassId, propName)); + if(!descOpt.hasValue()) + { + PX_ASSERT(false); + return 0; + } + return getRawPropertyValue(instId, descOpt->mPropertyId); + } + + template <typename TDataType> + DataRef<TDataType> getTypedRawPropertyValue(void* instId, int32_t propId) + { + DataRef<uint8_t> propVal = getRawPropertyValue(instId, propId); + return DataRef<TDataType>(reinterpret_cast<const TDataType*>(propVal.begin()), + propVal.size() / sizeof(TDataType)); + } + + template <typename TDataType> + DataRef<TDataType> getTypedRawPropertyValue(void* instId, const char* propName) + { + DataRef<uint8_t> propVal = getRawPropertyValue(instId, propName); + return DataRef<TDataType>(reinterpret_cast<const TDataType*>(propVal.begin()), + propVal.size() / sizeof(TDataType)); + } + + template <typename TDataType> + uint32_t getPropertyValue(void* instId, const char* propName, TDataType* outBuffer, uint32_t outNumBufferItems) + { + ClassDescription cls(getClassOf(instId)); + Option<PropertyDescription> descOpt(getMetaData().findProperty(cls.mClassId, propName)); + if(!descOpt.hasValue()) + { + PX_ASSERT(false); + return 0; + } + const PropertyDescription& prop(descOpt); + uint32_t desired = outNumBufferItems * 
sizeof(TDataType); + return getPropertyValue(instId, prop.mPropertyId, reinterpret_cast<uint8_t*>(outBuffer), desired) / + sizeof(TDataType); + } + + template <typename TDataType> + Option<TDataType> getPropertyValue(void* instId, const char* propName) + { + TDataType retval; + if(getPropertyValue(instId, propName, &retval, 1) == 1) + return retval; + return None(); + } + + // Get this one item out of the array + // return array[idx] + virtual uint32_t getPropertyValue(void* instId, int32_t propId, int inArrayIndex, uint8_t* outData, + uint32_t outDataLen) = 0; + // Get this sub element of one item out of the array + // return array[idx].a + virtual uint32_t getPropertyValue(void* instId, int32_t propId, int inArrayIndex, int nestedProperty, + uint8_t* outData, uint32_t outDataLen) = 0; + + // Get a set of properties defined by a property message + virtual bool getPropertyMessage(void* instId, int32_t msgId, uint8_t* data, uint32_t dataLen) const = 0; + + template <typename TDataType> + bool getPropertyMessage(void* instId, TDataType& msg) + { + Option<PropertyMessageDescription> msgId( + getMetaData().findPropertyMessage(getPvdNamespacedNameForType<TDataType>())); + if(msgId.hasValue() == false) + return false; + return getPropertyMessage(instId, msgId.getValue().mMessageId, reinterpret_cast<uint8_t*>(&msg), sizeof(msg)); + } + + // clearing the array is performed with a set property value call with no data. + virtual uint32_t getNbArrayElements(void* instId, int32_t propId) = 0; + uint32_t getNbArrayElements(void* instId, const char* propName) + { + ClassDescription cls(getClassOf(instId)); + Option<PropertyDescription> descOpt(getMetaData().findProperty(cls.mClassId, propName)); + if(!descOpt.hasValue()) + { + PX_ASSERT(false); + return false; + } + const PropertyDescription& prop(descOpt); + return getNbArrayElements(instId, prop.mPropertyId); + } + + // Write this instance out. Offset is set as the instances last write offset. 
+ // This offset is cleared if the object is changed. + // If offset doesn't have a value, then the instance isn't changed. + virtual void writeInstance(void* instId, PvdOutputStream& stream) = 0; + + virtual uint32_t getNbInstances() const = 0; + virtual uint32_t getInstances(InstanceDescription* outBuffer, uint32_t count, uint32_t startIndex = 0) const = 0; + + // Get the list of updated objects since the last time someone cleared the updated instance list. + virtual uint32_t getNbUpdatedInstances() const = 0; + virtual uint32_t getUpdatedInstances(InstanceDescription* outBuffer, uint32_t count, uint32_t startIndex = 0) = 0; + // Must be called for instances to be released. Only instances that aren't live nor are they updated + // are valid. + virtual void clearUpdatedInstances() = 0; +}; + +class PvdObjectModel : public PvdObjectModelMutator, public PvdObjectModelReader +{ + protected: + virtual ~PvdObjectModel() + { + } + + public: + virtual void destroyAllInstances() = 0; + virtual bool setPropertyValueToDefault(void* instId, int32_t propId) = 0; + // Read an instance data and put a copy of the data in the output stream. + static bool readInstance(PvdInputStream& inStream, PvdOutputStream& outStream); + virtual InstanceDescription readInstance(DataRef<const uint8_t> writtenData) = 0; + // Set just this property from this serialized instance. 
+ // Expects the instance to be alive, just like setPropertyValue + virtual bool readInstanceProperty(DataRef<const uint8_t> writtenData, int32_t propId) = 0; + + virtual void recordCompletedInstances() = 0; + + // OriginShift seekback support + virtual uint32_t getNbShifted() = 0; + virtual void getShiftedPair(InstancePropertyPair* outData, uint32_t count) = 0; + virtual void clearShiftedPair() = 0; + virtual void shiftObject(void* instId, int32_t propId, PxVec3 shift) = 0; + static PvdObjectModel& create(physx::PxAllocatorCallback& callback, PvdObjectModelMetaData& metaData, + bool isCapture = false); +}; + +#if PX_VC == 11 || PX_VC == 12 || PX_VC == 14 +#pragma warning(pop) +#endif +} +} +#endif // PXPVDSDK_PXPVDOBJECTMODEL_H diff --git a/PxShared/src/pvd/src/PxPvdObjectModelInternalTypeDefs.h b/PxShared/src/pvd/src/PxPvdObjectModelInternalTypeDefs.h new file mode 100644 index 0000000..eca7858 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdObjectModelInternalTypeDefs.h @@ -0,0 +1,32 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#define THERE_IS_NO_INCLUDE_GUARD_HERE_FOR_A_REASON + +DECLARE_INTERNAL_PVD_TYPE(ArrayData) + +#undef THERE_IS_NO_INCLUDE_GUARD_HERE_FOR_A_REASON diff --git a/PxShared/src/pvd/src/PxPvdObjectModelInternalTypes.h b/PxShared/src/pvd/src/PxPvdObjectModelInternalTypes.h new file mode 100644 index 0000000..3344140 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdObjectModelInternalTypes.h @@ -0,0 +1,171 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. 
+// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. + +#ifndef PXPVDSDK_PXPVDOBJECTMODELINTERNALTYPES_H +#define PXPVDSDK_PXPVDOBJECTMODELINTERNALTYPES_H + +#include "foundation/PxMemory.h" +#include "PxPvdObjectModelBaseTypes.h" +#include "PsArray.h" +#include "PxPvdFoundation.h" +#include "PxPvdObjectModel.h" + +namespace physx +{ +namespace pvdsdk +{ + +struct PvdInternalType +{ + enum Enum + { + None = 0, +#define DECLARE_INTERNAL_PVD_TYPE(type) type, +#include "PxPvdObjectModelInternalTypeDefs.h" + Last +#undef DECLARE_INTERNAL_PVD_TYPE + }; +}; + +PX_COMPILE_TIME_ASSERT(uint32_t(PvdInternalType::Last) <= uint32_t(PvdBaseType::InternalStop)); + +template <typename T> +struct DataTypeToPvdTypeMap +{ + bool compile_error; +}; +template <PvdInternalType::Enum> +struct PvdTypeToDataTypeMap +{ + bool compile_error; +}; + +#define DECLARE_INTERNAL_PVD_TYPE(type) \ + template <> \ + struct DataTypeToPvdTypeMap<type> \ + { \ + enum Enum \ + { \ + BaseTypeEnum = PvdInternalType::type \ + }; \ + }; \ + template <> \ + struct PvdTypeToDataTypeMap<PvdInternalType::type> \ + { \ + typedef type TDataType; \ + }; \ + template <> \ + struct PvdDataTypeToNamespacedNameMap<type> \ + { \ + NamespacedName Name; \ + PvdDataTypeToNamespacedNameMap<type>() : 
Name("physx3_debugger_internal", #type) \ + { \ + } \ + }; +#include "PxPvdObjectModelInternalTypeDefs.h" +#undef DECLARE_INTERNAL_PVD_TYPE + +template <typename TDataType, typename TAlloc> +DataRef<TDataType> toDataRef(const shdfnd::Array<TDataType, TAlloc>& data) +{ + return DataRef<TDataType>(data.begin(), data.end()); +} + +static inline bool safeStrEq(const DataRef<String>& lhs, const DataRef<String>& rhs) +{ + uint32_t count = lhs.size(); + if(count != rhs.size()) + return false; + for(uint32_t idx = 0; idx < count; ++idx) + if(!safeStrEq(lhs[idx], rhs[idx])) + return false; + return true; +} + +static inline char* copyStr(const char* str) +{ + str = nonNull(str); + uint32_t len = static_cast<uint32_t>(strlen(str)); + char* newData = reinterpret_cast<char*>(PX_ALLOC(len + 1, "string")); + PxMemCopy(newData, str, len); + newData[len] = 0; + return newData; +} + +// Used for predictable bit fields. +template <typename TDataType, uint8_t TNumBits, uint8_t TOffset, typename TInputType> +struct BitMaskSetter +{ + // Create a mask that masks out the orginal value shift into place + static TDataType createOffsetMask() + { + return createMask() << TOffset; + } + // Create a mask of TNumBits number of tis + static TDataType createMask() + { + return static_cast<TDataType>((1 << TNumBits) - 1); + } + void setValue(TDataType& inCurrent, TInputType inData) + { + PX_ASSERT(inData < (1 << TNumBits)); + + // Create a mask to remove the current value. + TDataType theMask = ~(createOffsetMask()); + // Clear out current value. + inCurrent = inCurrent & theMask; + // Create the new value. + TDataType theAddition = reinterpret_cast<TDataType>(inData << TOffset); + // or it into the existing value. 
+ inCurrent = inCurrent | theAddition; + } + + TInputType getValue(TDataType inCurrent) + { + return static_cast<TInputType>((inCurrent >> TOffset) & createMask()); + } +}; + +template <typename TObjType> +DataRef<TObjType> getArray(shdfnd::Array<uint8_t>& dataBuffer, PvdObjectModelReader& reader, InstanceDescription instanceDesc, + String propName) +{ + int32_t propId = reader.getMetaData().findProperty(reader.getClassOf(instanceDesc)->mClassId, propName)->mPropertyId; + uint32_t numBytes = reader.getPropertyByteSize(instanceDesc.mInstPtr, propId); + uint32_t numItems = reader.getNbArrayElements(instanceDesc.mInstPtr, propId); + if(numBytes == 0) + return NULL; + if(numBytes > dataBuffer.size()) + dataBuffer.resize(numBytes); + + TObjType* dataPtr = reinterpret_cast<TObjType*>(dataBuffer.begin()); + reader.getPropertyValue(instanceDesc, propId, dataBuffer.begin(), numBytes); + return DataRef<TObjType>(dataPtr, numItems); +} +} +} +#endif // PXPVDSDK_PXPVDOBJECTMODELINTERNALTYPES_H diff --git a/PxShared/src/pvd/src/PxPvdObjectModelMetaData.cpp b/PxShared/src/pvd/src/PxPvdObjectModelMetaData.cpp new file mode 100644 index 0000000..9971d00 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdObjectModelMetaData.cpp @@ -0,0 +1,1515 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+#include "PxPvdObjectModel.h" +#include "PxPvdObjectModelBaseTypes.h" +#include "PxPvdObjectModelInternalTypes.h" +#include "PxPvdObjectModelMetaData.h" +#include "PxPvdFoundation.h" +#include "PsMutex.h" +#include "PxPvdByteStreams.h" +#include "PxPvdInternalByteStreams.h" +#include "PxPvdMarshalling.h" + +using namespace physx; +using namespace pvdsdk; +using namespace shdfnd; + +namespace +{ + +struct PropDescImpl : public PropertyDescription, public UserAllocated +{ + Array<NamedValue> mValueNames; + PropDescImpl(const PropertyDescription& inBase, StringTable& table) + : PropertyDescription(inBase), mValueNames("NamedValue") + { + mName = table.registerStr(mName); + } + PropDescImpl() : mValueNames("NamedValue") + { + } + + template <typename TSerializer> + void serialize(TSerializer& serializer) + { + serializer.streamify(mOwnerClassName); + serializer.streamify(mOwnerClassId); + serializer.streamify(mSemantic); + serializer.streamify(mDatatype); + serializer.streamify(mDatatypeName); + serializer.streamify(mPropertyType); + serializer.streamify(mPropertyId); + serializer.streamify(m32BitOffset); + serializer.streamify(m64BitOffset); + serializer.streamify(mValueNames); + serializer.streamify(mName); + } +}; + +struct ClassDescImpl : public ClassDescription, public UserAllocated +{ + Array<PropDescImpl*> mPropImps; + Array<PtrOffset> m32OffsetArray; + Array<PtrOffset> m64OffsetArray; + ClassDescImpl(const ClassDescription& inBase) + : ClassDescription(inBase) + , mPropImps("PropDescImpl*") + , m32OffsetArray("ClassDescImpl::m32OffsetArray") + , m64OffsetArray("ClassDescImpl::m64OffsetArray") + { + PVD_FOREACH(idx, get32BitSizeInfo().mPtrOffsets.size()) + m32OffsetArray.pushBack(get32BitSizeInfo().mPtrOffsets[idx]); + PVD_FOREACH(idx, get64BitSizeInfo().mPtrOffsets.size()) + m64OffsetArray.pushBack(get64BitSizeInfo().mPtrOffsets[idx]); + } + ClassDescImpl() + : mPropImps("PropDescImpl*") + , m32OffsetArray("ClassDescImpl::m32OffsetArray") + , 
m64OffsetArray("ClassDescImpl::m64OffsetArray") + { + } + PropDescImpl* findProperty(String name) + { + PVD_FOREACH(idx, mPropImps.size()) + { + if(safeStrEq(mPropImps[idx]->mName, name)) + return mPropImps[idx]; + } + return NULL; + } + void addProperty(PropDescImpl* prop) + { + mPropImps.pushBack(prop); + } + + void addPtrOffset(PtrOffsetType::Enum type, uint32_t offset32, uint32_t offset64) + { + m32OffsetArray.pushBack(PtrOffset(type, offset32)); + m64OffsetArray.pushBack(PtrOffset(type, offset64)); + get32BitSizeInfo().mPtrOffsets = DataRef<PtrOffset>(m32OffsetArray.begin(), m32OffsetArray.end()); + get64BitSizeInfo().mPtrOffsets = DataRef<PtrOffset>(m64OffsetArray.begin(), m64OffsetArray.end()); + } + + template <typename TSerializer> + void serialize(TSerializer& serializer) + { + serializer.streamify(mName); + serializer.streamify(mClassId); + serializer.streamify(mBaseClass); + serializer.streamify(mPackedUniformWidth); + serializer.streamify(mPackedClassType); + serializer.streamify(mLocked); + serializer.streamify(mRequiresDestruction); + serializer.streamify(get32BitSize()); + serializer.streamify(get32BitSizeInfo().mDataByteSize); + serializer.streamify(get32BitSizeInfo().mAlignment); + serializer.streamify(get64BitSize()); + serializer.streamify(get64BitSizeInfo().mDataByteSize); + serializer.streamify(get64BitSizeInfo().mAlignment); + serializer.streamifyLinks(mPropImps); + serializer.streamify(m32OffsetArray); + serializer.streamify(m64OffsetArray); + get32BitSizeInfo().mPtrOffsets = DataRef<PtrOffset>(m32OffsetArray.begin(), m32OffsetArray.end()); + get64BitSizeInfo().mPtrOffsets = DataRef<PtrOffset>(m64OffsetArray.begin(), m64OffsetArray.end()); + } +}; + +class StringTableImpl : public StringTable, public UserAllocated +{ + HashMap<const char*, char*> mStrings; + uint32_t mNextStrHandle; + HashMap<uint32_t, char*> mHandleToStr; + HashMap<const char*, uint32_t> mStrToHandle; + + public: + StringTableImpl() + : mStrings("StringTableImpl::mStrings") 
+ , mNextStrHandle(1) + , mHandleToStr("StringTableImpl::mHandleToStr") + , mStrToHandle("StringTableImpl::mStrToHandle") + { + } + uint32_t nextHandleValue() + { + return mNextStrHandle++; + } + virtual ~StringTableImpl() + { + for(HashMap<const char*, char*>::Iterator iter = mStrings.getIterator(); !iter.done(); ++iter) + PX_FREE(iter->second); + mStrings.clear(); + } + virtual uint32_t getNbStrs() + { + return mStrings.size(); + } + virtual uint32_t getStrs(const char** outStrs, uint32_t bufLen, uint32_t startIdx = 0) + { + startIdx = PxMin(getNbStrs(), startIdx); + uint32_t numStrs(PxMin(getNbStrs() - startIdx, bufLen)); + HashMap<const char*, char*>::Iterator iter(mStrings.getIterator()); + for(uint32_t idx = 0; idx < startIdx; ++idx, ++iter) + ; + for(uint32_t idx = 0; idx < numStrs && !iter.done(); ++idx, ++iter) + outStrs[idx] = iter->second; + return numStrs; + } + void addStringHandle(char* str, uint32_t hdl) + { + mHandleToStr.insert(hdl, str); + mStrToHandle.insert(str, hdl); + } + + uint32_t addStringHandle(char* str) + { + uint32_t theNewHandle = nextHandleValue(); + addStringHandle(str, theNewHandle); + return theNewHandle; + } + const char* doRegisterStr(const char* str, bool& outAdded) + { + PX_ASSERT(isMeaningful(str)); + const HashMap<const char*, char*>::Entry* entry(mStrings.find(str)); + if(entry == NULL) + { + outAdded = true; + char* retval(copyStr(str)); + mStrings.insert(retval, retval); + return retval; + } + return entry->second; + } + virtual const char* registerStr(const char* str, bool& outAdded) + { + outAdded = false; + if(isMeaningful(str) == false) + return ""; + const char* retval = doRegisterStr(str, outAdded); + if(outAdded) + addStringHandle(const_cast<char*>(retval)); + return retval; + } + + NamespacedName registerName(const NamespacedName& nm) + { + return NamespacedName(registerStr(nm.mNamespace), registerStr(nm.mName)); + } + const char* registerStr(const char* str) + { + bool ignored; + return registerStr(str, ignored); 
+ } + + virtual StringHandle strToHandle(const char* str) + { + if(isMeaningful(str) == false) + return 0; + const HashMap<const char*, uint32_t>::Entry* entry(mStrToHandle.find(str)); + if(entry) + return entry->second; + bool added = false; + const char* registeredStr = doRegisterStr(str, added); + uint32_t theNewHandle = addStringHandle(const_cast<char*>(registeredStr)); + PX_ASSERT(mStrToHandle.find(str)); + PX_ASSERT(added); + return theNewHandle; + } + + virtual const char* handleToStr(uint32_t hdl) + { + if(hdl == 0) + return ""; + const HashMap<uint32_t, char*>::Entry* entry(mHandleToStr.find(hdl)); + if(entry) + return entry->second; + // unregistered handle... + return ""; + } + + void write(PvdOutputStream& stream) + { + uint32_t numStrs = static_cast<uint32_t>(mHandleToStr.size()); + stream << numStrs; + stream << mNextStrHandle; + for(HashMap<uint32_t, char*>::Iterator iter = mHandleToStr.getIterator(); !iter.done(); ++iter) + { + stream << iter->first; + uint32_t len = static_cast<uint32_t>(strlen(iter->second) + 1); + stream << len; + stream.write(reinterpret_cast<uint8_t*>(iter->second), len); + } + } + + template <typename TReader> + void read(TReader& stream) + { + mHandleToStr.clear(); + mStrToHandle.clear(); + uint32_t numStrs; + stream >> numStrs; + stream >> mNextStrHandle; + Array<uint8_t> readBuffer("StringTable::read::readBuffer"); + uint32_t bufSize = 0; + for(uint32_t idx = 0; idx < numStrs; ++idx) + { + uint32_t handleValue; + uint32_t bufLen; + stream >> handleValue; + stream >> bufLen; + if(bufSize < bufLen) + readBuffer.resize(bufLen); + bufSize = PxMax(bufSize, bufLen); + stream.read(readBuffer.begin(), bufLen); + bool ignored; + const char* newStr = doRegisterStr(reinterpret_cast<const char*>(readBuffer.begin()), ignored); + addStringHandle(const_cast<char*>(newStr), handleValue); + } + } + + virtual void release() + { + PVD_DELETE(this); + } + + private: + StringTableImpl& operator=(const StringTableImpl&); +}; + +struct 
NamespacedNameHasher +{ + uint32_t operator()(const NamespacedName& nm) + { + return Hash<const char*>()(nm.mNamespace) ^ Hash<const char*>()(nm.mName); + } + bool equal(const NamespacedName& lhs, const NamespacedName& rhs) + { + return safeStrEq(lhs.mNamespace, rhs.mNamespace) && safeStrEq(lhs.mName, rhs.mName); + } +}; + +struct ClassPropertyName +{ + NamespacedName mName; + String mPropName; + ClassPropertyName(const NamespacedName& name = NamespacedName(), String propName = "") + : mName(name), mPropName(propName) + { + } +}; + +struct ClassPropertyNameHasher +{ + uint32_t operator()(const ClassPropertyName& nm) + { + return NamespacedNameHasher()(nm.mName) ^ Hash<const char*>()(nm.mPropName); + } + bool equal(const ClassPropertyName& lhs, const ClassPropertyName& rhs) + { + return NamespacedNameHasher().equal(lhs.mName, rhs.mName) && safeStrEq(lhs.mPropName, rhs.mPropName); + } +}; + +struct PropertyMessageEntryImpl : public PropertyMessageEntry +{ + PropertyMessageEntryImpl(const PropertyMessageEntry& data) : PropertyMessageEntry(data) + { + } + PropertyMessageEntryImpl() + { + } + template <typename TSerializerType> + void serialize(TSerializerType& serializer) + { + serializer.streamify(mDatatypeName); + serializer.streamify(mDatatypeId); + serializer.streamify(mMessageOffset); + serializer.streamify(mByteSize); + serializer.streamify(mDestByteSize); + serializer.streamify(mProperty); + } +}; + +struct PropertyMessageDescriptionImpl : public PropertyMessageDescription, public UserAllocated +{ + Array<PropertyMessageEntryImpl> mEntryImpls; + Array<PropertyMessageEntry> mEntries; + Array<uint32_t> mStringOffsetArray; + PropertyMessageDescriptionImpl(const PropertyMessageDescription& data) + : PropertyMessageDescription(data) + , mEntryImpls("PropertyMessageDescriptionImpl::mEntryImpls") + , mEntries("PropertyMessageDescriptionImpl::mEntries") + , mStringOffsetArray("PropertyMessageDescriptionImpl::mStringOffsets") + { + } + PropertyMessageDescriptionImpl() + 
: mEntryImpls("PropertyMessageDescriptionImpl::mEntryImpls") + , mEntries("PropertyMessageDescriptionImpl::mEntries") + , mStringOffsetArray("PropertyMessageDescriptionImpl::mStringOffsets") + { + } + + ~PropertyMessageDescriptionImpl() + { + } + + void addEntry(const PropertyMessageEntryImpl& entry) + { + mEntryImpls.pushBack(entry); + mEntries.pushBack(entry); + mProperties = DataRef<PropertyMessageEntry>(mEntries.begin(), mEntries.end()); + } + + template <typename TSerializerType> + void serialize(TSerializerType& serializer) + { + serializer.streamify(mClassName); + serializer.streamify(mClassId); // No other class has this id, it is DB-unique + serializer.streamify(mMessageName); + serializer.streamify(mMessageId); + serializer.streamify(mMessageByteSize); + serializer.streamify(mEntryImpls); + serializer.streamify(mStringOffsetArray); + if(mEntries.size() != mEntryImpls.size()) + { + mEntries.clear(); + uint32_t numEntries = static_cast<uint32_t>(mEntryImpls.size()); + for(uint32_t idx = 0; idx < numEntries; ++idx) + mEntries.pushBack(mEntryImpls[idx]); + } + mProperties = DataRef<PropertyMessageEntry>(mEntries.begin(), mEntries.end()); + mStringOffsets = DataRef<uint32_t>(mStringOffsetArray.begin(), mStringOffsetArray.end()); + } + + private: + PropertyMessageDescriptionImpl& operator=(const PropertyMessageDescriptionImpl&); +}; + +struct PvdObjectModelMetaDataImpl : public PvdObjectModelMetaData, public UserAllocated +{ + typedef HashMap<NamespacedName, ClassDescImpl*, NamespacedNameHasher> TNameToClassMap; + typedef HashMap<ClassPropertyName, PropDescImpl*, ClassPropertyNameHasher> TNameToPropMap; + typedef HashMap<NamespacedName, PropertyMessageDescriptionImpl*, NamespacedNameHasher> TNameToPropertyMessageMap; + + TNameToClassMap mNameToClasses; + TNameToPropMap mNameToProperties; + Array<ClassDescImpl*> mClasses; + Array<PropDescImpl*> mProperties; + StringTableImpl* mStringTable; + TNameToPropertyMessageMap mPropertyMessageMap; + 
Array<PropertyMessageDescriptionImpl*> mPropertyMessages; + int32_t mNextClassId; + uint32_t mRefCount; + + PvdObjectModelMetaDataImpl() + : mNameToClasses("NamespacedName->ClassDescImpl*") + , mNameToProperties("ClassPropertyName->PropDescImpl*") + , mClasses("ClassDescImpl*") + , mProperties("PropDescImpl*") + , mStringTable(PVD_NEW(StringTableImpl)()) + , mPropertyMessageMap("PropertyMessageMap") + , mPropertyMessages("PvdObjectModelMetaDataImpl::mPropertyMessages") + , mNextClassId(1) + , mRefCount(0) + { + } + + private: + PvdObjectModelMetaDataImpl& operator=(const PvdObjectModelMetaDataImpl&); + + public: + int32_t nextClassId() + { + return mNextClassId++; + } + void initialize() + { + // Create the default classes. + { + ClassDescImpl& aryData = getOrCreateClassImpl(getPvdNamespacedNameForType<ArrayData>(), + DataTypeToPvdTypeMap<ArrayData>::BaseTypeEnum); + aryData.get32BitSize() = sizeof(ArrayData); + aryData.get32BitSizeInfo().mAlignment = sizeof(void*); + aryData.get64BitSize() = sizeof(ArrayData); + aryData.get64BitSizeInfo().mAlignment = sizeof(void*); + aryData.mLocked = true; + } +#define CREATE_BASIC_PVD_CLASS(type) \ + { \ + ClassDescImpl& cls = getOrCreateClassImpl(getPvdNamespacedNameForType<type>(), getPvdTypeForType<type>()); \ + cls.get32BitSize() = sizeof(type); \ + cls.get32BitSizeInfo().mAlignment = sizeof(type); \ + cls.get64BitSize() = sizeof(type); \ + cls.get64BitSizeInfo().mAlignment = sizeof(type); \ + cls.mLocked = true; \ + cls.mPackedUniformWidth = sizeof(type); \ + cls.mPackedClassType = getPvdTypeForType<type>(); \ + } + CREATE_BASIC_PVD_CLASS(int8_t) + CREATE_BASIC_PVD_CLASS(uint8_t) + CREATE_BASIC_PVD_CLASS(bool) + CREATE_BASIC_PVD_CLASS(int16_t) + CREATE_BASIC_PVD_CLASS(uint16_t) + CREATE_BASIC_PVD_CLASS(int32_t) + CREATE_BASIC_PVD_CLASS(uint32_t) + // CREATE_BASIC_PVD_CLASS(uint32_t) + CREATE_BASIC_PVD_CLASS(int64_t) + CREATE_BASIC_PVD_CLASS(uint64_t) + CREATE_BASIC_PVD_CLASS(float) + CREATE_BASIC_PVD_CLASS(double) +#undef 
CREATE_BASIC_PVD_CLASS + +#define CREATE_PTR_TYPE_PVD_CLASS(type, ptrType) \ + { \ + ClassDescImpl& cls = getOrCreateClassImpl(getPvdNamespacedNameForType<type>(), getPvdTypeForType<type>()); \ + cls.get32BitSize() = 4; \ + cls.get32BitSizeInfo().mAlignment = 4; \ + cls.get64BitSize() = 8; \ + cls.get64BitSizeInfo().mAlignment = 8; \ + cls.mLocked = true; \ + cls.addPtrOffset(PtrOffsetType::ptrType, 0, 0); \ + } + + CREATE_PTR_TYPE_PVD_CLASS(String, StringOffset) + CREATE_PTR_TYPE_PVD_CLASS(VoidPtr, VoidPtrOffset) + CREATE_PTR_TYPE_PVD_CLASS(StringHandle, StringOffset) + CREATE_PTR_TYPE_PVD_CLASS(ObjectRef, VoidPtrOffset) + +#undef CREATE_64BIT_ADJUST_PVD_CLASS + + int32_t fltClassType = getPvdTypeForType<float>(); + int32_t u32ClassType = getPvdTypeForType<uint32_t>(); + int32_t v3ClassType = getPvdTypeForType<PxVec3>(); + int32_t v4ClassType = getPvdTypeForType<PxVec4>(); + int32_t qtClassType = getPvdTypeForType<PxQuat>(); + { + ClassDescImpl& cls = + getOrCreateClassImpl(getPvdNamespacedNameForType<PvdColor>(), getPvdTypeForType<PvdColor>()); + createProperty(cls.mClassId, "r", "", getPvdTypeForType<uint8_t>(), PropertyType::Scalar); + createProperty(cls.mClassId, "g", "", getPvdTypeForType<uint8_t>(), PropertyType::Scalar); + createProperty(cls.mClassId, "b", "", getPvdTypeForType<uint8_t>(), PropertyType::Scalar); + createProperty(cls.mClassId, "a", "", getPvdTypeForType<uint8_t>(), PropertyType::Scalar); + PX_ASSERT(cls.get32BitSizeInfo().mAlignment == 1); + PX_ASSERT(cls.get32BitSize() == 4); + PX_ASSERT(cls.get64BitSizeInfo().mAlignment == 1); + PX_ASSERT(cls.get64BitSize() == 4); + PX_ASSERT(cls.mPackedUniformWidth == 1); + PX_ASSERT(cls.mPackedClassType == getPvdTypeForType<uint8_t>()); + cls.mLocked = true; + } + + { + ClassDescImpl& cls = getOrCreateClassImpl(getPvdNamespacedNameForType<PxVec2>(), getPvdTypeForType<PxVec2>()); + createProperty(cls.mClassId, "x", "", fltClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "y", "", 
fltClassType, PropertyType::Scalar); + PX_ASSERT(cls.get32BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get32BitSize() == 8); + PX_ASSERT(cls.get64BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get64BitSize() == 8); + PX_ASSERT(cls.mPackedUniformWidth == 4); + PX_ASSERT(cls.mPackedClassType == fltClassType); + cls.mLocked = true; + } + { + ClassDescImpl& cls = getOrCreateClassImpl(getPvdNamespacedNameForType<PxVec3>(), getPvdTypeForType<PxVec3>()); + createProperty(cls.mClassId, "x", "", fltClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "y", "", fltClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "z", "", fltClassType, PropertyType::Scalar); + PX_ASSERT(cls.get32BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get32BitSize() == 12); + PX_ASSERT(cls.get64BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get64BitSize() == 12); + PX_ASSERT(cls.mPackedUniformWidth == 4); + PX_ASSERT(cls.mPackedClassType == fltClassType); + cls.mLocked = true; + } + { + ClassDescImpl& cls = getOrCreateClassImpl(getPvdNamespacedNameForType<PxVec4>(), getPvdTypeForType<PxVec4>()); + createProperty(cls.mClassId, "x", "", fltClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "y", "", fltClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "z", "", fltClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "w", "", fltClassType, PropertyType::Scalar); + PX_ASSERT(cls.get32BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get32BitSize() == 16); + PX_ASSERT(cls.get64BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get64BitSize() == 16); + PX_ASSERT(cls.mPackedUniformWidth == 4); + PX_ASSERT(cls.mPackedClassType == fltClassType); + cls.mLocked = true; + } + + { + ClassDescImpl& cls = getOrCreateClassImpl(getPvdNamespacedNameForType<PxQuat>(), getPvdTypeForType<PxQuat>()); + createProperty(cls.mClassId, "x", "", fltClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "y", "", fltClassType, PropertyType::Scalar); + 
createProperty(cls.mClassId, "z", "", fltClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "w", "", fltClassType, PropertyType::Scalar); + PX_ASSERT(cls.get32BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get32BitSize() == 16); + PX_ASSERT(cls.get64BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get64BitSize() == 16); + PX_ASSERT(cls.mPackedUniformWidth == 4); + PX_ASSERT(cls.mPackedClassType == fltClassType); + cls.mLocked = true; + } + + { + ClassDescImpl& cls = + getOrCreateClassImpl(getPvdNamespacedNameForType<PxBounds3>(), getPvdTypeForType<PxBounds3>()); + createProperty(cls.mClassId, "minimum", "", v3ClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "maximum", "", v3ClassType, PropertyType::Scalar); + PX_ASSERT(cls.get32BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get32BitSize() == 24); + PX_ASSERT(cls.mPackedUniformWidth == 4); + PX_ASSERT(cls.mPackedClassType == fltClassType); + cls.mLocked = true; + } + + { + ClassDescImpl& cls = + getOrCreateClassImpl(getPvdNamespacedNameForType<PxTransform>(), getPvdTypeForType<PxTransform>()); + createProperty(cls.mClassId, "q", "", qtClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "p", "", v3ClassType, PropertyType::Scalar); + PX_ASSERT(cls.get32BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get32BitSize() == 28); + PX_ASSERT(cls.mPackedUniformWidth == 4); + PX_ASSERT(cls.mPackedClassType == fltClassType); + cls.mLocked = true; + } + + { + ClassDescImpl& cls = + getOrCreateClassImpl(getPvdNamespacedNameForType<PxMat33>(), getPvdTypeForType<PxMat33>()); + createProperty(cls.mClassId, "column0", "", v3ClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "column1", "", v3ClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "column2", "", v3ClassType, PropertyType::Scalar); + PX_ASSERT(cls.get32BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get32BitSize() == 36); + PX_ASSERT(cls.mPackedUniformWidth == 4); + PX_ASSERT(cls.mPackedClassType == 
fltClassType); + cls.mLocked = true; + } + + { + ClassDescImpl& cls = + getOrCreateClassImpl(getPvdNamespacedNameForType<PxMat44>(), getPvdTypeForType<PxMat44>()); + createProperty(cls.mClassId, "column0", "", v4ClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "column1", "", v4ClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "column2", "", v4ClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "column3", "", v4ClassType, PropertyType::Scalar); + PX_ASSERT(cls.get32BitSizeInfo().mAlignment == 4); + PX_ASSERT(cls.get32BitSize() == 64); + PX_ASSERT(cls.mPackedUniformWidth == 4); + PX_ASSERT(cls.mPackedClassType == fltClassType); + cls.mLocked = true; + } + + { + ClassDescImpl& cls = + getOrCreateClassImpl(getPvdNamespacedNameForType<U32Array4>(), getPvdTypeForType<U32Array4>()); + createProperty(cls.mClassId, "d0", "", u32ClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "d1", "", u32ClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "d2", "", u32ClassType, PropertyType::Scalar); + createProperty(cls.mClassId, "d3", "", u32ClassType, PropertyType::Scalar); + cls.mLocked = true; + } + } + virtual ~PvdObjectModelMetaDataImpl() + { + mStringTable->release(); + PVD_FOREACH(idx, mClasses.size()) + { + if(mClasses[idx] != NULL) + PVD_DELETE(mClasses[idx]); + } + mClasses.clear(); + PVD_FOREACH(idx, mProperties.size()) PVD_DELETE(mProperties[idx]); + mProperties.clear(); + PVD_FOREACH(idx, mPropertyMessages.size()) PVD_DELETE(mPropertyMessages[idx]); + mPropertyMessages.clear(); + } + + ClassDescImpl& getOrCreateClassImpl(const NamespacedName& nm, int32_t idx) + { + ClassDescImpl* impl(getClassImpl(idx)); + if(impl) + return *impl; + NamespacedName safeName(mStringTable->registerStr(nm.mNamespace), mStringTable->registerStr(nm.mName)); + while(idx >= int32_t(mClasses.size())) + mClasses.pushBack(NULL); + mClasses[uint32_t(idx)] = PVD_NEW(ClassDescImpl)(ClassDescription(safeName, idx)); + 
mNameToClasses.insert(nm, mClasses[uint32_t(idx)]); + mNextClassId = PxMax(mNextClassId, idx + 1); + return *mClasses[uint32_t(idx)]; + } + + ClassDescImpl& getOrCreateClassImpl(const NamespacedName& nm) + { + ClassDescImpl* retval = findClassImpl(nm); + if(retval) + return *retval; + return getOrCreateClassImpl(nm, nextClassId()); + } + virtual ClassDescription getOrCreateClass(const NamespacedName& nm) + { + return getOrCreateClassImpl(nm); + } + // get or create parent, lock parent. deriveFrom getOrCreatechild. + virtual bool deriveClass(const NamespacedName& parent, const NamespacedName& child) + { + ClassDescImpl& p(getOrCreateClassImpl(parent)); + ClassDescImpl& c(getOrCreateClassImpl(child)); + + if(c.mBaseClass >= 0) + { + PX_ASSERT(c.mBaseClass == p.mClassId); + return false; + } + p.mLocked = true; + c.mBaseClass = p.mClassId; + c.get32BitSizeInfo() = p.get32BitSizeInfo(); + c.get64BitSizeInfo() = p.get64BitSizeInfo(); + c.mPackedClassType = p.mPackedClassType; + c.mPackedUniformWidth = p.mPackedUniformWidth; + c.mRequiresDestruction = p.mRequiresDestruction; + c.m32OffsetArray = p.m32OffsetArray; + c.m64OffsetArray = p.m64OffsetArray; + // Add all the parent propertes to this class in the global name map. 
+ for(ClassDescImpl* parent0 = &p; parent0 != NULL; parent0 = getClassImpl(parent0->mBaseClass)) + { + PVD_FOREACH(idx, parent0->mPropImps.size()) + mNameToProperties.insert(ClassPropertyName(c.mName, parent0->mPropImps[idx]->mName), parent0->mPropImps[idx]); + + if(parent0->mBaseClass < 0) + break; + } + + return true; + } + ClassDescImpl* findClassImpl(const NamespacedName& nm) const + { + const TNameToClassMap::Entry* entry(mNameToClasses.find(nm)); + if(entry) + return entry->second; + return NULL; + } + virtual Option<ClassDescription> findClass(const NamespacedName& nm) const + { + ClassDescImpl* retval = findClassImpl(nm); + if(retval) + return *retval; + return Option<ClassDescription>(); + } + + ClassDescImpl* getClassImpl(int32_t classId) const + { + if(classId < 0) + return NULL; + uint32_t idx = uint32_t(classId); + if(idx < mClasses.size()) + return mClasses[idx]; + return NULL; + } + + virtual Option<ClassDescription> getClass(int32_t classId) const + { + ClassDescImpl* impl(getClassImpl(classId)); + if(impl) + return *impl; + return None(); + } + + virtual ClassDescription* getClassPtr(int32_t classId) const + { + return getClassImpl(classId); + } + + virtual Option<ClassDescription> getParentClass(int32_t classId) const + { + ClassDescImpl* impl(getClassImpl(classId)); + if(impl == NULL) + return None(); + return getClass(impl->mBaseClass); + } + + virtual void lockClass(int32_t classId) + { + ClassDescImpl* impl(getClassImpl(classId)); + PX_ASSERT(impl); + if(impl) + impl->mLocked = true; + } + virtual uint32_t getNbClasses() const + { + uint32_t total = 0; + PVD_FOREACH(idx, mClasses.size()) if(mClasses[idx])++ total; + return total; + } + + virtual uint32_t getClasses(ClassDescription* outClasses, uint32_t requestCount, uint32_t startIndex = 0) const + { + uint32_t classCount(getNbClasses()); + startIndex = PxMin(classCount, startIndex); + uint32_t retAmount = PxMin(requestCount, classCount - startIndex); + + uint32_t idx = 0; + while(startIndex) 
+ { + if(mClasses[idx] != NULL) + --startIndex; + ++idx; + } + + uint32_t inserted = 0; + uint32_t classesSize = static_cast<uint32_t>(mClasses.size()); + while(inserted < retAmount && idx < classesSize) + { + if(mClasses[idx] != NULL) + { + outClasses[inserted] = *mClasses[idx]; + ++inserted; + } + ++idx; + } + return inserted; + } + + uint32_t updateByteSizeAndGetPropertyAlignment(ClassDescriptionSizeInfo& dest, const ClassDescriptionSizeInfo& src) + { + uint32_t alignment = src.mAlignment; + dest.mAlignment = PxMax(dest.mAlignment, alignment); + uint32_t offset = align(dest.mDataByteSize, alignment); + dest.mDataByteSize = offset + src.mByteSize; + dest.mByteSize = align(dest.mDataByteSize, dest.mAlignment); + return offset; + } + + void transferPtrOffsets(ClassDescriptionSizeInfo& destInfo, Array<PtrOffset>& destArray, + const Array<PtrOffset>& src, uint32_t offset) + { + PVD_FOREACH(idx, src.size()) + destArray.pushBack(PtrOffset(src[idx].mOffsetType, src[idx].mOffset + offset)); + destInfo.mPtrOffsets = DataRef<PtrOffset>(destArray.begin(), destArray.end()); + } + + virtual Option<PropertyDescription> createProperty(int32_t classId, String name, String semantic, int32_t datatype, + PropertyType::Enum propertyType) + { + ClassDescImpl* cls(getClassImpl(classId)); + PX_ASSERT(cls); + if(!cls) + return None(); + if(cls->mLocked) + { + PX_ASSERT(false); + return None(); + } + PropDescImpl* impl(cls->findProperty(name)); + // duplicate property definition + if(impl) + { + PX_ASSERT(false); + return None(); + } + if(datatype == getPvdTypeForType<String>()) + { + PX_ASSERT(false); + return None(); + } + // The datatype for this property has not been declared. 
+ ClassDescImpl* propDType(getClassImpl(datatype)); + PX_ASSERT(propDType); + if(!propDType) + return None(); + NamespacedName propClsName(propDType->mName); + int32_t propPackedWidth = propDType->mPackedUniformWidth; + int32_t propPackedType = propDType->mPackedClassType; + // The implications of properties being complex types aren't major + //*until* you start trying to undue a property event that set values + // of those complex types. Then things just get too complex. + if(propDType->mRequiresDestruction) + { + PX_ASSERT(false); + return None(); + } + bool requiresDestruction = propDType->mRequiresDestruction || cls->mRequiresDestruction; + + if(propertyType == PropertyType::Array) + { + int32_t tempId = DataTypeToPvdTypeMap<ArrayData>::BaseTypeEnum; + propDType = getClassImpl(tempId); + PX_ASSERT(propDType); + if(!propDType) + return None(); + requiresDestruction = true; + } + uint32_t offset32 = updateByteSizeAndGetPropertyAlignment(cls->get32BitSizeInfo(), propDType->get32BitSizeInfo()); + uint32_t offset64 = updateByteSizeAndGetPropertyAlignment(cls->get64BitSizeInfo(), propDType->get64BitSizeInfo()); + transferPtrOffsets(cls->get32BitSizeInfo(), cls->m32OffsetArray, propDType->m32OffsetArray, offset32); + transferPtrOffsets(cls->get64BitSizeInfo(), cls->m64OffsetArray, propDType->m64OffsetArray, offset64); + propDType->mLocked = true; // Can't add members to the property type. 
+ cls->mRequiresDestruction = requiresDestruction; + int32_t propId = int32_t(mProperties.size()); + PropertyDescription newDesc(cls->mName, cls->mClassId, name, semantic, datatype, propClsName, propertyType, + propId, offset32, offset64); + mProperties.pushBack(PVD_NEW(PropDescImpl)(newDesc, *mStringTable)); + mNameToProperties.insert(ClassPropertyName(cls->mName, mProperties.back()->mName), mProperties.back()); + cls->addProperty(mProperties.back()); + bool firstProp = cls->mPropImps.size() == 1; + + if(firstProp) + { + cls->mPackedUniformWidth = propPackedWidth; + cls->mPackedClassType = propPackedType; + } + else + { + bool packed = (propPackedWidth > 0) && (cls->get32BitSizeInfo().mDataByteSize % propPackedWidth) == 0; + if(cls->mPackedClassType >= 0) // maybe uncheck packed class type + { + if(propPackedType < 0 || cls->mPackedClassType != propPackedType + // Object refs require conversion from stream to db id + || + datatype == getPvdTypeForType<ObjectRef>() + // Strings also require conversion from stream to db id. + || + datatype == getPvdTypeForType<StringHandle>() || packed == false) + cls->mPackedClassType = -1; + } + if(cls->mPackedUniformWidth >= 0) // maybe uncheck packed class width + { + if(propPackedWidth < 0 || cls->mPackedUniformWidth != propPackedWidth + // object refs, because they require special treatment during parsing, + // cannot be packed + || + datatype == getPvdTypeForType<ObjectRef>() + // Likewise, string handles are special because the data needs to be sent *after* + // the + || + datatype == getPvdTypeForType<StringHandle>() || packed == false) + cls->mPackedUniformWidth = -1; // invalid packed width. 
+ } + } + return *mProperties.back(); + } + + PropDescImpl* findPropImpl(const NamespacedName& clsName, String prop) const + { + const TNameToPropMap::Entry* entry = mNameToProperties.find(ClassPropertyName(clsName, prop)); + if(entry) + return entry->second; + return NULL; + } + virtual Option<PropertyDescription> findProperty(const NamespacedName& cls, String propName) const + { + PropDescImpl* prop(findPropImpl(cls, propName)); + if(prop) + return *prop; + return None(); + } + + virtual Option<PropertyDescription> findProperty(int32_t clsId, String propName) const + { + ClassDescImpl* cls(getClassImpl(clsId)); + PX_ASSERT(cls); + if(!cls) + return None(); + PropDescImpl* prop(findPropImpl(cls->mName, propName)); + if(prop) + return *prop; + return None(); + } + + PropDescImpl* getPropertyImpl(int32_t propId) const + { + PX_ASSERT(propId >= 0); + if(propId < 0) + return NULL; + uint32_t val = uint32_t(propId); + if(val >= mProperties.size()) + { + PX_ASSERT(false); + return NULL; + } + return mProperties[val]; + } + + virtual Option<PropertyDescription> getProperty(int32_t propId) const + { + PropDescImpl* impl(getPropertyImpl(propId)); + if(impl) + return *impl; + return None(); + } + + virtual void setNamedPropertyValues(DataRef<NamedValue> values, int32_t propId) + { + PropDescImpl* impl(getPropertyImpl(propId)); + if(impl) + { + impl->mValueNames.resize(values.size()); + PVD_FOREACH(idx, values.size()) impl->mValueNames[idx] = values[idx]; + } + } + + virtual DataRef<NamedValue> getNamedPropertyValues(int32_t propId) const + { + PropDescImpl* impl(getPropertyImpl(propId)); + if(impl) + { + return toDataRef(impl->mValueNames); + } + return DataRef<NamedValue>(); + } + + virtual uint32_t getNbProperties(int32_t classId) const + { + uint32_t retval = 0; + for(ClassDescImpl* impl(getClassImpl(classId)); impl; impl = getClassImpl(impl->mBaseClass)) + { + retval += impl->mPropImps.size(); + if(impl->mBaseClass < 0) + break; + } + return retval; + } + + // 
Properties need to be returned in base class order, so this requires a recursive function. + uint32_t getPropertiesImpl(int32_t classId, PropertyDescription*& outBuffer, uint32_t& numItems, + uint32_t& startIdx) const + { + ClassDescImpl* impl = getClassImpl(classId); + if(impl) + { + uint32_t retval = 0; + if(impl->mBaseClass >= 0) + retval = getPropertiesImpl(impl->mBaseClass, outBuffer, numItems, startIdx); + + uint32_t localStart = PxMin(impl->mPropImps.size(), startIdx); + uint32_t localNumItems = PxMin(numItems, impl->mPropImps.size() - localStart); + PVD_FOREACH(idx, localNumItems) + { + outBuffer[idx] = *impl->mPropImps[localStart + idx]; + } + + startIdx -= localStart; + numItems -= localNumItems; + outBuffer += localNumItems; + return retval + localNumItems; + } + return 0; + } + + virtual uint32_t getProperties(int32_t classId, PropertyDescription* outBuffer, uint32_t numItems, + uint32_t startIdx) const + { + return getPropertiesImpl(classId, outBuffer, numItems, startIdx); + } + + virtual MarshalQueryResult checkMarshalling(int32_t srcClsId, int32_t dstClsId) const + { + Option<ClassDescription> propTypeOpt(getClass(dstClsId)); + if(propTypeOpt.hasValue() == false) + { + PX_ASSERT(false); + return MarshalQueryResult(); + } + const ClassDescription& propType(propTypeOpt); + + Option<ClassDescription> incomingTypeOpt(getClass(srcClsId)); + if(incomingTypeOpt.hasValue() == false) + { + PX_ASSERT(false); + return MarshalQueryResult(); + } + const ClassDescription& incomingType(incomingTypeOpt); + // Can only marshal simple things at this point in time. + bool needsMarshalling = false; + bool canMarshal = false; + TSingleMarshaller single = NULL; + TBlockMarshaller block = NULL; + if(incomingType.mClassId != propType.mClassId) + { + // Check that marshalling is even possible. 
+ if((incomingType.mPackedUniformWidth >= 0 && propType.mPackedUniformWidth >= 0) == false) + { + PX_ASSERT(false); + return MarshalQueryResult(); + } + + int32_t srcType = incomingType.mPackedClassType; + int32_t dstType = propType.mPackedClassType; + + int32_t srcWidth = incomingType.mPackedUniformWidth; + int32_t dstWidth = propType.mPackedUniformWidth; + canMarshal = getMarshalOperators(single, block, srcType, dstType); + if(srcWidth == dstWidth) + needsMarshalling = canMarshal; // If the types are the same width, we assume we can convert between some + // of them seamlessly (uint16_t, int16_t) + else + { + needsMarshalling = true; + // If we can't marshall and we have to then we can't set the property value. + // This indicates that the src and dest are different properties and we don't + // know how to convert between them. + if(!canMarshal) + { + PX_ASSERT(false); + return MarshalQueryResult(); + } + } + } + return MarshalQueryResult(srcClsId, dstClsId, canMarshal, needsMarshalling, block); + } + + PropertyMessageDescriptionImpl* findPropertyMessageImpl(const NamespacedName& messageName) const + { + const TNameToPropertyMessageMap::Entry* entry = mPropertyMessageMap.find(messageName); + if(entry) + return entry->second; + return NULL; + } + + PropertyMessageDescriptionImpl* getPropertyMessageImpl(int32_t msg) const + { + int32_t msgCount = int32_t(mPropertyMessages.size()); + if(msg >= 0 && msg < msgCount) + return mPropertyMessages[uint32_t(msg)]; + return NULL; + } + + virtual Option<PropertyMessageDescription> createPropertyMessage(const NamespacedName& clsName, + const NamespacedName& messageName, + DataRef<PropertyMessageArg> entries, + uint32_t messageSize) + { + PropertyMessageDescriptionImpl* existing(findPropertyMessageImpl(messageName)); + if(existing) + { + PX_ASSERT(false); + return None(); + } + ClassDescImpl* cls = findClassImpl(clsName); + PX_ASSERT(cls); + if(!cls) + return None(); + int32_t msgId = int32_t(mPropertyMessages.size()); + 
PropertyMessageDescriptionImpl* newMessage = PVD_NEW(PropertyMessageDescriptionImpl)( + PropertyMessageDescription(mStringTable->registerName(clsName), cls->mClassId, + mStringTable->registerName(messageName), msgId, messageSize)); + uint32_t calculatedSize = 0; + PVD_FOREACH(idx, entries.size()) + { + PropertyMessageArg entry(entries[idx]); + ClassDescImpl* dtypeCls = findClassImpl(entry.mDatatypeName); + if(dtypeCls == NULL) + { + PX_ASSERT(false); + goto DestroyNewMessage; + } + ClassDescriptionSizeInfo dtypeInfo(dtypeCls->get32BitSizeInfo()); + uint32_t incomingSize = dtypeInfo.mByteSize; + if(entry.mByteSize < incomingSize) + { + PX_ASSERT(false); + goto DestroyNewMessage; + } + + calculatedSize = PxMax(calculatedSize, entry.mMessageOffset + entry.mByteSize); + if(calculatedSize > messageSize) + { + PX_ASSERT(false); + goto DestroyNewMessage; + } + + Option<PropertyDescription> propName(findProperty(cls->mClassId, entry.mPropertyName)); + if(propName.hasValue() == false) + { + PX_ASSERT(false); + goto DestroyNewMessage; + } + + Option<ClassDescription> propCls(getClass(propName.getValue().mDatatype)); + if(propCls.hasValue() == false) + { + PX_ASSERT(false); + goto DestroyNewMessage; + } + + PropertyMessageEntryImpl newEntry(PropertyMessageEntry( + propName, dtypeCls->mName, dtypeCls->mClassId, entry.mMessageOffset, incomingSize, dtypeInfo.mByteSize)); + newMessage->addEntry(newEntry); + + if(newEntry.mDatatypeId == getPvdTypeForType<String>()) + newMessage->mStringOffsetArray.pushBack(entry.mMessageOffset); + + // property messages cannot be marshalled at this time. 
+ if(newEntry.mDatatypeId != getPvdTypeForType<String>() && newEntry.mDatatypeId != getPvdTypeForType<VoidPtr>()) + { + MarshalQueryResult marshalInfo = checkMarshalling(newEntry.mDatatypeId, newEntry.mProperty.mDatatype); + if(marshalInfo.needsMarshalling) + { + PX_ASSERT(false); + goto DestroyNewMessage; + } + } + } + + if(newMessage) + { + newMessage->mStringOffsets = + DataRef<uint32_t>(newMessage->mStringOffsetArray.begin(), newMessage->mStringOffsetArray.end()); + mPropertyMessages.pushBack(newMessage); + mPropertyMessageMap.insert(messageName, newMessage); + return *newMessage; + } + + DestroyNewMessage: + if(newMessage) + PVD_DELETE(newMessage); + + return None(); + } + virtual Option<PropertyMessageDescription> findPropertyMessage(const NamespacedName& msgName) const + { + PropertyMessageDescriptionImpl* desc(findPropertyMessageImpl(msgName)); + if(desc) + return *desc; + return None(); + } + + virtual Option<PropertyMessageDescription> getPropertyMessage(int32_t msgId) const + { + PropertyMessageDescriptionImpl* desc(getPropertyMessageImpl(msgId)); + if(desc) + return *desc; + return None(); + } + + virtual uint32_t getNbPropertyMessages() const + { + return mPropertyMessages.size(); + } + + virtual uint32_t getPropertyMessages(PropertyMessageDescription* msgBuf, uint32_t bufLen, uint32_t startIdx = 0) const + { + startIdx = PxMin(startIdx, getNbPropertyMessages()); + bufLen = PxMin(bufLen, getNbPropertyMessages() - startIdx); + PVD_FOREACH(idx, bufLen) msgBuf[idx] = *mPropertyMessages[idx + startIdx]; + return bufLen; + } + + struct MetaDataWriter + { + const PvdObjectModelMetaDataImpl& mMetaData; + PvdOutputStream& mStream; + MetaDataWriter(const PvdObjectModelMetaDataImpl& meta, PvdOutputStream& stream) + : mMetaData(meta), mStream(stream) + { + } + + void streamify(NamespacedName& type) + { + mStream << mMetaData.mStringTable->strToHandle(type.mNamespace); + mStream << mMetaData.mStringTable->strToHandle(type.mName); + } + void streamify(String& type) 
+ { + mStream << mMetaData.mStringTable->strToHandle(type); + } + void streamify(int32_t& type) + { + mStream << type; + } + void streamify(uint32_t& type) + { + mStream << type; + } + void streamify(uint8_t type) + { + mStream << type; + } + void streamify(bool type) + { + streamify( uint8_t(type)); + } + void streamify(PropertyType::Enum type) + { + uint32_t val = static_cast<uint32_t>(type); + mStream << val; + } + void streamify(NamedValue& type) + { + streamify(type.mValue); + streamify(type.mName); + } + void streamifyLinks(PropDescImpl* prop) + { + streamify(prop->mPropertyId); + } + void streamify(PropertyDescription& prop) + { + streamify(prop.mPropertyId); + } + void streamify(PropertyMessageEntryImpl& prop) + { + prop.serialize(*this); + } + void streamify(PtrOffset& off) + { + uint32_t type = off.mOffsetType; + mStream << type; + mStream << off.mOffset; + } + template <typename TDataType> + void streamify(TDataType* type) + { + int32_t existMarker = type ? 1 : 0; + mStream << existMarker; + if(type) + type->serialize(*this); + } + template <typename TArrayType> + void streamify(const Array<TArrayType>& type) + { + mStream << static_cast<uint32_t>(type.size()); + PVD_FOREACH(idx, type.size()) streamify(const_cast<TArrayType&>(type[idx])); + } + template <typename TArrayType> + void streamifyLinks(const Array<TArrayType>& type) + { + mStream << static_cast<uint32_t>(type.size()); + PVD_FOREACH(idx, type.size()) streamifyLinks(const_cast<TArrayType&>(type[idx])); + } + + private: + MetaDataWriter& operator=(const MetaDataWriter&); + }; + + template <typename TStreamType> + struct MetaDataReader + { + PvdObjectModelMetaDataImpl& mMetaData; + TStreamType& mStream; + MetaDataReader(PvdObjectModelMetaDataImpl& meta, TStreamType& stream) : mMetaData(meta), mStream(stream) + { + } + + void streamify(NamespacedName& type) + { + streamify(type.mNamespace); + streamify(type.mName); + } + + void streamify(String& type) + { + uint32_t handle; + mStream >> handle; + 
type = mMetaData.mStringTable->handleToStr(handle); + } + void streamify(int32_t& type) + { + mStream >> type; + } + void streamify(uint32_t& type) + { + mStream >> type; + } + void streamify(bool& type) + { + uint8_t data; + mStream >> data; + type = data ? true : false; + } + + void streamify(PropertyType::Enum& type) + { + uint32_t val; + mStream >> val; + type = static_cast<PropertyType::Enum>(val); + } + void streamify(NamedValue& type) + { + streamify(type.mValue); + streamify(type.mName); + } + void streamify(PropertyMessageEntryImpl& type) + { + type.serialize(*this); + } + void streamify(PtrOffset& off) + { + uint32_t type; + mStream >> type; + mStream >> off.mOffset; + off.mOffsetType = static_cast<PtrOffsetType::Enum>(type); + } + void streamifyLinks(PropDescImpl*& prop) + { + int32_t propId; + streamify(propId); + prop = mMetaData.getPropertyImpl(propId); + } + void streamify(PropertyDescription& prop) + { + streamify(prop.mPropertyId); + prop = mMetaData.getProperty(prop.mPropertyId); + } + template <typename TDataType> + void streamify(TDataType*& type) + { + uint32_t existMarker; + mStream >> existMarker; + if(existMarker) + { + TDataType* newType = PVD_NEW(TDataType)(); + newType->serialize(*this); + type = newType; + } + else + type = NULL; + } + template <typename TArrayType> + void streamify(Array<TArrayType>& type) + { + uint32_t typeSize; + mStream >> typeSize; + type.resize(typeSize); + PVD_FOREACH(idx, type.size()) streamify(type[idx]); + } + template <typename TArrayType> + void streamifyLinks(Array<TArrayType>& type) + { + uint32_t typeSize; + mStream >> typeSize; + type.resize(typeSize); + PVD_FOREACH(idx, type.size()) streamifyLinks(type[idx]); + } + + private: + MetaDataReader& operator=(const MetaDataReader&); + }; + + virtual void write(PvdOutputStream& stream) const + { + stream << getCurrentPvdObjectModelVersion(); + stream << mNextClassId; + mStringTable->write(stream); + MetaDataWriter writer(*this, stream); + 
writer.streamify(mProperties); + writer.streamify(mClasses); + writer.streamify(mPropertyMessages); + } + + template <typename TReaderType> + void read(TReaderType& stream) + { + uint32_t version; + stream >> version; + stream >> mNextClassId; + mStringTable->read(stream); + MetaDataReader<TReaderType> reader(*this, stream); + reader.streamify(mProperties); + reader.streamify(mClasses); + reader.streamify(mPropertyMessages); + + mNameToClasses.clear(); + mNameToProperties.clear(); + mPropertyMessageMap.clear(); + PVD_FOREACH(i, mClasses.size()) + { + ClassDescImpl* cls(mClasses[i]); + if(cls == NULL) + continue; + mNameToClasses.insert(cls->mName, mClasses[i]); + uint32_t propCount = getNbProperties(cls->mClassId); + PropertyDescription descs[16]; + uint32_t offset = 0; + for(uint32_t idx = 0; idx < propCount; idx = offset) + { + uint32_t numProps = getProperties(cls->mClassId, descs, 16, offset); + offset += numProps; + for(uint32_t propIdx = 0; propIdx < numProps; ++propIdx) + { + PropDescImpl* prop = getPropertyImpl(descs[propIdx].mPropertyId); + if(prop) + mNameToProperties.insert(ClassPropertyName(cls->mName, prop->mName), prop); + } + } + } + PVD_FOREACH(idx, mPropertyMessages.size()) + mPropertyMessageMap.insert(mPropertyMessages[idx]->mMessageName, mPropertyMessages[idx]); + } + + virtual PvdObjectModelMetaData& clone() const + { + MemPvdOutputStream tempStream("PvdObjectModelMetaData::clone"); + write(tempStream); + MemPvdInputStream inStream(tempStream); + return create(inStream); + } + + virtual StringTable& getStringTable() const + { + return *mStringTable; + } + virtual void addRef() + { + ++mRefCount; + } + virtual void release() + { + if(mRefCount) + --mRefCount; + if(!mRefCount) + PVD_DELETE(this); + } +}; +} + +uint32_t PvdObjectModelMetaData::getCurrentPvdObjectModelVersion() +{ + return 1; +} + +PvdObjectModelMetaData& PvdObjectModelMetaData::create() +{ + PvdObjectModelMetaDataImpl& retval(*PVD_NEW(PvdObjectModelMetaDataImpl)()); + 
retval.initialize(); + return retval; +} + +PvdObjectModelMetaData& PvdObjectModelMetaData::create(PvdInputStream& stream) +{ + PvdObjectModelMetaDataImpl& retval(*PVD_NEW(PvdObjectModelMetaDataImpl)()); + retval.read(stream); + return retval; +} + +StringTable& StringTable::create() +{ + return *PVD_NEW(StringTableImpl)(); +} diff --git a/PxShared/src/pvd/src/PxPvdObjectModelMetaData.h b/PxShared/src/pvd/src/PxPvdObjectModelMetaData.h new file mode 100644 index 0000000..7357708 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdObjectModelMetaData.h @@ -0,0 +1,495 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +#ifndef PXPVDSDK_PXPVDOBJECTMODELMETADATA_H +#define PXPVDSDK_PXPVDOBJECTMODELMETADATA_H + +#include "foundation/PxAssert.h" +#include "PxPvdObjectModelBaseTypes.h" +#include "PxPvdBits.h" + +namespace physx +{ +namespace pvdsdk +{ + +class PvdInputStream; +class PvdOutputStream; + +struct PropertyDescription +{ + NamespacedName mOwnerClassName; + int32_t mOwnerClassId; + String mName; + String mSemantic; + // The datatype this property corresponds to. + int32_t mDatatype; + // The name of the datatype + NamespacedName mDatatypeName; + // Scalar or array. + PropertyType::Enum mPropertyType; + // No other property under any class has this id, it is DB-unique. + int32_t mPropertyId; + // Offset in bytes into the object's data section where this property starts. + uint32_t m32BitOffset; + // Offset in bytes into the object's data section where this property starts. 
+ uint32_t m64BitOffset; + + PropertyDescription(const NamespacedName& clsName, int32_t classId, String name, String semantic, int32_t datatype, + const NamespacedName& datatypeName, PropertyType::Enum propType, int32_t propId, + uint32_t offset32, uint32_t offset64) + : mOwnerClassName(clsName) + , mOwnerClassId(classId) + , mName(name) + , mSemantic(semantic) + , mDatatype(datatype) + , mDatatypeName(datatypeName) + , mPropertyType(propType) + , mPropertyId(propId) + , m32BitOffset(offset32) + , m64BitOffset(offset64) + { + } + PropertyDescription() + : mOwnerClassId(-1) + , mName("") + , mSemantic("") + , mDatatype(-1) + , mPropertyType(PropertyType::Unknown) + , mPropertyId(-1) + , m32BitOffset(0) + , m64BitOffset(0) + + { + } + + virtual ~PropertyDescription() + { + } +}; + +struct PtrOffsetType +{ + enum Enum + { + UnknownOffset, + VoidPtrOffset, + StringOffset + }; +}; + +struct PtrOffset +{ + PtrOffsetType::Enum mOffsetType; + uint32_t mOffset; + PtrOffset(PtrOffsetType::Enum type, uint32_t offset) : mOffsetType(type), mOffset(offset) + { + } + PtrOffset() : mOffsetType(PtrOffsetType::UnknownOffset), mOffset(0) + { + } +}; + +inline uint32_t align(uint32_t offset, uint32_t alignment) +{ + uint32_t startOffset = offset; + uint32_t alignmentMask = ~(alignment - 1); + offset = (offset + alignment - 1) & alignmentMask; + PX_ASSERT(offset >= startOffset && (offset % alignment) == 0); + (void)startOffset; + return offset; +} + +struct ClassDescriptionSizeInfo +{ + // The size of the data section of this object, padded to alignment. + uint32_t mByteSize; + // The last data member goes to here. + uint32_t mDataByteSize; + // Alignment in bytes of the data section of this object. 
+    uint32_t mAlignment;
+    // the offsets of string handles in the binary value of this class
+    DataRef<PtrOffset> mPtrOffsets;
+    ClassDescriptionSizeInfo() : mByteSize(0), mDataByteSize(0), mAlignment(0)
+    {
+    }
+};
+
+// Metadata for a single class: its name, its id, the id of its base class,
+// packing hints and one ClassDescriptionSizeInfo per pointer width.
+struct ClassDescription
+{
+    NamespacedName mName;
+    // No other class has this id, it is DB-unique
+    int32_t mClassId;
+    // Only single derivation supported. -1 when there is no base class (constructor default).
+    int32_t mBaseClass;
+    // If this class has properties that are of uniform type, then we note that.
+    // This means that when deserializing an array of these objects we can just use
+    // a single function to endian convert the entire mess at once.
+    int32_t mPackedUniformWidth;
+    // If this class is composed uniformly of members of a given type
+    // Or all of its properties are composed uniformly of members of
+    // a given type, then this class's packed type is that type.
+    // PxTransform's packed type would be float.
+    int32_t mPackedClassType;
+    // 0: 32Bit 1: 64Bit
+    ClassDescriptionSizeInfo mSizeInfo[2];
+    // No further property additions allowed.
+    bool mLocked;
+    // True when this datatype has an array on it that needs to be
+    // separately deleted.
+    bool mRequiresDestruction;
+
+    // Named, identified class; base class and packing hints start out as -1 ("unset").
+    ClassDescription(NamespacedName name, int32_t id)
+        : mName(name)
+        , mClassId(id)
+        , mBaseClass(-1)
+        , mPackedUniformWidth(-1)
+        , mPackedClassType(-1)
+        , mLocked(false)
+        , mRequiresDestruction(false)
+    {
+    }
+    // Default state: every id is -1 and both flags are false.
+    ClassDescription()
+        : mClassId(-1), mBaseClass(-1), mPackedUniformWidth(-1), mPackedClassType(-1), mLocked(false), mRequiresDestruction(false)
+    {
+    }
+    virtual ~ClassDescription()
+    {
+    }
+
+    // Mutable access to the 32 bit entry (index 0) of mSizeInfo.
+    ClassDescriptionSizeInfo& get32BitSizeInfo()
+    {
+        return mSizeInfo[0];
+    }
+    // Mutable access to the 64 bit entry (index 1) of mSizeInfo.
+    ClassDescriptionSizeInfo& get64BitSizeInfo()
+    {
+        return mSizeInfo[1];
+    }
+    // Writable reference to the byte size recorded for 32 bit layouts.
+    uint32_t& get32BitSize()
+    {
+        return get32BitSizeInfo().mByteSize;
+    }
+    // Writable reference to the byte size recorded for 64 bit layouts.
+    uint32_t& get64BitSize()
+    {
+        return get64BitSizeInfo().mByteSize;
+    }
+
+    // Read-only byte size for 32 bit layouts.
+    // NOTE(review): there is no const counterpart for get64BitSize() in this struct.
+    uint32_t get32BitSize() const
+    {
+        return mSizeInfo[0].mByteSize;
+    }
+    // Size info matching the pointer width of the current build:
+    // (sizeof(void*) >> 2) - 1 is 0 for 4 byte pointers and 1 for 8 byte pointers.
+    const ClassDescriptionSizeInfo& getNativeSizeInfo() const
+    {
+        return mSizeInfo[(sizeof(void*) >> 2) - 1];
+    }
+    // Byte size of the class for the pointer width of the current build.
+    uint32_t getNativeSize() const
+    {
+        return getNativeSizeInfo().mByteSize;
+    }
+};
+
+// Answer to a "can type A be marshalled into type B" query.
+// A default constructed result carries -1 type ids, false flags and a NULL marshaller.
+struct MarshalQueryResult
+{
+    int32_t srcType;
+    int32_t dstType;
+    // If canMarshal != needsMarshalling we have a problem.
+    bool canMarshal;
+    bool needsMarshalling;
+    // Non null if marshalling is possible.
+    TBlockMarshaller marshaller;
+    MarshalQueryResult(int32_t _srcType = -1, int32_t _dstType = -1, bool _canMarshal = false, bool _needs = false,
+                       TBlockMarshaller _m = NULL)
+        : srcType(_srcType), dstType(_dstType), canMarshal(_canMarshal), needsMarshalling(_needs), marshaller(_m)
+    {
+    }
+};
+
+// One property inside a property message: which property it is, the datatype of
+// its payload and where that payload sits inside the message.
+struct PropertyMessageEntry
+{
+    PropertyDescription mProperty;
+    NamespacedName mDatatypeName;
+    // datatype of the data in the message.
+    int32_t mDatatypeId;
+    // where in the message this property starts.
+    uint32_t mMessageOffset;
+    // size of this entry object
+    uint32_t mByteSize;
+
+    // If the chain of properties doesn't have any array properties this indicates the
+    // NOTE(review): the sentence above is cut off in the original source; presumably it
+    // describes the byte size at the destination - confirm against the implementation.
+    uint32_t mDestByteSize;
+
+    PropertyMessageEntry(PropertyDescription propName, NamespacedName dtypeName, int32_t dtype, uint32_t messageOff,
+                         uint32_t byteSize, uint32_t destByteSize)
+        : mProperty(propName)
+        , mDatatypeName(dtypeName)
+        , mDatatypeId(dtype)
+        , mMessageOffset(messageOff)
+        , mByteSize(byteSize)
+        , mDestByteSize(destByteSize)
+    {
+    }
+    // Default state: datatype id -1, offset and sizes 0.
+    PropertyMessageEntry() : mDatatypeId(-1), mMessageOffset(0), mByteSize(0), mDestByteSize(0)
+    {
+    }
+};
+
+// Create a struct that defines a subset of the properties on an object.
+struct PropertyMessageDescription
+{
+    NamespacedName mClassName;
+    // No other class has this id, it is DB-unique
+    int32_t mClassId;
+    NamespacedName mMessageName;
+    int32_t mMessageId;
+    DataRef<PropertyMessageEntry> mProperties;
+    uint32_t mMessageByteSize;
+    // Offsets into the property message where const char* items are.
+    DataRef<uint32_t> mStringOffsets;
+    // mProperties and mStringOffsets are left default constructed by both constructors.
+    PropertyMessageDescription(const NamespacedName& nm, int32_t clsId, const NamespacedName& msgName, int32_t msgId,
+                               uint32_t msgSize)
+        : mClassName(nm), mClassId(clsId), mMessageName(msgName), mMessageId(msgId), mMessageByteSize(msgSize)
+    {
+    }
+    // Default state: ids -1, message byte size 0.
+    PropertyMessageDescription() : mClassId(-1), mMessageId(-1), mMessageByteSize(0)
+    {
+    }
+    virtual ~PropertyMessageDescription()
+    {
+    }
+};
+
+// Abstract string table. The destructor is protected, so callers cannot delete an
+// instance directly; release() is the public disposal entry point.
+class StringTable
+{
+  protected:
+    virtual ~StringTable()
+    {
+    }
+
+  public:
+    virtual uint32_t getNbStrs() = 0;
+    virtual uint32_t getStrs(const char** outStrs, uint32_t bufLen, uint32_t startIdx = 0) = 0;
+    virtual const char* registerStr(const char* str, bool& outAdded) = 0;
+    // Convenience overload for callers that do not care about the outAdded flag.
+    const char* registerStr(const char* str)
+    {
+        bool ignored;
+        return registerStr(str, ignored);
+    }
+    virtual StringHandle strToHandle(const char* str) = 0;
+    virtual const char* handleToStr(uint32_t hdl) = 0;
+    virtual void release() = 0;
+
+    static StringTable& create();
+};
+
+// Tag type used to construct an empty Option.
+struct None
+{
+};
+
+// Minimal optional value: a T plus a "has value" flag.
+// An empty Option still contains a default-initialized T, so T must be default constructible.
+template <typename T>
+class Option
+{
+    T mValue;
+    bool mHasValue;
+
+  public:
+    // Implicit conversion from a value: the Option is engaged.
+    Option(const T& val) : mValue(val), mHasValue(true)
+    {
+    }
+    // Implicit conversion from None (also the default constructor): the Option is empty.
+    Option(None nothing = None()) : mHasValue(false)
+    {
+        (void)nothing;
+    }
+    Option(const Option& other) : mValue(other.mValue), mHasValue(other.mHasValue)
+    {
+    }
+    Option& operator=(const Option& other)
+    {
+        mValue = other.mValue;
+        mHasValue = other.mHasValue;
+        return *this;
+    }
+    bool hasValue() const
+    {
+        return mHasValue;
+    }
+    // Every accessor below goes through getValue(), which checks hasValue() with
+    // PX_ASSERT and then returns the stored T regardless.
+    const T& getValue() const
+    {
+        PX_ASSERT(hasValue());
+        return mValue;
+    }
+    T& getValue()
+    {
+        PX_ASSERT(hasValue());
+        return mValue;
+    }
+    operator const T&() const
+    {
+        return getValue();
+    }
+    operator T&()
+    {
+        return getValue();
+    }
+    T* operator->()
+    {
+        return &getValue();
+    }
+    const T* operator->() const
+    {
+        return &getValue();
+    }
+};
+
+/**
+ * Create new classes and add properties to some existing ones.
+ * The default classes are created already, the simple types + * along with the basic math types. + * (uint8_t, int8_t, etc ) + * (PxVec3, PxQuat, PxTransform, PxMat33, PxMat34, PxMat44) + */ +class PvdObjectModelMetaData +{ + protected: + virtual ~PvdObjectModelMetaData() + { + } + + public: + virtual ClassDescription getOrCreateClass(const NamespacedName& nm) = 0; + // get or create parent, lock parent. deriveFrom getOrCreatechild. + virtual bool deriveClass(const NamespacedName& parent, const NamespacedName& child) = 0; + virtual Option<ClassDescription> findClass(const NamespacedName& nm) const = 0; + template <typename TDataType> + Option<ClassDescription> findClass() + { + return findClass(getPvdNamespacedNameForType<TDataType>()); + } + virtual Option<ClassDescription> getClass(int32_t classId) const = 0; + virtual ClassDescription* getClassPtr(int32_t classId) const = 0; + + virtual Option<ClassDescription> getParentClass(int32_t classId) const = 0; + bool isDerivedFrom(int32_t classId, int32_t parentClass) const + { + if(classId == parentClass) + return true; + ClassDescription* p = getClassPtr(getClassPtr(classId)->mBaseClass); + while(p != NULL) + { + if(p->mClassId == parentClass) + return true; + p = getClassPtr(p->mBaseClass); + } + return false; + } + + virtual void lockClass(int32_t classId) = 0; + + virtual uint32_t getNbClasses() const = 0; + virtual uint32_t getClasses(ClassDescription* outClasses, uint32_t requestCount, uint32_t startIndex = 0) const = 0; + + // Create a nested property. + // This way you can have obj.p.x without explicity defining the class p. 
+ virtual Option<PropertyDescription> createProperty(int32_t classId, String name, String semantic, int32_t datatype, + PropertyType::Enum propertyType = PropertyType::Scalar) = 0; + Option<PropertyDescription> createProperty(NamespacedName clsId, String name, String semantic, NamespacedName dtype, + PropertyType::Enum propertyType = PropertyType::Scalar) + { + return createProperty(findClass(clsId)->mClassId, name, semantic, findClass(dtype)->mClassId, propertyType); + } + Option<PropertyDescription> createProperty(NamespacedName clsId, String name, NamespacedName dtype, + PropertyType::Enum propertyType = PropertyType::Scalar) + { + return createProperty(findClass(clsId)->mClassId, name, "", findClass(dtype)->mClassId, propertyType); + } + Option<PropertyDescription> createProperty(int32_t clsId, String name, int32_t dtype, + PropertyType::Enum propertyType = PropertyType::Scalar) + { + return createProperty(clsId, name, "", dtype, propertyType); + } + template <typename TDataType> + Option<PropertyDescription> createProperty(int32_t clsId, String name, String semantic = "", + PropertyType::Enum propertyType = PropertyType::Scalar) + { + return createProperty(clsId, name, semantic, getPvdNamespacedNameForType<TDataType>(), propertyType); + } + virtual Option<PropertyDescription> findProperty(const NamespacedName& cls, String prop) const = 0; + virtual Option<PropertyDescription> findProperty(int32_t clsId, String prop) const = 0; + virtual Option<PropertyDescription> getProperty(int32_t propId) const = 0; + virtual void setNamedPropertyValues(DataRef<NamedValue> values, int32_t propId) = 0; + // for enumerations and flags. 
+ virtual DataRef<NamedValue> getNamedPropertyValues(int32_t propId) const = 0; + + virtual uint32_t getNbProperties(int32_t classId) const = 0; + virtual uint32_t getProperties(int32_t classId, PropertyDescription* outBuffer, uint32_t bufCount, + uint32_t startIdx = 0) const = 0; + + // Check that a property path, starting at the given class id and first property is value. Return the resolved + // properties. + // outbuffer.size *must* equal the propPath.size(). + Option<PropertyDescription> resolvePropertyPath(int32_t clsId, const int32_t propId) const + { + Option<PropertyDescription> prop(getProperty(propId)); + if(prop.hasValue() == false) + return prop; + if(isDerivedFrom(clsId, prop.getValue().mOwnerClassId) == false) + return None(); + return prop; + } + // Does one cls id differ marshalling to another and if so return the functions to do it. + virtual MarshalQueryResult checkMarshalling(int32_t srcClsId, int32_t dstClsId) const = 0; + + // messages and classes are stored in separate maps, so a property message can have the same name as a class. 
+ virtual Option<PropertyMessageDescription> createPropertyMessage(const NamespacedName& cls, + const NamespacedName& msgName, + DataRef<PropertyMessageArg> entries, + uint32_t messageSize) = 0; + virtual Option<PropertyMessageDescription> findPropertyMessage(const NamespacedName& msgName) const = 0; + virtual Option<PropertyMessageDescription> getPropertyMessage(int32_t msgId) const = 0; + + virtual uint32_t getNbPropertyMessages() const = 0; + virtual uint32_t getPropertyMessages(PropertyMessageDescription* msgBuf, uint32_t bufLen, + uint32_t startIdx = 0) const = 0; + + virtual StringTable& getStringTable() const = 0; + + virtual void write(PvdOutputStream& stream) const = 0; + void save(PvdOutputStream& stream) const + { + write(stream); + } + + virtual PvdObjectModelMetaData& clone() const = 0; + + virtual void addRef() = 0; + virtual void release() = 0; + + static uint32_t getCurrentPvdObjectModelVersion(); + static PvdObjectModelMetaData& create(); + static PvdObjectModelMetaData& create(PvdInputStream& stream); +}; +} +} +#endif // PXPVDSDK_PXPVDOBJECTMODELMETADATA_H diff --git a/PxShared/src/pvd/src/PxPvdObjectRegistrar.cpp b/PxShared/src/pvd/src/PxPvdObjectRegistrar.cpp new file mode 100644 index 0000000..67667f9 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdObjectRegistrar.cpp @@ -0,0 +1,80 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "PxPvdObjectRegistrar.h" + +namespace physx +{ +namespace pvdsdk +{ + +bool ObjectRegistrar::addItem(const void* inItem) +{ + physx::shdfnd::Mutex::ScopedLock lock(mRefCountMapLock); + + if(mRefCountMap.find(inItem)) + { + uint32_t& counter = mRefCountMap[inItem]; + counter++; + return false; + } + else + { + mRefCountMap.insert(inItem, 1); + return true; + } +} + +bool ObjectRegistrar::decItem(const void* inItem) +{ + physx::shdfnd::Mutex::ScopedLock lock(mRefCountMapLock); + const physx::shdfnd::HashMap<const void*, uint32_t>::Entry* entry = mRefCountMap.find(inItem); + if(entry) + { + uint32_t& retval(const_cast<uint32_t&>(entry->second)); + if(retval) + --retval; + uint32_t theValue = retval; + if(theValue == 0) + { + mRefCountMap.erase(inItem); + return true; + } + } + return false; +} + +void ObjectRegistrar::clear() +{ + physx::shdfnd::Mutex::ScopedLock lock(mRefCountMapLock); + mRefCountMap.clear(); +} + +} // pvdsdk +} // physx diff --git a/PxShared/src/pvd/src/PxPvdObjectRegistrar.h b/PxShared/src/pvd/src/PxPvdObjectRegistrar.h new file mode 100644 index 0000000..dbd9ebc --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdObjectRegistrar.h @@ -0,0 +1,71 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+
+#ifndef PXPVDSDK_PXPVDOBJECTREGISTRAR_H
+#define PXPVDSDK_PXPVDOBJECTREGISTRAR_H
+
+/** \addtogroup pvd
+@{
+*/
+
+#include "PsHashMap.h"
+#include "PsMutex.h"
+
+#if !PX_DOXYGEN
+namespace physx
+{
+namespace pvdsdk
+{
+#endif
+/**
+\brief Reference-counting registry keyed by object address.
+
+Every public operation takes mRefCountMapLock before touching the map.
+*/
+class ObjectRegistrar
+{
+    PX_NOCOPY(ObjectRegistrar)
+  public:
+    ObjectRegistrar()
+    {
+    }
+    virtual ~ObjectRegistrar()
+    {
+    }
+
+    /** \brief Adds a reference; returns true only when inItem was not tracked before. */
+    bool addItem(const void* inItem);
+    /** \brief Drops a reference; returns true only when the count hit zero and the item was removed. */
+    bool decItem(const void* inItem);
+    /** \brief Removes every tracked item. */
+    void clear();
+
+  private:
+    // item address -> number of outstanding references
+    physx::shdfnd::HashMap<const void*, uint32_t> mRefCountMap;
+    // guards mRefCountMap
+    physx::shdfnd::Mutex mRefCountMapLock;
+};
+#if !PX_DOXYGEN
+} // pvdsdk
+} // physx
+#endif
+
+/** @} */
+#endif // PXPVDSDK_PXPVDOBJECTREGISTRAR_H
diff --git a/PxShared/src/pvd/src/PxPvdProfileZoneClient.cpp b/PxShared/src/pvd/src/PxPvdProfileZoneClient.cpp
new file mode 100644
index 0000000..8d8582c
--- /dev/null
+++ b/PxShared/src/pvd/src/PxPvdProfileZoneClient.cpp
@@ -0,0 +1,173 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "pvd/PxPvdTransport.h" + +#include "PxPvdImpl.h" +#include "PxPvdProfileZoneClient.h" +#include "PxProfileZone.h" + +namespace physx +{ +namespace pvdsdk +{ +struct ProfileZoneClient : public profile::PxProfileZoneClient, public shdfnd::UserAllocated +{ + profile::PxProfileZone& mZone; + PvdDataStream& mStream; + + ProfileZoneClient(profile::PxProfileZone& zone, PvdDataStream& stream) : mZone(zone), mStream(stream) + { + } + + ~ProfileZoneClient() + { + mZone.removeClient(*this); + } + + virtual void createInstance() + { + mStream.addProfileZone(&mZone, mZone.getName()); + mStream.createInstance(&mZone); + mZone.addClient(*this); + profile::PxProfileNames names(mZone.getProfileNames()); + PVD_FOREACH(idx, names.eventCount) + { + handleEventAdded(names.events[idx]); + } + } + + virtual void handleEventAdded(const profile::PxProfileEventName& inName) + { + mStream.addProfileZoneEvent(&mZone, inName.name, inName.eventId.eventId, inName.eventId.compileTimeEnabled); + } + + virtual void handleBufferFlush(const uint8_t* inData, uint32_t inLength) + { + mStream.setPropertyValue(&mZone, "events", inData, inLength); + } + + virtual void 
handleClientRemoved() + { + mStream.destroyInstance(&mZone); + } + + private: + ProfileZoneClient& operator=(const ProfileZoneClient&); +}; +} +} + +using namespace physx; +using namespace pvdsdk; + +PvdProfileZoneClient::PvdProfileZoneClient(PvdImpl& pvd) : mSDKPvd(pvd), mPvdDataStream(NULL), mIsConnected(false) +{ +} + +PvdProfileZoneClient::~PvdProfileZoneClient() +{ + mSDKPvd.removeClient(this); + // all zones should removed + PX_ASSERT(mProfileZoneClients.size() == 0); +} + +PvdDataStream* PvdProfileZoneClient::getDataStream() +{ + return mPvdDataStream; +} + +PvdUserRenderer* PvdProfileZoneClient::getUserRender() +{ + PX_ASSERT(0); + return NULL; +} + +void PvdProfileZoneClient::setObjectRegistrar(ObjectRegistrar*) +{ +} + +bool PvdProfileZoneClient::isConnected() const +{ + return mIsConnected; +} + +void PvdProfileZoneClient::onPvdConnected() +{ + if(mIsConnected) + return; + mIsConnected = true; + + mPvdDataStream = PvdDataStream::create(&mSDKPvd); + +} + +void PvdProfileZoneClient::onPvdDisconnected() +{ + if(!mIsConnected) + return; + + mIsConnected = false; + flush(); + + mPvdDataStream->release(); + mPvdDataStream = NULL; +} + +void PvdProfileZoneClient::flush() +{ + PVD_FOREACH(idx, mProfileZoneClients.size()) + mProfileZoneClients[idx]->mZone.flushProfileEvents(); +} + +void PvdProfileZoneClient::onZoneAdded(profile::PxProfileZone& zone) +{ + PX_ASSERT(mIsConnected); + ProfileZoneClient* client = PVD_NEW(ProfileZoneClient)(zone, *mPvdDataStream); + mMutex.lock(); + client->createInstance(); + mProfileZoneClients.pushBack(client); + mMutex.unlock(); +} + +void PvdProfileZoneClient::onZoneRemoved(profile::PxProfileZone& zone) +{ + for(uint32_t i = 0; i < mProfileZoneClients.size(); i++) + { + if(&zone == &mProfileZoneClients[i]->mZone) + { + mMutex.lock(); + ProfileZoneClient* client = mProfileZoneClients[i]; + mProfileZoneClients.replaceWithLast(i); + PVD_DELETE(client); + mMutex.unlock(); + return; + } + } +} diff --git 
a/PxShared/src/pvd/src/PxPvdProfileZoneClient.h b/PxShared/src/pvd/src/PxPvdProfileZoneClient.h new file mode 100644 index 0000000..4484997 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdProfileZoneClient.h @@ -0,0 +1,77 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+
+#ifndef PXPVDSDK_PXPVDPROFILEZONECLIENT_H
+#define PXPVDSDK_PXPVDPROFILEZONECLIENT_H
+#include "PxPvdClient.h"
+#include "PsHashMap.h"
+#include "PsMutex.h"
+#include "PxProfileZoneManager.h"
+
+namespace physx
+{
+namespace pvdsdk
+{
+class PvdImpl;
+class PvdDataStream;
+
+struct ProfileZoneClient;
+
+// PVD client that mirrors profile zones into a PVD data stream: it keeps one
+// ProfileZoneClient per zone reported through the PxProfileZoneHandler callbacks.
+class PvdProfileZoneClient : public PvdClient, public profile::PxProfileZoneHandler, public shdfnd::UserAllocated
+{
+    PX_NOCOPY(PvdProfileZoneClient)
+  public:
+    PvdProfileZoneClient(PvdImpl& pvd);
+    virtual ~PvdProfileZoneClient();
+
+    bool isConnected() const;
+    // Creates the data stream; ignored when already connected.
+    void onPvdConnected();
+    // Flushes all zones and releases the data stream; ignored when not connected.
+    void onPvdDisconnected();
+    // Flushes the buffered profile events of every registered zone.
+    void flush();
+
+    // NULL while disconnected.
+    PvdDataStream* getDataStream();
+    // Not supported by this client: asserts and returns NULL.
+    PvdUserRenderer* getUserRender();
+    // No-op for this client.
+    void setObjectRegistrar(ObjectRegistrar*);
+
+    // PxProfileZoneHandler
+    void onZoneAdded(profile::PxProfileZone& inSDK);
+    void onZoneRemoved(profile::PxProfileZone& inSDK);
+
+  private:
+    shdfnd::Mutex mMutex; // onZoneAdded can be called from different threads; taken while mProfileZoneClients is modified
+    PvdImpl& mSDKPvd;
+    PvdDataStream* mPvdDataStream;
+    physx::shdfnd::Array<ProfileZoneClient*> mProfileZoneClients;
+    bool mIsConnected;
+};
+
+} // namespace pvdsdk
+} // namespace physx
+
+#endif // PXPVDSDK_PXPVDPROFILEZONECLIENT_H
diff --git a/PxShared/src/pvd/src/PxPvdUserRenderImpl.h b/PxShared/src/pvd/src/PxPvdUserRenderImpl.h
new file mode 100644
index 0000000..04574e9
--- /dev/null
+++ b/PxShared/src/pvd/src/PxPvdUserRenderImpl.h
@@ -0,0 +1,411 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+#ifndef PXPVDSDK_PXPVDUSERRENDERIMPL_H
+#define PXPVDSDK_PXPVDUSERRENDERIMPL_H
+
+#include "PxPvdUserRenderer.h"
+
+namespace physx
+{
+namespace pvdsdk
+{
+
+// Ids of the immediate-mode render events. Unknown is 0; the remaining enumerators are
+// generated from the list in PxPvdUserRenderTypes.h, in the order given there.
+struct PvdUserRenderTypes
+{
+    enum Enum
+    {
+        Unknown = 0,
+#define DECLARE_PVD_IMMEDIATE_RENDER_TYPE(type) type,
+#define DECLARE_PVD_IMMEDIATE_RENDER_TYPE_NO_COMMA(type) type
+#include "PxPvdUserRenderTypes.h"
+#undef DECLARE_PVD_IMMEDIATE_RENDER_TYPE_NO_COMMA
+#undef DECLARE_PVD_IMMEDIATE_RENDER_TYPE
+    };
+};
+
+// Serialization interface for render events. All values are passed by non-const
+// reference and the helpers below write the streamed value back afterwards, so an
+// implementation may either read from or write into the argument.
+class RenderSerializer
+{
+  protected:
+    virtual ~RenderSerializer()
+    {
+    }
+
+  public:
+    virtual void streamify(uint64_t& val) = 0;
+    virtual void streamify(float& val) = 0;
+    virtual void streamify(uint32_t& val) = 0;
+    virtual void streamify(uint8_t& val) = 0;
+    virtual void streamify(DataRef<uint8_t>& val) = 0;
+    virtual void streamify(DataRef<PvdDebugPoint>& val) = 0;
+    virtual void streamify(DataRef<PvdDebugLine>& val) = 0;
+    virtual void streamify(DataRef<PvdDebugTriangle>& val) = 0;
+    virtual void streamify(PvdDebugText& val) = 0;
+    virtual bool isGood() = 0;
+    virtual uint32_t hasData() = 0;
+
+    // The event type travels as a single byte.
+    void streamify(PvdUserRenderTypes::Enum& val)
+    {
+        uint8_t data = static_cast<uint8_t>(val);
+        streamify(data);
+        val = static_cast<PvdUserRenderTypes::Enum>(data);
+    }
+    // Three floats, components 0, 1, 2 in that order.
+    void streamify(PxVec3& val)
+    {
+        streamify(val[0]);
+        streamify(val[1]);
+        streamify(val[2]);
+    }
+
+    // Components in r, g, b, a order.
+    void streamify(PvdColor& val)
+    {
+        streamify(val.r);
+        streamify(val.g);
+        streamify(val.b);
+        streamify(val.a);
+    }
+    // Rotation first (q.x, q.y, q.z, q.w), then position (p.x, p.y, p.z).
+    void streamify(PxTransform& val)
+    {
+        streamify(val.q.x);
+        streamify(val.q.y);
+        streamify(val.q.z);
+        streamify(val.q.w);
+        streamify(val.p.x);
+        streamify(val.p.y);
+        streamify(val.p.z);
+    }
+    // Booleans travel as one byte holding 0 or 1; any non-zero byte reads back as true.
+    void streamify(bool& val)
+    {
+        uint8_t tempVal = uint8_t(val ? 1 : 0);
+        streamify(tempVal);
+        val = tempVal ? true : false;
+    }
+};
+
+// Every *RenderEvent below follows the same pattern: a value constructor, an empty
+// default constructor and a serialize() that streams the members in a fixed order.
+// NOTE(review): the empty default constructors leave scalar members (ids, floats,
+// bools) uninitialized; presumably they are filled by a following serialize() - confirm.
+
+// Common base for events that carry one array of TBulkRenderType items.
+template <typename TBulkRenderType>
+struct BulkRenderEvent
+{
+    DataRef<TBulkRenderType> mData;
+    BulkRenderEvent(const TBulkRenderType* data, uint32_t count) : mData(data, count)
+    {
+    }
+    BulkRenderEvent()
+    {
+    }
+    void serialize(RenderSerializer& serializer)
+    {
+        serializer.streamify(mData);
+    }
+};
+struct SetInstanceIdRenderEvent
+{
+    uint64_t mInstanceId;
+    SetInstanceIdRenderEvent(uint64_t iid) : mInstanceId(iid)
+    {
+    }
+    SetInstanceIdRenderEvent()
+    {
+    }
+    void serialize(RenderSerializer& serializer)
+    {
+        serializer.streamify(mInstanceId);
+    }
+};
+struct PointsRenderEvent : BulkRenderEvent<PvdDebugPoint>
+{
+    PointsRenderEvent(const PvdDebugPoint* data, uint32_t count) : BulkRenderEvent<PvdDebugPoint>(data, count)
+    {
+    }
+    PointsRenderEvent()
+    {
+    }
+};
+struct LinesRenderEvent : BulkRenderEvent<PvdDebugLine>
+{
+    LinesRenderEvent(const PvdDebugLine* data, uint32_t count) : BulkRenderEvent<PvdDebugLine>(data, count)
+    {
+    }
+    LinesRenderEvent()
+    {
+    }
+};
+struct TrianglesRenderEvent : BulkRenderEvent<PvdDebugTriangle>
+{
+    TrianglesRenderEvent(const PvdDebugTriangle* data, uint32_t count) : BulkRenderEvent<PvdDebugTriangle>(data, count)
+    {
+    }
+    TrianglesRenderEvent()
+    {
+    }
+};
+// Points, lines and triangles bundled into one event; streamed in that order.
+struct DebugRenderEvent
+{
+    DataRef<PvdDebugPoint> mPointData;
+    DataRef<PvdDebugLine> mLineData;
+    DataRef<PvdDebugTriangle> mTriangleData;
+    DebugRenderEvent(const PvdDebugPoint* pointData, uint32_t pointCount, const PvdDebugLine* lineData,
+                     uint32_t lineCount, const PvdDebugTriangle* triangleData, uint32_t triangleCount)
+        : mPointData(pointData, pointCount), mLineData(lineData, lineCount), mTriangleData(triangleData, triangleCount)
+    {
+    }
+
+    DebugRenderEvent()
+    {
+    }
+    void serialize(RenderSerializer& serializer)
+    {
+        serializer.streamify(mPointData);
+        serializer.streamify(mLineData);
+        serializer.streamify(mTriangleData);
+    }
+};
+
+struct TextRenderEvent
+{
+    PvdDebugText mText;
+    // Copies the four fields one by one; the string member is copied as-is
+    // (no deep copy is made here).
+    TextRenderEvent(const PvdDebugText& text)
+    {
+        mText.color = text.color;
+        mText.position = text.position;
+        mText.size = text.size;
+        mText.string = text.string;
+    }
+    TextRenderEvent()
+    {
+    }
+    void serialize(RenderSerializer& serializer)
+    {
+        serializer.streamify(mText);
+    }
+};
+
+struct JointFramesRenderEvent
+{
+    PxTransform parent;
+    PxTransform child;
+    JointFramesRenderEvent(const PxTransform& p, const PxTransform& c) : parent(p), child(c)
+    {
+    }
+    JointFramesRenderEvent()
+    {
+    }
+    void serialize(RenderSerializer& serializer)
+    {
+        serializer.streamify(parent);
+        serializer.streamify(child);
+    }
+};
+struct LinearLimitRenderEvent
+{
+    PxTransform t0;
+    PxTransform t1;
+    float value;
+    bool active;
+    LinearLimitRenderEvent(const PxTransform& _t0, const PxTransform& _t1, float _value, bool _active)
+        : t0(_t0), t1(_t1), value(_value), active(_active)
+    {
+    }
+    LinearLimitRenderEvent()
+    {
+    }
+    void serialize(RenderSerializer& serializer)
+    {
+        serializer.streamify(t0);
+        serializer.streamify(t1);
+        serializer.streamify(value);
+        serializer.streamify(active);
+    }
+};
+struct AngularLimitRenderEvent
+{
+    PxTransform t0;
+    float lower;
+    float upper;
+    bool active;
+    AngularLimitRenderEvent(const PxTransform& _t0, float _lower, float _upper, bool _active)
+        : t0(_t0), lower(_lower), upper(_upper), active(_active)
+    {
+    }
+    AngularLimitRenderEvent()
+    {
+    }
+    void serialize(RenderSerializer& serializer)
+    {
+        serializer.streamify(t0);
+        serializer.streamify(lower);
+        serializer.streamify(upper);
+        serializer.streamify(active);
+    }
+};
+struct LimitConeRenderEvent
+{
+    PxTransform t;
+    float ySwing;
+    float zSwing;
+    bool active;
+    LimitConeRenderEvent(const PxTransform& _t, float _ySwing, float _zSwing, bool _active)
+        : t(_t), ySwing(_ySwing), zSwing(_zSwing), active(_active)
+    {
+    }
+    LimitConeRenderEvent()
+    {
+    }
+    void serialize(RenderSerializer& serializer)
+    {
+        serializer.streamify(t);
+        serializer.streamify(ySwing);
+        serializer.streamify(zSwing);
+        serializer.streamify(active);
+    }
+};
+struct DoubleConeRenderEvent
+{
+    PxTransform t;
+    float angle;
+    bool active;
+    DoubleConeRenderEvent(const PxTransform& _t, float _angle, bool _active) : t(_t), angle(_angle), active(_active)
+    {
+    }
+    DoubleConeRenderEvent()
+    {
+    }
+    void serialize(RenderSerializer& serializer)
+    {
+        serializer.streamify(t);
+        serializer.streamify(angle);
+        serializer.streamify(active);
+    }
+};
+
+// Maps a data type to the way it is serialized. The primary template calls the type's
+// own serialize(); the specializations below handle types without such a member.
+template <typename TDataType>
+struct RenderSerializerMap
+{
+    void serialize(RenderSerializer& s, TDataType& d)
+    {
+        d.serialize(s);
+    }
+};
+template <>
+struct RenderSerializerMap<uint8_t>
+{
+    void serialize(RenderSerializer& s, uint8_t& d)
+    {
+        s.streamify(d);
+    }
+};
+
+template <>
+struct RenderSerializerMap<PvdDebugPoint>
+{
+    void serialize(RenderSerializer& s, PvdDebugPoint& d)
+    {
+        s.streamify(d.pos);
+        s.streamify(d.color);
+    }
+};
+
+template <>
+struct RenderSerializerMap<PvdDebugLine>
+{
+    void serialize(RenderSerializer& s, PvdDebugLine& d)
+    {
+        s.streamify(d.pos0);
+        s.streamify(d.color0);
+        s.streamify(d.pos1);
+        s.streamify(d.color1);
+    }
+};
+
+template <>
+struct RenderSerializerMap<PvdDebugTriangle>
+{
+    void serialize(RenderSerializer& s, PvdDebugTriangle& d)
+    {
+        s.streamify(d.pos0);
+        s.streamify(d.color0);
+        s.streamify(d.pos1);
+        s.streamify(d.color1);
+        s.streamify(d.pos2);
+        s.streamify(d.color2);
+    }
+};
+
+// Maps an event struct to its PvdUserRenderTypes id. The primary template has no
+// EnumVal member, so asking for the id of a type that is not in the event list
+// fails to compile.
+template <typename TDataType>
+struct PvdTypeToRenderType
+{
+    bool compile_error;
+};
+
+// One specialization per entry of PxPvdUserRenderTypes.h: <type>RenderEvent -> PvdUserRenderTypes::<type>.
+#define DECLARE_PVD_IMMEDIATE_RENDER_TYPE(type)                                                                        \
+    template <>                                                                                                        \
+    struct PvdTypeToRenderType<type##RenderEvent>                                                                      \
+    {                                                                                                                  \
+        enum Enum                                                                                                      \
+        {                                                                                                              \
+            EnumVal = PvdUserRenderTypes::type                                                                         \
+        };                                                                                                             \
+    };
+
+#include "PxPvdUserRenderTypes.h"
+#undef DECLARE_PVD_IMMEDIATE_RENDER_TYPE
+
+// Returns the render type id registered for the event struct TDataType.
+template <typename TDataType>
+PvdUserRenderTypes::Enum getPvdRenderTypeFromType()
+{
+    return static_cast<PvdUserRenderTypes::Enum>(PvdTypeToRenderType<TDataType>::EnumVal);
+}
+
+// Receiver interface with one pure virtual handleRenderEvent() overload per event
+// type listed in PxPvdUserRenderTypes.h.
+class PvdUserRenderHandler
+{
+  protected:
+    virtual ~PvdUserRenderHandler()
+    {
+    }
+
+  public:
+#define DECLARE_PVD_IMMEDIATE_RENDER_TYPE(type) virtual void handleRenderEvent(const type##RenderEvent& evt) = 0;
+
+#include "PxPvdUserRenderTypes.h"
+#undef DECLARE_PVD_IMMEDIATE_RENDER_TYPE
+};
+
+// Parses a byte buffer of render events and reports them to a PvdUserRenderHandler.
+// Instances come from create() and are disposed of through release() (protected destructor).
+class PvdUserRenderParser
+{
+  protected:
+    virtual ~PvdUserRenderParser()
+    {
+    }
+
+  public:
+    virtual void release() = 0;
+    virtual void parseData(DataRef<const uint8_t> data, PvdUserRenderHandler& handler) = 0;
+
+    static PvdUserRenderParser& create(bool swapBytes);
+};
+}
+}
+
+#endif // PXPVDSDK_PXPVDUSERRENDERIMPL_H
diff --git a/PxShared/src/pvd/src/PxPvdUserRenderTypes.h b/PxShared/src/pvd/src/PxPvdUserRenderTypes.h
new file mode 100644
index 0000000..6a47abb
--- /dev/null
+++ b/PxShared/src/pvd/src/PxPvdUserRenderTypes.h
@@ -0,0 +1,46 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use.
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +#define THERE_IS_NO_INCLUDE_GUARD_HERE_FOR_A_REASON + +#ifndef DECLARE_PVD_IMMEDIATE_RENDER_TYPE_NO_COMMA +#define DECLARE_PVD_IMMEDIATE_RENDER_TYPE_NO_COMMA DECLARE_PVD_IMMEDIATE_RENDER_TYPE +#endif + +DECLARE_PVD_IMMEDIATE_RENDER_TYPE(SetInstanceId) +DECLARE_PVD_IMMEDIATE_RENDER_TYPE(Points) +DECLARE_PVD_IMMEDIATE_RENDER_TYPE(Lines) +DECLARE_PVD_IMMEDIATE_RENDER_TYPE(Triangles) +DECLARE_PVD_IMMEDIATE_RENDER_TYPE(JointFrames) +DECLARE_PVD_IMMEDIATE_RENDER_TYPE(LinearLimit) +DECLARE_PVD_IMMEDIATE_RENDER_TYPE(AngularLimit) +DECLARE_PVD_IMMEDIATE_RENDER_TYPE(LimitCone) +DECLARE_PVD_IMMEDIATE_RENDER_TYPE(DoubleCone) +DECLARE_PVD_IMMEDIATE_RENDER_TYPE(Text) +DECLARE_PVD_IMMEDIATE_RENDER_TYPE_NO_COMMA(Debug) + +#undef DECLARE_PVD_IMMEDIATE_RENDER_TYPE_NO_COMMA +#undef THERE_IS_NO_INCLUDE_GUARD_HERE_FOR_A_REASON diff --git a/PxShared/src/pvd/src/PxPvdUserRenderer.cpp b/PxShared/src/pvd/src/PxPvdUserRenderer.cpp new file mode 100644 index 0000000..784d115 --- /dev/null +++ b/PxShared/src/pvd/src/PxPvdUserRenderer.cpp @@ -0,0 +1,460 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+#include "PxPvdUserRenderer.h" +#include "PxPvdUserRenderImpl.h" +#include "PxPvdFoundation.h" +#include "PxPvdInternalByteStreams.h" +#include "PxPvdBits.h" +#include "PxPvdCommStreamTypes.h" +#include <stdarg.h> + +using namespace physx; +using namespace physx::pvdsdk; + +namespace +{ + +template <typename TStreamType> +struct RenderWriter : public RenderSerializer +{ + TStreamType& mStream; + RenderWriter(TStreamType& stream) : mStream(stream) + { + } + template <typename TDataType> + void write(const TDataType* val, uint32_t count) + { + uint32_t numBytes = count * sizeof(TDataType); + mStream.write(reinterpret_cast<const uint8_t*>(val), numBytes); + } + template <typename TDataType> + void write(const TDataType& val) + { + write(&val, 1); + } + + template <typename TDataType> + void writeRef(DataRef<TDataType>& val) + { + uint32_t amount = val.size(); + write(amount); + if(amount) + write(val.begin(), amount); + } + + virtual void streamify(uint64_t& val) + { + write(val); + } + virtual void streamify(uint32_t& val) + { + write(val); + } + virtual void streamify(float& val) + { + write(val); + } + virtual void streamify(uint8_t& val) + { + write(val); + } + virtual void streamify(DataRef<uint8_t>& val) + { + writeRef(val); + } + + virtual void streamify(PvdDebugText& val) + { + write(val.color); + write(val.position); + write(val.size); + + uint32_t amount = static_cast<uint32_t>(strlen(val.string)) + 1; + write(amount); + if(amount) + write(val.string, amount); + } + + virtual void streamify(DataRef<PvdDebugPoint>& val) + { + writeRef(val); + } + virtual void streamify(DataRef<PvdDebugLine>& val) + { + writeRef(val); + } + virtual void streamify(DataRef<PvdDebugTriangle>& val) + { + writeRef(val); + } + + virtual uint32_t hasData() + { + return false; + } + virtual bool isGood() + { + return true; + } + + private: + RenderWriter& operator=(const RenderWriter&); +}; + +struct UserRenderer : public PvdUserRenderer +{ + ForwardingMemoryBuffer mBuffer; + 
uint32_t mBufferCapacity; + RendererEventClient* mClient; + + UserRenderer(uint32_t bufferFullAmount) + : mBuffer("UserRenderBuffer"), mBufferCapacity(bufferFullAmount), mClient(NULL) + { + } + virtual ~UserRenderer() + { + } + virtual void release() + { + PVD_DELETE(this); + } + + template <typename TEventType> + void handleEvent(TEventType evt) + { + RenderWriter<ForwardingMemoryBuffer> _writer(mBuffer); + RenderSerializer& writer(_writer); + + PvdUserRenderTypes::Enum evtType(getPvdRenderTypeFromType<TEventType>()); + writer.streamify(evtType); + evt.serialize(writer); + if(mBuffer.size() >= mBufferCapacity) + flushRenderEvents(); + } + virtual void setInstanceId(const void* iid) + { + handleEvent(SetInstanceIdRenderEvent(PVD_POINTER_TO_U64(iid))); + } + // Draw these points associated with this instance + virtual void drawPoints(const PvdDebugPoint* points, uint32_t count) + { + handleEvent(PointsRenderEvent(points, count)); + } + // Draw these lines associated with this instance + virtual void drawLines(const PvdDebugLine* lines, uint32_t count) + { + handleEvent(LinesRenderEvent(lines, count)); + } + // Draw these triangles associated with this instance + virtual void drawTriangles(const PvdDebugTriangle* triangles, uint32_t count) + { + handleEvent(TrianglesRenderEvent(triangles, count)); + } + + virtual void drawText(const PvdDebugText& text) + { + handleEvent(TextRenderEvent(text)); + } + + virtual void drawRenderbuffer(const PvdDebugPoint* pointData, uint32_t pointCount, const PvdDebugLine* lineData, + uint32_t lineCount, const PvdDebugTriangle* triangleData, uint32_t triangleCount) + { + handleEvent(DebugRenderEvent(pointData, pointCount, lineData, lineCount, triangleData, triangleCount)); + } + + // Constraint visualization routines + virtual void visualizeJointFrames(const PxTransform& parent, const PxTransform& child) + { + handleEvent(JointFramesRenderEvent(parent, child)); + } + virtual void visualizeLinearLimit(const PxTransform& t0, const 
PxTransform& t1, float value, bool active) + { + handleEvent(LinearLimitRenderEvent(t0, t1, value, active)); + } + virtual void visualizeAngularLimit(const PxTransform& t0, float lower, float upper, bool active) + { + handleEvent(AngularLimitRenderEvent(t0, lower, upper, active)); + } + virtual void visualizeLimitCone(const PxTransform& t, float ySwing, float zSwing, bool active) + { + handleEvent(LimitConeRenderEvent(t, ySwing, zSwing, active)); + } + virtual void visualizeDoubleCone(const PxTransform& t, float angle, bool active) + { + handleEvent(DoubleConeRenderEvent(t, angle, active)); + } + // Clear the immedate buffer. + virtual void flushRenderEvents() + { + if(mClient) + mClient->handleBufferFlush(mBuffer.begin(), mBuffer.size()); + mBuffer.clear(); + } + + virtual void setClient(RendererEventClient* client) + { + mClient = client; + } + + private: + UserRenderer& operator=(const UserRenderer&); +}; + +template <bool swapBytes> +struct RenderReader : public RenderSerializer +{ + MemPvdInputStream mStream; + ForwardingMemoryBuffer& mBuffer; + + RenderReader(ForwardingMemoryBuffer& buf) : mBuffer(buf) + { + } + void setData(DataRef<const uint8_t> data) + { + mStream.setup(const_cast<uint8_t*>(data.begin()), const_cast<uint8_t*>(data.end())); + } + virtual void streamify(uint32_t& val) + { + mStream >> val; + } + virtual void streamify(uint64_t& val) + { + mStream >> val; + } + virtual void streamify(float& val) + { + mStream >> val; + } + virtual void streamify(uint8_t& val) + { + mStream >> val; + } + template <typename TDataType> + void readRef(DataRef<TDataType>& val) + { + uint32_t count; + mStream >> count; + uint32_t numBytes = sizeof(TDataType) * count; + + TDataType* dataPtr = reinterpret_cast<TDataType*>(mBuffer.growBuf(numBytes)); + mStream.read(reinterpret_cast<uint8_t*>(dataPtr), numBytes); + val = DataRef<TDataType>(dataPtr, count); + } + + virtual void streamify(DataRef<PvdDebugPoint>& val) + { + readRef(val); + } + virtual void 
streamify(DataRef<PvdDebugLine>& val) + { + readRef(val); + } + virtual void streamify(DataRef<PvdDebugTriangle>& val) + { + readRef(val); + } + virtual void streamify(PvdDebugText& val) + { + mStream >> val.color; + mStream >> val.position; + mStream >> val.size; + + uint32_t len = 0; + mStream >> len; + + uint8_t* dataPtr = mBuffer.growBuf(len); + mStream.read(dataPtr, len); + val.string = reinterpret_cast<const char*>(dataPtr); + } + virtual void streamify(DataRef<uint8_t>& val) + { + readRef(val); + } + virtual bool isGood() + { + return mStream.isGood(); + } + virtual uint32_t hasData() + { + return uint32_t(mStream.size() > 0); + } + + private: + RenderReader& operator=(const RenderReader&); +}; + +template <> +struct RenderReader<true> : public RenderSerializer +{ + MemPvdInputStream mStream; + ForwardingMemoryBuffer& mBuffer; + RenderReader(ForwardingMemoryBuffer& buf) : mBuffer(buf) + { + } + void setData(DataRef<const uint8_t> data) + { + mStream.setup(const_cast<uint8_t*>(data.begin()), const_cast<uint8_t*>(data.end())); + } + + template <typename TDataType> + void read(TDataType& val) + { + mStream >> val; + swapBytes(val); + } + virtual void streamify(uint64_t& val) + { + read(val); + } + virtual void streamify(uint32_t& val) + { + read(val); + } + virtual void streamify(float& val) + { + read(val); + } + virtual void streamify(uint8_t& val) + { + read(val); + } + template <typename TDataType> + void readRef(DataRef<TDataType>& val) + { + uint32_t count; + mStream >> count; + swapBytes(count); + uint32_t numBytes = sizeof(TDataType) * count; + + TDataType* dataPtr = reinterpret_cast<TDataType*>(mBuffer.growBuf(numBytes)); + PVD_FOREACH(idx, count) + RenderSerializerMap<TDataType>().serialize(*this, dataPtr[idx]); + val = DataRef<TDataType>(dataPtr, count); + } + + virtual void streamify(DataRef<PvdDebugPoint>& val) + { + readRef(val); + } + virtual void streamify(DataRef<PvdDebugLine>& val) + { + readRef(val); + } + virtual void 
streamify(DataRef<PvdDebugTriangle>& val) + { + readRef(val); + } + virtual void streamify(PvdDebugText& val) + { + mStream >> val.color; + mStream >> val.position; + mStream >> val.size; + + uint32_t len = 0; + mStream >> len; + + uint8_t* dataPtr = mBuffer.growBuf(len); + mStream.read(dataPtr, len); + val.string = reinterpret_cast<const char*>(dataPtr); + } + virtual void streamify(DataRef<uint8_t>& val) + { + readRef(val); + } + virtual bool isGood() + { + return mStream.isGood(); + } + virtual uint32_t hasData() + { + return uint32_t(mStream.size() > 0); + } + + private: + RenderReader& operator=(const RenderReader&); +}; + +template <bool swapBytes> +struct Parser : public PvdUserRenderParser +{ + ForwardingMemoryBuffer mBuffer; + RenderReader<swapBytes> mReader; + Parser() : mBuffer("PvdUserRenderParser::mBuffer"), mReader(mBuffer) + { + } + + void release() + { + PVD_DELETE(this); + } + void parseData(DataRef<const uint8_t> data, PvdUserRenderHandler& handler) + { + mReader.setData(data); + RenderSerializer& serializer(mReader); + while(serializer.isGood() && serializer.hasData()) + { + mReader.mBuffer.clear(); + PvdUserRenderTypes::Enum evtType = PvdUserRenderTypes::Unknown; + serializer.streamify(evtType); + switch(evtType) + { +#define DECLARE_PVD_IMMEDIATE_RENDER_TYPE(type) \ + case PvdUserRenderTypes::type: \ + { \ + type##RenderEvent evt; \ + evt.serialize(serializer); \ + handler.handleRenderEvent(evt); \ + } \ + break; +#include "PxPvdUserRenderTypes.h" +#undef DECLARE_PVD_IMMEDIATE_RENDER_TYPE + case PvdUserRenderTypes::Unknown: + PX_ASSERT(false); + return; + } + } + PX_ASSERT(serializer.isGood()); + return; + } + + PX_NOCOPY(Parser<swapBytes>) +}; +} + +PvdUserRenderParser& PvdUserRenderParser::create(bool swapBytes) +{ + if(swapBytes) + return *PVD_NEW(Parser<true>); + else + return *PVD_NEW(Parser<false>); +} + +PvdUserRenderer* PvdUserRenderer::create(uint32_t bufferSize) +{ + return PVD_NEW(UserRenderer)(bufferSize); +} + diff --git 
a/PxShared/src/pvd/src/windows/PxWindowsPvdDelayLoadHook.cpp b/PxShared/src/pvd/src/windows/PxWindowsPvdDelayLoadHook.cpp new file mode 100644 index 0000000..a8c6df0 --- /dev/null +++ b/PxShared/src/pvd/src/windows/PxWindowsPvdDelayLoadHook.cpp @@ -0,0 +1,82 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#include "pvd/windows/PxWindowsPvdDelayLoadHook.h" +#include "windows/PsWindowsInclude.h" +#include "windows/PsWindowsLoadLibrary.h" + +// Prior to Visual Studio 2015 Update 3, these hooks were non-const. +#define DELAYIMP_INSECURE_WRITABLE_HOOKS +#include <delayimp.h> + +static const physx::PxFoundationDelayLoadHook* gDelayLoadHook = NULL; + +void physx::PxPvdSetFoundationDelayLoadHook(const physx::PxFoundationDelayLoadHook* hook) +{ + gDelayLoadHook = hook; +} + +using namespace physx; + +#pragma comment(lib, "delayimp") + +FARPROC WINAPI delayHook(unsigned dliNotify, PDelayLoadInfo pdli) +{ + switch (dliNotify) { + case dliStartProcessing : + break; + + case dliNotePreLoadLibrary : + { + return physx::shdfnd::foundationDliNotePreLoadLibrary(pdli->szDll, gDelayLoadHook); + } + break; + + case dliNotePreGetProcAddress : + break; + + case dliFailLoadLib : + break; + + case dliFailGetProc : + break; + + case dliNoteEndProcessing : + break; + + default : + + return NULL; + } + + return NULL; +} + +PfnDliHook __pfnDliNotifyHook2 = delayHook; diff --git a/PxShared/src/task/src/TaskManager.cpp b/PxShared/src/task/src/TaskManager.cpp new file mode 100644 index 0000000..ffcbfcd --- /dev/null +++ b/PxShared/src/task/src/TaskManager.cpp @@ -0,0 +1,733 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. 
+ +#include "task/PxTask.h" +#include "task/PxTaskDefine.h" +#include "foundation/PxErrors.h" + +#include "PsThread.h" +#include "PsAtomic.h" +#include "PsMutex.h" +#include "PsHashMap.h" +#include "PsArray.h" +#include "PsAllocator.h" + + +#if PX_SUPPORT_PXTASK_PROFILING +#include "foundation/PxProfiler.h" +#endif + +#define DOT_LOG 0 + +// for information on generating tasks graphs see this wiki page +// https://wiki.nvidia.com/engwiki/index.php/PhysX/sdk/InternalDoc_Example_TaskGraph +#if DOT_LOG +#include "stdio.h" +#define LOG_FRAME_NUM 60 +static int framenum; +static FILE *logfile; + +static const char* logFilename = "pxtask-graph.txt"; +__declspec(thread) static physx::PxBaseTask* currentTask; + +template<class A, class B> PX_FORCE_INLINE A PxTaskUnionCast(B b) +{ + union AB + { + AB(B bb) + : _b(bb) + { + } + B _b; + A _a; + } u(b); + return u._a; +} +#endif + +#define LOCK() shdfnd::Mutex::ScopedLock __lock__(mMutex) + +namespace physx +{ + const int EOL = -1; + typedef shdfnd::HashMap<const char *, PxTaskID> PxTaskNameToIDMap; + + struct PxTaskDepTableRow + { + PxTaskID mTaskID; + int mNextDep; + }; + typedef shdfnd::Array<PxTaskDepTableRow> PxTaskDepTable; + + class PxTaskTableRow + { + public: + PxTaskTableRow() : mRefCount( 1 ), mStartDep(EOL), mLastDep(EOL) {} + void addDependency( PxTaskDepTable& depTable, PxTaskID taskID ) + { + int newDep = int(depTable.size()); + PxTaskDepTableRow row; + row.mTaskID = taskID; + row.mNextDep = EOL; + depTable.pushBack( row ); + + if( mLastDep == EOL ) + { + mStartDep = mLastDep = newDep; + } + else + { + depTable[ uint32_t(mLastDep) ].mNextDep = newDep; + mLastDep = newDep; + } + } + + PxTask * mTask; + volatile int mRefCount; + PxTaskType::Enum mType; + int mStartDep; + int mLastDep; + }; + typedef shdfnd::Array<PxTaskTableRow> PxTaskTable; + + +/* Implementation of PxTaskManager abstract API */ +class PxTaskMgr : public PxTaskManager, public shdfnd::UserAllocated +{ + PX_NOCOPY(PxTaskMgr) +public: + 
PxTaskMgr(PxErrorCallback& , PxCpuDispatcher*, PxGpuDispatcher*); + ~PxTaskMgr(); + + void setCpuDispatcher( PxCpuDispatcher& ref ) + { + mCpuDispatcher = &ref; + } + + void setGpuDispatcher( PxGpuDispatcher& ref ) + { + mGpuDispatcher = &ref; + } + + PxCpuDispatcher* getCpuDispatcher() const + { + return mCpuDispatcher; + } + + PxGpuDispatcher* getGpuDispatcher() const + { + return mGpuDispatcher; + } + + void resetDependencies(); + void startSimulation(); + void stopSimulation(); + void taskCompleted( PxTask& task ); + + PxTaskID getNamedTask( const char *name ); + PxTaskID submitNamedTask( PxTask *task, const char *name, PxTaskType::Enum type = PxTaskType::TT_CPU ); + PxTaskID submitUnnamedTask( PxTask& task, PxTaskType::Enum type = PxTaskType::TT_CPU ); + PxTask* getTaskFromID( PxTaskID ); + + bool dispatchTask( PxTaskID taskID, bool gpuGroupStart ); + bool resolveRow( PxTaskID taskID, bool gpuGroupStart ); + + void release(); + + void finishBefore( PxTask& task, PxTaskID taskID ); + void startAfter( PxTask& task, PxTaskID taskID ); + + void addReference( PxTaskID taskID ); + void decrReference( PxTaskID taskID ); + int32_t getReference( PxTaskID taskID ) const; + + void decrReference( PxLightCpuTask& lighttask ); + void addReference( PxLightCpuTask& lighttask ); + + void emitStartEvent( PxBaseTask& basetask, uint32_t threadId=0); + void emitStopEvent( PxBaseTask& basetask, uint32_t threadId=0); + + PxErrorCallback& mErrorCallback; + PxCpuDispatcher *mCpuDispatcher; + PxGpuDispatcher *mGpuDispatcher; + PxTaskNameToIDMap mName2IDmap; + volatile int mPendingTasks; + shdfnd::Mutex mMutex; + + PxTaskDepTable mDepTable; + PxTaskTable mTaskTable; + + shdfnd::Array<PxTaskID> mStartDispatch; + + +#if DOT_LOG + static void debugGraphEnd(); + static void debugGraphEdge(PxBaseTask* prev, uint32_t prevIndex, uint32_t prevType, PxBaseTask* next, uint32_t nextIndex, uint32_t nextType, uint32_t weight); + static void debugGraphBegin(const char* filename); +#endif + }; + 
+PxTaskManager* PxTaskManager::createTaskManager(PxErrorCallback& errorCallback, PxCpuDispatcher* cpuDispatcher, PxGpuDispatcher* gpuDispatcher) +{ + return PX_NEW(PxTaskMgr)(errorCallback, cpuDispatcher, gpuDispatcher); +} + +PxTaskMgr::PxTaskMgr(PxErrorCallback& errorCallback, PxCpuDispatcher* cpuDispatcher, PxGpuDispatcher* gpuDispatcher) + : mErrorCallback (errorCallback) + , mCpuDispatcher( cpuDispatcher ) + , mGpuDispatcher( gpuDispatcher ) + , mPendingTasks( 0 ) + , mDepTable(PX_DEBUG_EXP("PxTaskDepTable")) + , mTaskTable(PX_DEBUG_EXP("PxTaskTable")) + , mStartDispatch(PX_DEBUG_EXP("StartDispatch")) +{ +} + + +#if DOT_LOG +void PxTaskMgr::debugGraphBegin(const char* filename) +{ + logfile = fopen(filename, "w"); + + if (logfile) + { + fprintf(logfile, "digraph tasks {\n"); + fprintf(logfile, "dpi=300;\n"); + fprintf(logfile, "node [width=.3, height=0.8 style=\"rounded, filled\"];"); + } +} +void PxTaskMgr::debugGraphEnd() +{ + if (logfile) + { + fprintf(logfile, "}\n"); + fclose(logfile); + logfile = NULL; + } +} + +void PxTaskMgr::debugGraphEdge(PxBaseTask* prev, uint32_t prevIndex, uint32_t prevType, PxBaseTask* next, uint32_t nextIndex, uint32_t nextType, uint32_t weight) +{ + PX_ASSERT(next); + + enum Type + { + eCpuNode, + eSpuNode, + eJoinNode + }; + + if (logfile) + { + // lock + PxTaskMgr& mgr = static_cast<PxTaskMgr&>(*next->getTaskManager()); + shdfnd::Mutex::ScopedLock lock(mgr.mMutex); + + // check both task and their task manager is valid + if (prev && prev->mTm) + fprintf(logfile, "{node [shape=%s,label=\"%s\"] t%d%d};\n", (prevType==eSpuNode)?"box,fillcolor=lightblue":"ellipse,fillcolor=lightgrey", prev->getName(), PxTaskUnionCast<uint32_t>(prev), prevIndex); + + if (next && next->mTm) + fprintf(logfile, "{node [shape=%s,label=\"%s\"] t%d%d};\n", (nextType==eSpuNode)?"box,fillcolor=lightblue":"ellipse,fillcolor=lightgrey", next->getName(), PxTaskUnionCast<uint32_t>(next), nextIndex); + + if (weight > 0 && prev && next) + fprintf(logfile, 
"t%d%d->t%d%d [weight=%d];\n", PxTaskUnionCast<uint32_t>(prev), prevIndex, PxTaskUnionCast<uint32_t>(next), nextIndex, weight); + } +} +#endif + + +PxTaskMgr::~PxTaskMgr() +{ +} + +void PxTaskMgr::release() +{ + PX_DELETE(this); +} + +void PxTaskMgr::decrReference(PxLightCpuTask& lighttask) +{ +#if DOT_LOG + uint32_t weight = 1; +#endif + + /* This does not need a lock! */ + if (!shdfnd::atomicDecrement(&lighttask.mRefCount)) + { +#if DOT_LOG + ++weight; +#endif + PX_ASSERT(mCpuDispatcher); + if (mCpuDispatcher) + { + mCpuDispatcher->submitTask(lighttask); + } + else + { + lighttask.release(); + } + } + +#if DOT_LOG + debugGraphEdge(currentTask, 0, 0, &lighttask, 0, 0, weight); +#endif +} + +void PxTaskMgr::addReference(PxLightCpuTask& lighttask) +{ + /* This does not need a lock! */ + shdfnd::atomicIncrement(&lighttask.mRefCount); +} + +void PxTaskMgr::emitStartEvent(PxBaseTask& basetask, uint32_t threadId) +{ +#if DOT_LOG + currentTask = &basetask; +#endif + + PxBaseTask* tmp = &basetask; + PX_UNUSED(tmp); + PX_UNUSED(threadId); + + /* This does not need a lock! */ +#if PX_SUPPORT_PXTASK_PROFILING + //PX_COMPILE_TIME_ASSERT(sizeof(PxProfileEventId::mEventId) == sizeof(PxBaseTask::mEventID)); + PX_PROFILE_START_CROSSTHREAD(basetask.getName(),0); +#endif +} + +void PxTaskMgr::emitStopEvent(PxBaseTask& basetask, uint32_t threadId) +{ + PxBaseTask* tmp = &basetask; + PX_UNUSED(tmp); + PX_UNUSED(threadId); + + /* This does not need a lock! */ +#if PX_SUPPORT_PXTASK_PROFILING + //PX_COMPILE_TIME_ASSERT(sizeof(PxProfileEventId::mEventId) == sizeof(PxBaseTask::mEventID)); + PX_PROFILE_STOP_CROSSTHREAD(basetask.getName(),0); +#endif +} + +/* + * Called by the owner (Scene) at the start of every frame, before + * asking for tasks to be submitted. 
+ */ +void PxTaskMgr::resetDependencies() +{ +#if DOT_LOG + if( logfile ) + { + debugGraphEnd(); + } + if( framenum++ == LOG_FRAME_NUM ) + { + debugGraphBegin(logFilename); + } +#endif + + PX_ASSERT( !mPendingTasks ); // only valid if you don't resubmit named tasks, this is true for the SDK + PX_ASSERT( mCpuDispatcher ); + mTaskTable.clear(); + mDepTable.clear(); + mName2IDmap.clear(); + mPendingTasks = 0; +} + +/* + * Called by the owner (Scene) to start simulating the task graph. + * Dispatch all tasks with refCount == 1 + */ +void PxTaskMgr::startSimulation() +{ + PX_ASSERT( mCpuDispatcher ); + + if( mGpuDispatcher ) + { + mGpuDispatcher->startSimulation(); + } + + /* Handle empty task graph */ + if( mPendingTasks == 0 ) + { + + return; + } + + bool gpuDispatch = false; + for( PxTaskID i = 0 ; i < mTaskTable.size() ; i++ ) + { + if( mTaskTable[ i ].mType == PxTaskType::TT_COMPLETED ) + { + continue; + } + if( !shdfnd::atomicDecrement( &mTaskTable[ i ].mRefCount ) ) + { + mStartDispatch.pushBack(i); + } + } + for( uint32_t i=0; i<mStartDispatch.size(); ++i) + { + gpuDispatch |= dispatchTask( mStartDispatch[i], gpuDispatch ); + } + //mStartDispatch.resize(0); + mStartDispatch.forceSize_Unsafe(0); + + if( mGpuDispatcher && gpuDispatch ) + { + mGpuDispatcher->finishGroup(); + } +} + +void PxTaskMgr::stopSimulation() +{ + if( mGpuDispatcher ) + { + mGpuDispatcher->stopSimulation(); + } +} + +PxTaskID PxTaskMgr::getNamedTask( const char *name ) +{ + const PxTaskNameToIDMap::Entry *ret; + { + LOCK(); + ret = mName2IDmap.find( name ); + } + if( ret ) + { + return ret->second; + } + else + { + // create named entry in task table, without a task + return submitNamedTask( NULL, name, PxTaskType::TT_NOT_PRESENT ); +} +} + +PxTask* PxTaskMgr::getTaskFromID( PxTaskID id ) +{ + LOCK(); // todo: reader lock necessary? 
+ return mTaskTable[ id ].mTask; +} + + +/* If called at runtime, must be thread-safe */ +PxTaskID PxTaskMgr::submitNamedTask( PxTask *task, const char *name, PxTaskType::Enum type ) +{ + if( task ) + { + task->mTm = this; + task->submitted(); + } + + LOCK(); + + const PxTaskNameToIDMap::Entry *ret = mName2IDmap.find( name ); + if( ret ) + { + PxTaskID prereg = ret->second; + if( task ) + { + /* name was registered for us by a dependent task */ + PX_ASSERT( !mTaskTable[ prereg ].mTask ); + PX_ASSERT( mTaskTable[ prereg ].mType == PxTaskType::TT_NOT_PRESENT ); + mTaskTable[ prereg ].mTask = task; + mTaskTable[ prereg ].mType = type; + task->mTaskID = prereg; + } + return prereg; + } + else + { + shdfnd::atomicIncrement(&mPendingTasks); + PxTaskID id = static_cast<PxTaskID>(mTaskTable.size()); + mName2IDmap[ name ] = id; + if( task ) + { + task->mTaskID = id; + } + PxTaskTableRow r; + r.mTask = task; + r.mType = type; +#if DOT_LOG + if( logfile ) + { + if( type == PxTaskType::TT_GPU ) + { + fprintf(logfile, "{node [shape=box,label=\"%s\"] t%d0};\n", task->getName(), PxTaskUnionCast<uint32_t>(task)); + } + else if (type == PxTaskType::TT_NOT_PRESENT) + { + fprintf(logfile, "{node [shape=invhouse,label=\"%s\"] t%d0};\n", name, PxTaskUnionCast<uint32_t>(task)); + } + else + { + fprintf(logfile, "{node [label=\"%s\"] t%d0};\n", task->getName(), PxTaskUnionCast<uint32_t>(task)); + } + } +#endif + mTaskTable.pushBack(r); + return id; + } +} + +/* + * Add an unnamed task to the task table + */ +PxTaskID PxTaskMgr::submitUnnamedTask( PxTask& task, PxTaskType::Enum type ) +{ + shdfnd::atomicIncrement(&mPendingTasks); + + task.mTm = this; + task.submitted(); + + LOCK(); + task.mTaskID = static_cast<PxTaskID>(mTaskTable.size()); + PxTaskTableRow r; + r.mTask = &task; + r.mType = type; +#if DOT_LOG + if( logfile ) + { + if( type == PxTaskType::TT_GPU ) + { + fprintf(logfile, "{node [shape=box,label=\"%s\"] t%d0};\n", task.getName(), PxTaskUnionCast<uint32_t>(&task)); + } + else 
+ { + fprintf(logfile, "{node [label=\"%s\"] t%d0};\n", task.getName(), PxTaskUnionCast<uint32_t>(&task)); + } + } +#endif + mTaskTable.pushBack(r); + return task.mTaskID; +} + + +/* Called by worker threads (or cooperating application threads) when a + * PxTask has completed. Propogate depdenencies, decrementing all + * referenced tasks' refCounts. If any of those reach zero, activate + * those tasks. + */ +void PxTaskMgr::taskCompleted( PxTask& task ) +{ + LOCK(); + if( resolveRow( task.mTaskID, false ) ) + { + mGpuDispatcher->finishGroup(); + } +} + +/* ================== Private Functions ======================= */ + +/* + * Add a dependency to force 'task' to complete before the + * referenced 'taskID' is allowed to be dispatched. + */ +void PxTaskMgr::finishBefore( PxTask& task, PxTaskID taskID ) +{ + LOCK(); + PX_ASSERT( mTaskTable[ taskID ].mType != PxTaskType::TT_COMPLETED ); + +#if DOT_LOG + if( logfile ) + { + fprintf(logfile, "t%d0->t%d0;\n", PxTaskUnionCast<uint32_t>(&task), PxTaskUnionCast<uint32_t>(mTaskTable[ taskID ].mTask)); + } +#endif + + mTaskTable[ task.mTaskID ].addDependency( mDepTable, taskID ); + shdfnd::atomicIncrement( &mTaskTable[ taskID ].mRefCount ); +} + + +/* + * Add a dependency to force 'task' to wait for the referenced 'taskID' + * to complete before it is allowed to be dispatched. + */ +void PxTaskMgr::startAfter( PxTask& task, PxTaskID taskID ) +{ + LOCK(); + PX_ASSERT( mTaskTable[ taskID ].mType != PxTaskType::TT_COMPLETED ); + +#if DOT_LOG + if( logfile ) + { + fprintf(logfile, "t%d0->t%d0;\n", PxTaskUnionCast<uint32_t>(mTaskTable[ taskID ].mTask), PxTaskUnionCast<uint32_t>(&task)); + } +#endif + + mTaskTable[ taskID ].addDependency( mDepTable, task.mTaskID ); + shdfnd::atomicIncrement( &mTaskTable[ task.mTaskID ].mRefCount ); +} + + +void PxTaskMgr::addReference( PxTaskID taskID ) +{ + LOCK(); + shdfnd::atomicIncrement( &mTaskTable[ taskID ].mRefCount ); +} + +/* + * Remove one reference count from a task. 
Intended for use by the + * GPU dispatcher, to remove reference counts when CUDA events are + * resolved. Must be done here to make it thread safe. + */ +void PxTaskMgr::decrReference( PxTaskID taskID ) +{ + LOCK(); + +#if DOT_LOG + debugGraphEdge(currentTask, 0, 0, mTaskTable[ taskID ].mTask, 0, 0, 1); +#endif + + if( !shdfnd::atomicDecrement( &mTaskTable[ taskID ].mRefCount ) ) + { + if( dispatchTask( taskID, false ) ) + { + mGpuDispatcher->finishGroup(); + } + } +} + +int32_t PxTaskMgr::getReference(PxTaskID taskID) const +{ + return mTaskTable[ taskID ].mRefCount; +} + +/* + * A task has completed, decrement all dependencies and submit tasks + * that are ready to run. Signal simulation end if ther are no more + * pending tasks. + */ +bool PxTaskMgr::resolveRow( PxTaskID taskID, bool gpuGroupStart ) +{ + int depRow = mTaskTable[ taskID ].mStartDep; + + uint32_t streamIndex = 0; + bool syncRequired = false; + if( mTaskTable[ taskID ].mTask ) + { + streamIndex = mTaskTable[ taskID ].mTask->mStreamIndex; + } + + while( depRow != EOL ) + { + PxTaskDepTableRow& row = mDepTable[ uint32_t(depRow) ]; + PxTaskTableRow& dtt = mTaskTable[ row.mTaskID ]; + + // pass stream index to (up to one) dependent GPU task + if( dtt.mTask && dtt.mType == PxTaskType::TT_GPU && streamIndex ) + { + if( dtt.mTask->mStreamIndex ) + { + PX_ASSERT( dtt.mTask->mStreamIndex != streamIndex ); + dtt.mTask->mPreSyncRequired = true; + } + else if( syncRequired ) + { + dtt.mTask->mPreSyncRequired = true; + } + else + { + dtt.mTask->mStreamIndex = streamIndex; + /* only one forward task gets to use this stream */ + syncRequired = true; + } + } + + if( !shdfnd::atomicDecrement( &dtt.mRefCount ) ) + { + gpuGroupStart |= dispatchTask( row.mTaskID, gpuGroupStart ); + } + + depRow = row.mNextDep; + } + + shdfnd::atomicDecrement( &mPendingTasks ); + return gpuGroupStart; +} + +/* + * Submit a ready task to its appropriate dispatcher. 
+ */ +bool PxTaskMgr::dispatchTask( PxTaskID taskID, bool gpuGroupStart ) +{ + LOCK(); // todo: reader lock necessary? + PxTaskTableRow& tt = mTaskTable[ taskID ]; + + // prevent re-submission + if( tt.mType == PxTaskType::TT_COMPLETED ) + { + mErrorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "PxTask dispatched twice", __FILE__, __LINE__); + return false; + } + + switch ( tt.mType ) + { + case PxTaskType::TT_CPU: + mCpuDispatcher->submitTask( *tt.mTask ); + break; + + case PxTaskType::TT_GPU: +#if PX_WINDOWS_FAMILY + if( mGpuDispatcher ) + { + if( !gpuGroupStart ) + { + mGpuDispatcher->startGroup(); + } + mGpuDispatcher->submitTask( *tt.mTask ); + gpuGroupStart = true; + } + else +#endif + { + mErrorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "No GPU dispatcher", __FILE__, __LINE__); + } + break; + + case PxTaskType::TT_NOT_PRESENT: + /* No task registered with this taskID, resolve its dependencies */ + PX_ASSERT(!tt.mTask); + //shdfnd::getFoundation().error(PX_INFO, "unregistered task resolved"); + gpuGroupStart |= resolveRow( taskID, gpuGroupStart ); + break; + case PxTaskType::TT_COMPLETED: + default: + mErrorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "Unknown task type", __FILE__, __LINE__); + gpuGroupStart |= resolveRow( taskID, gpuGroupStart ); + break; + } + + tt.mType = PxTaskType::TT_COMPLETED; + return gpuGroupStart; +} + +}// end physx namespace |