From 39ed87570bdb2f86969d4be821c94b722dc71179 Mon Sep 17 00:00:00 2001 From: Joe Ludwig Date: Wed, 26 Jun 2013 15:22:04 -0700 Subject: First version of the SOurce SDK 2013 --- mp/src/utils/common/vmpi_tools_shared.cpp | 374 ++++++++++++++++++++++++++++++ 1 file changed, 374 insertions(+) create mode 100644 mp/src/utils/common/vmpi_tools_shared.cpp (limited to 'mp/src/utils/common/vmpi_tools_shared.cpp') diff --git a/mp/src/utils/common/vmpi_tools_shared.cpp b/mp/src/utils/common/vmpi_tools_shared.cpp new file mode 100644 index 00000000..c753ce11 --- /dev/null +++ b/mp/src/utils/common/vmpi_tools_shared.cpp @@ -0,0 +1,374 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//=============================================================================// + +#include +#include +#include "vmpi.h" +#include "cmdlib.h" +#include "vmpi_tools_shared.h" +#include "tier1/strtools.h" +#include "mpi_stats.h" +#include "iphelpers.h" +#include "tier0/minidump.h" + + +// ----------------------------------------------------------------------------- // +// Globals. +// ----------------------------------------------------------------------------- // + +static bool g_bReceivedDirectoryInfo = false; // Have we gotten the qdir info yet? + +static bool g_bReceivedDBInfo = false; +static CDBInfo g_DBInfo; +static unsigned long g_JobPrimaryID; + +static int g_nDisconnects = 0; // Tracks how many remote processes have disconnected ungracefully. + + +// ----------------------------------------------------------------------------- // +// Shared dispatch code. +// ----------------------------------------------------------------------------- // + +bool SharedDispatch( MessageBuffer *pBuf, int iSource, int iPacketID ) +{ + char *pInPos = &pBuf->data[2]; + + switch ( pBuf->data[1] ) + { + case VMPI_SUBPACKETID_DIRECTORIES: + { + Q_strncpy( gamedir, pInPos, sizeof( gamedir ) ); + pInPos += strlen( pInPos ) + 1; + + Q_strncpy( qdir, pInPos, sizeof( qdir ) ); + + g_bReceivedDirectoryInfo = true; + } + return true; + + case VMPI_SUBPACKETID_DBINFO: + { + g_DBInfo = *((CDBInfo*)pInPos); + pInPos += sizeof( CDBInfo ); + g_JobPrimaryID = *((unsigned long*)pInPos); + + g_bReceivedDBInfo = true; + } + return true; + + case VMPI_SUBPACKETID_CRASH: + { + char const chCrashInfoType = *pInPos; + pInPos += 2; + switch ( chCrashInfoType ) + { + case 't': + Warning( "\nWorker '%s' dead: %s\n", VMPI_GetMachineName( iSource ), pInPos ); + break; + case 'f': + { + int iFileSize = * reinterpret_cast< int const * >( pInPos ); + pInPos += sizeof( iFileSize ); + + // Temp folder + char const *szFolder = NULL; + if ( !szFolder ) szFolder = getenv( "TEMP" ); + if ( !szFolder ) szFolder = getenv( "TMP" ); + if ( !szFolder ) szFolder = "c:"; + + // Base module name + char chModuleName[_MAX_PATH], *pModuleName = chModuleName; + ::GetModuleFileName( NULL, chModuleName, sizeof( chModuleName ) / sizeof( chModuleName[0] ) ); + + if ( char *pch = strrchr( chModuleName, '.' ) ) + *pch = 0; + if ( char *pch = strrchr( chModuleName, '\\' ) ) + *pch = 0, pModuleName = pch + 1; + + // Current time + time_t currTime = ::time( NULL ); + struct tm * pTime = ::localtime( &currTime ); + + // Number of minidumps this run + static int s_numMiniDumps = 0; + ++ s_numMiniDumps; + + // Prepare the filename + char chSaveFileName[ 2 * _MAX_PATH ] = { 0 }; + sprintf( chSaveFileName, "%s\\vmpi_%s_on_%s_%d%.2d%2d%.2d%.2d%.2d_%d.mdmp", + szFolder, + pModuleName, + VMPI_GetMachineName( iSource ), + pTime->tm_year + 1900, /* Year less 2000 */ + pTime->tm_mon + 1, /* month (0 - 11 : 0 = January) */ + pTime->tm_mday, /* day of month (1 - 31) */ + pTime->tm_hour, /* hour (0 - 23) */ + pTime->tm_min, /* minutes (0 - 59) */ + pTime->tm_sec, /* seconds (0 - 59) */ + s_numMiniDumps + ); + + if ( FILE *fDump = fopen( chSaveFileName, "wb" ) ) + { + fwrite( pInPos, 1, iFileSize, fDump ); + fclose( fDump ); + + Warning( "\nSaved worker crash minidump '%s', size %d byte(s).\n", + chSaveFileName, iFileSize ); + } + else + { + Warning( "\nReceived worker crash minidump size %d byte(s), failed to save.\n", iFileSize ); + } + } + break; + } + } + return true; + } + + return false; +} + +CDispatchReg g_SharedDispatchReg( VMPI_SHARED_PACKET_ID, SharedDispatch ); + + + +// ----------------------------------------------------------------------------- // +// Module interfaces. +// ----------------------------------------------------------------------------- // + +void SendQDirInfo() +{ + char cPacketID[2] = { VMPI_SHARED_PACKET_ID, VMPI_SUBPACKETID_DIRECTORIES }; + + MessageBuffer mb; + mb.write( cPacketID, 2 ); + mb.write( gamedir, strlen( gamedir ) + 1 ); + mb.write( qdir, strlen( qdir ) + 1 ); + + VMPI_SendData( mb.data, mb.getLen(), VMPI_PERSISTENT ); +} + + +void RecvQDirInfo() +{ + while ( !g_bReceivedDirectoryInfo ) + VMPI_DispatchNextMessage(); +} + + +void SendDBInfo( const CDBInfo *pInfo, unsigned long jobPrimaryID ) +{ + char cPacketInfo[2] = { VMPI_SHARED_PACKET_ID, VMPI_SUBPACKETID_DBINFO }; + const void *pChunks[] = { cPacketInfo, pInfo, &jobPrimaryID }; + int chunkLengths[] = { 2, sizeof( CDBInfo ), sizeof( jobPrimaryID ) }; + + VMPI_SendChunks( pChunks, chunkLengths, ARRAYSIZE( pChunks ), VMPI_PERSISTENT ); +} + + +void RecvDBInfo( CDBInfo *pInfo, unsigned long *pJobPrimaryID ) +{ + while ( !g_bReceivedDBInfo ) + VMPI_DispatchNextMessage(); + + *pInfo = g_DBInfo; + *pJobPrimaryID = g_JobPrimaryID; +} + +// If the file is successfully opened, read and sent returns the size of the file in bytes +// otherwise returns 0 and nothing is sent +int VMPI_SendFileChunk( const void *pvChunkPrefix, int lenPrefix, tchar const *ptchFileName ) +{ + HANDLE hFile = NULL; + HANDLE hMapping = NULL; + void const *pvMappedData = NULL; + int iResult = 0; + + hFile = ::CreateFile( ptchFileName, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL ); + if ( !hFile || ( hFile == INVALID_HANDLE_VALUE ) ) + goto done; + + hMapping = ::CreateFileMapping( hFile, NULL, PAGE_READONLY, 0, 0, NULL ); + if ( !hMapping || ( hMapping == INVALID_HANDLE_VALUE ) ) + goto done; + + pvMappedData = ::MapViewOfFile( hMapping, FILE_MAP_READ, 0, 0, 0 ); + if ( !pvMappedData ) + goto done; + + int iMappedFileSize = ::GetFileSize( hFile, NULL ); + if ( INVALID_FILE_SIZE == iMappedFileSize ) + goto done; + + // Send the data over VMPI + if ( VMPI_Send3Chunks( + pvChunkPrefix, lenPrefix, + &iMappedFileSize, sizeof( iMappedFileSize ), + pvMappedData, iMappedFileSize, + VMPI_MASTER_ID ) ) + iResult = iMappedFileSize; + + // Fall-through for cleanup code to execute +done: + if ( pvMappedData ) + ::UnmapViewOfFile( pvMappedData ); + + if ( hMapping && ( hMapping != INVALID_HANDLE_VALUE ) ) + ::CloseHandle( hMapping ); + + if ( hFile && ( hFile != INVALID_HANDLE_VALUE ) ) + ::CloseHandle( hFile ); + + return iResult; +} + +void VMPI_HandleCrash( const char *pMessage, void *pvExceptionInfo, bool bAssert ) +{ + static LONG crashHandlerCount = 0; + if ( InterlockedIncrement( &crashHandlerCount ) == 1 ) + { + Msg( "\nFAILURE: '%s' (assert: %d)\n", pMessage, bAssert ); + + // Send a message to the master. + char crashMsg[4] = { VMPI_SHARED_PACKET_ID, VMPI_SUBPACKETID_CRASH, 't', ':' }; + + VMPI_Send2Chunks( + crashMsg, + sizeof( crashMsg ), + pMessage, + strlen( pMessage ) + 1, + VMPI_MASTER_ID ); + + // Now attempt to create a minidump with the given exception information + if ( pvExceptionInfo ) + { + struct _EXCEPTION_POINTERS *pvExPointers = ( struct _EXCEPTION_POINTERS * ) pvExceptionInfo; + tchar tchMinidumpFileName[_MAX_PATH] = { 0 }; + bool bSucceededWritingMinidump = WriteMiniDumpUsingExceptionInfo( + pvExPointers->ExceptionRecord->ExceptionCode, + pvExPointers, + ( MINIDUMP_TYPE )( MiniDumpWithDataSegs | MiniDumpWithIndirectlyReferencedMemory | MiniDumpWithProcessThreadData ), + // ( MINIDUMP_TYPE )( MiniDumpWithDataSegs | MiniDumpWithFullMemory | MiniDumpWithHandleData | MiniDumpWithUnloadedModules | MiniDumpWithIndirectlyReferencedMemory | MiniDumpWithProcessThreadData | MiniDumpWithPrivateReadWriteMemory ), + // ( MINIDUMP_TYPE )( MiniDumpNormal ), + tchMinidumpFileName ); + if ( bSucceededWritingMinidump ) + { + crashMsg[2] = 'f'; + VMPI_SendFileChunk( crashMsg, sizeof( crashMsg ), tchMinidumpFileName ); + ::DeleteFile( tchMinidumpFileName ); + } + } + + // Let the messages go out. + Sleep( 500 ); + } + + InterlockedDecrement( &crashHandlerCount ); +} + + +// This is called if we crash inside our crash handler. It just terminates the process immediately. +LONG __stdcall VMPI_SecondExceptionFilter( struct _EXCEPTION_POINTERS *ExceptionInfo ) +{ + TerminateProcess( GetCurrentProcess(), 2 ); + return EXCEPTION_EXECUTE_HANDLER; // (never gets here anyway) +} + + +void VMPI_ExceptionFilter( unsigned long uCode, void *pvExceptionInfo ) +{ + // This is called if we crash inside our crash handler. It just terminates the process immediately. + SetUnhandledExceptionFilter( VMPI_SecondExceptionFilter ); + + //DWORD code = ExceptionInfo->ExceptionRecord->ExceptionCode; + + #define ERR_RECORD( name ) { name, #name } + struct + { + int code; + char *pReason; + } errors[] = + { + ERR_RECORD( EXCEPTION_ACCESS_VIOLATION ), + ERR_RECORD( EXCEPTION_ARRAY_BOUNDS_EXCEEDED ), + ERR_RECORD( EXCEPTION_BREAKPOINT ), + ERR_RECORD( EXCEPTION_DATATYPE_MISALIGNMENT ), + ERR_RECORD( EXCEPTION_FLT_DENORMAL_OPERAND ), + ERR_RECORD( EXCEPTION_FLT_DIVIDE_BY_ZERO ), + ERR_RECORD( EXCEPTION_FLT_INEXACT_RESULT ), + ERR_RECORD( EXCEPTION_FLT_INVALID_OPERATION ), + ERR_RECORD( EXCEPTION_FLT_OVERFLOW ), + ERR_RECORD( EXCEPTION_FLT_STACK_CHECK ), + ERR_RECORD( EXCEPTION_FLT_UNDERFLOW ), + ERR_RECORD( EXCEPTION_ILLEGAL_INSTRUCTION ), + ERR_RECORD( EXCEPTION_IN_PAGE_ERROR ), + ERR_RECORD( EXCEPTION_INT_DIVIDE_BY_ZERO ), + ERR_RECORD( EXCEPTION_INT_OVERFLOW ), + ERR_RECORD( EXCEPTION_INVALID_DISPOSITION ), + ERR_RECORD( EXCEPTION_NONCONTINUABLE_EXCEPTION ), + ERR_RECORD( EXCEPTION_PRIV_INSTRUCTION ), + ERR_RECORD( EXCEPTION_SINGLE_STEP ), + ERR_RECORD( EXCEPTION_STACK_OVERFLOW ), + ERR_RECORD( EXCEPTION_ACCESS_VIOLATION ), + }; + + int nErrors = sizeof( errors ) / sizeof( errors[0] ); + int i=0; + char *pchReason = NULL; + char chUnknownBuffer[32]; + for ( i; ( i < nErrors ) && !pchReason; i++ ) + { + if ( errors[i].code == uCode ) + pchReason = errors[i].pReason; + } + + if ( i == nErrors ) + { + sprintf( chUnknownBuffer, "Error code 0x%08X", uCode ); + pchReason = chUnknownBuffer; + } + + VMPI_HandleCrash( pchReason, pvExceptionInfo, true ); + + TerminateProcess( GetCurrentProcess(), 1 ); +} + + +void HandleMPIDisconnect( int procID, const char *pReason ) +{ + int nLiveWorkers = VMPI_GetCurrentNumberOfConnections() - g_nDisconnects - 1; + + // We ran into the size limit before and it wasn't readily apparent that the size limit had + // been breached, so make sure to show errors about invalid packet sizes.. + bool bOldSuppress = g_bSuppressPrintfOutput; + g_bSuppressPrintfOutput = ( Q_stristr( pReason, "invalid packet size" ) == 0 ); + + Warning( "\n\n--- WARNING: lost connection to '%s' (%s).\n", VMPI_GetMachineName( procID ), pReason ); + + if ( g_bMPIMaster ) + { + Warning( "%d workers remain.\n\n", nLiveWorkers ); + + ++g_nDisconnects; + /* + if ( VMPI_GetCurrentNumberOfConnections() - g_nDisconnects <= 1 ) + { + Error( "All machines disconnected!" ); + } + */ + } + else + { + VMPI_HandleAutoRestart(); + Error( "Worker quitting." ); + } + + g_bSuppressPrintfOutput = bOldSuppress; +} + + -- cgit v1.2.3