From f56bb35301836e56582a575a75864392a0177875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20P=2E=20Tjern=C3=B8?= Date: Mon, 2 Dec 2013 19:31:46 -0800 Subject: Fix line endings. WHAMMY. --- .../phonemeextractor/phonemeextractor_ims.cpp | 2148 ++++++++++---------- 1 file changed, 1074 insertions(+), 1074 deletions(-) (limited to 'mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp') diff --git a/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp index 70819f8e..29dabab4 100644 --- a/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp +++ b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp @@ -1,1075 +1,1075 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -// $NoKeywords: $ -//=============================================================================// -#include -#include -#include -#include -#include -#include -#include -#include - -#include "phonemeextractor/PhonemeExtractor.h" -#include "ims_helper/ims_helper.h" - -#include "tier0/dbg.h" -#include "sentence.h" -#include "PhonemeConverter.h" -#include "tier1/strtools.h" - -#define TEXTLESS_WORDNAME "[Textless]" - -static IImsHelper *talkback = NULL; - -//----------------------------------------------------------------------------- -// Purpose: Expose the interface -//----------------------------------------------------------------------------- -class CPhonemeExtractorLipSinc : public IPhonemeExtractor -{ -public: - virtual PE_APITYPE GetAPIType() const - { - return SPEECH_API_LIPSINC; - } - - // Used for menus, etc - virtual char const *GetName() const - { - return "IMS (LipSinc)"; - } - - SR_RESULT Extract( - const char *wavfile, - int numsamples, - void (*pfnPrint)( const char *fmt, ... ), - CSentence& inwords, - CSentence& outwords ); - - - CPhonemeExtractorLipSinc( void ); - ~CPhonemeExtractorLipSinc( void ); - - enum - { - MAX_WORD_LENGTH = 128, - }; -private: - - - class CAnalyzedWord - { - public: - char buffer[ MAX_WORD_LENGTH ]; - double starttime; - double endtime; - }; - - class CAnalyzedPhoneme - { - public: - char phoneme[ 32 ]; - double starttime; - double endtime; - }; - - bool InitLipSinc( void ); - void ShutdownLipSinc( void ); - - void DescribeError( TALKBACK_ERR err ); - void Printf( char const *fmt, ... ); - - bool CheckSoundFile( char const *filename ); - bool GetInitialized( void ); - void SetInitialized( bool init ); - - void (*m_pfnPrint)( const char *fmt, ... ); - - char const *ConstructInputSentence( CSentence& inwords ); - bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ); - - char const *ApplyTBWordRules( char const *word ); - - void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ); - void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ); - - int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart ); - - int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ); - int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ); - - CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ); - CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ); - - int ComputeByteFromTime( float time ); - - bool m_bInitialized; - - float m_flSampleCount; - float m_flDuration; - - float m_flSamplesPerSecond; - - int m_nBytesPerSample; - - HMODULE m_hHelper; -}; - -CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void ) -{ - m_hHelper = (HMODULE)0; - m_pfnPrint = NULL; - - m_bInitialized = false; - - m_flSampleCount = 0.0f; - m_flDuration = 0.0f; - - m_flSamplesPerSecond = 0.0f; - - m_nBytesPerSample = 0; -} - -CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void ) -{ - if ( GetInitialized() ) - { - ShutdownLipSinc(); - } -} - -bool CPhonemeExtractorLipSinc::GetInitialized( void ) -{ - return m_bInitialized; -} - -void CPhonemeExtractorLipSinc::SetInitialized( bool init ) -{ - m_bInitialized = init; -} - -int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time ) -{ - if ( !m_flDuration ) - return 0; - - float frac = time / m_flDuration; - - float sampleNumber = frac * m_flSampleCount; - - int bytenumber = sampleNumber * m_nBytesPerSample; - - return bytenumber; -} - -void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err ) -{ - Assert( m_pfnPrint ); - - // Get the error description. - char errorDesc[256] = ""; - if ( err != TALKBACK_NOERR ) - { - talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc ); - } - - // Report or log the error... - (*m_pfnPrint)( "LIPSINC ERROR: %s\n", errorDesc ); -} - -//----------------------------------------------------------------------------- -// Purpose: -// Input : *fmt - -// .. - -//----------------------------------------------------------------------------- -void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... ) -{ - Assert( m_pfnPrint ); - - char string[ 4096 ]; - - va_list argptr; - va_start( argptr, fmt ); - vsprintf( string, fmt, argptr ); - va_end( argptr ); - - (*m_pfnPrint)( "%s", string ); -} - -bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename ) -{ - TALKBACK_SOUND_FILE_METRICS fm; - memset( &fm, 0, sizeof( fm ) ); - fm.m_size = sizeof( fm ); - - TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - return false; - } - - if ( fm.m_canBeAnalyzed ) - { - Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n", - filename, - fm.m_duration, - fm.m_sampleRate, - fm.m_bitsPerSample, - fm.m_channelCount ); - } - - m_flDuration = fm.m_duration; - if ( m_flDuration > 0 ) - { - m_flSamplesPerSecond = m_flSampleCount / m_flDuration; - } - else - { - m_flSamplesPerSecond = 0.0f; - } - - m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 ); - - m_flSampleCount /= m_nBytesPerSample; - - m_nBytesPerSample /= fm.m_channelCount; - - return fm.m_canBeAnalyzed ? true : false; -} - -typedef IImsHelper *(*pfnImsHelper)(void); - -//----------------------------------------------------------------------------- -// Purpose: -// Output : Returns true on success, false on failure. -//----------------------------------------------------------------------------- -bool CPhonemeExtractorLipSinc::InitLipSinc( void ) -{ - if ( GetInitialized() ) - { - return true; - } - - m_hHelper = LoadLibrary( "ims_helper.dll" ); - if ( !m_hHelper ) - { - return false; - } - - pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" ); - if ( !factory ) - { - FreeLibrary( m_hHelper ); - return false; - } - - talkback = reinterpret_cast< IImsHelper * >( (*factory)() ); - if ( !talkback ) - { - FreeLibrary( m_hHelper ); - return false; - } - - char szExeName[ MAX_PATH ]; - szExeName[0] = 0; - GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) ); - - char szBaseDir[ MAX_PATH ]; - Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) ); - - Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) ); - Q_StripTrailingSlash( szBaseDir ); - Q_strlower( szBaseDir ); - - char coreDataDir[ 512 ]; - Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\", - szBaseDir ); - Q_FixSlashes( coreDataDir ); - - char szCheck[ 512 ]; - Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); - struct __stat64 buf; - - if ( _stat64( szCheck, &buf ) != 0 ) - { - Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\", - szBaseDir ); - Q_FixSlashes( coreDataDir ); - Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); - - if ( _stat64( szCheck, &buf ) != 0 ) - { - Error( "Unable to find talkback data files in %s.", coreDataDir ); - } - } - - TALKBACK_ERR err; - - err = talkback->TalkBackStartupLibrary( coreDataDir ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - FreeLibrary( m_hHelper ); - return false; - } - - long verMajor = 0; - long verMinor = 0; - long verRevision = 0; - - err = talkback->TalkBackGetVersion( - &verMajor, - &verMinor, - &verRevision); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - FreeLibrary( m_hHelper ); - return false; - } - - Printf( "Lipsinc TalkBack Version %i.%i.%i\n", verMajor, verMinor, verRevision ); - - m_bInitialized = true; - - return true; -} - -//----------------------------------------------------------------------------- -// Purpose: -//----------------------------------------------------------------------------- -void CPhonemeExtractorLipSinc::ShutdownLipSinc( void ) -{ - // HACK HACK: This seems to crash on exit sometimes - __try - { - talkback->TalkBackShutdownLibrary(); - - FreeLibrary( m_hHelper ); - } - __except(EXCEPTION_EXECUTE_HANDLER ) - { - OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" ); - } -} - -//----------------------------------------------------------------------------- -// Purpose: -// Input : inwords - -// Output : char const -//----------------------------------------------------------------------------- -char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords ) -{ - static char sentence[ 16384 ]; - - sentence[ 0 ] = 0; - - int last = inwords.m_Words.Size() - 1; - - for ( int i = 0 ; i <= last; i++ ) - { - CWordTag *w = inwords.m_Words[ i ]; - - strcat( sentence, w->GetWord() ); - if ( i != last ) - { - strcat( sentence, " " ); - } - } - - if ( inwords.m_Words.Count() == 1 && - !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) - { - sentence[ 0 ] = 0; - } - - return sentence; -} - -bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ) -{ - *ppAnalysis = NULL; - - TALKBACK_ANALYSIS_SETTINGS settings; - memset( &settings, 0, sizeof( settings ) ); - - // Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the - // structure. - settings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS ); - - - // Default value: 30 (frames per second). - settings.fFrameRate = 100; - // Set this to 1 to optimize for flipbook output, 0 to do analysis normally. - // - // Default value: 0 (normal analysis). - settings.fOptimizeForFlipbook = 0; - // Set this to -1 to seed the random number generator with the current time. - // Any other number will be used directly for the random number seed, which - // is useful if you want repeatable speech gestures. This value does not - // influence lip-synching at all. - // - // Default value: -1 (use current time). - settings.fRandomSeed = -1; - // Path to the configuration (.INI) file with phoneme-to-speech-target - // mapping. Set this to NULL to use the default mapping. - // - // Default value: NULL (use default mapping). - settings.fConfigFile = NULL; - - char const *text = ConstructInputSentence( inwords ); - - Printf( "Analyzing: \"%s\"\n", text[ 0 ] ? text : TEXTLESS_WORDNAME ); - - TALKBACK_ERR err = talkback->TalkBackGetAnalysis( - ppAnalysis, - wavfile, - text, - &settings ); - - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - return false; - } - - Printf( "Analysis successful...\n" ); - - return true; -} - -typedef struct -{ - TALKBACK_PHONEME phoneme; - char const *string; -} TBPHONEMES_t; - -static TBPHONEMES_t g_TBPhonemeList[]= -{ - { TALKBACK_PHONEME_IY, "iy" }, - { TALKBACK_PHONEME_IH, "ih" }, - { TALKBACK_PHONEME_EH, "eh" }, - { TALKBACK_PHONEME_EY, "ey" }, - { TALKBACK_PHONEME_AE, "ae" }, - { TALKBACK_PHONEME_AA, "aa" }, - { TALKBACK_PHONEME_AW, "aw" }, - { TALKBACK_PHONEME_AY, "ay" }, - { TALKBACK_PHONEME_AH, "ah" }, - { TALKBACK_PHONEME_AO, "ao" }, - { TALKBACK_PHONEME_OY, "oy" }, - { TALKBACK_PHONEME_OW, "ow" }, - { TALKBACK_PHONEME_UH, "uh" }, - { TALKBACK_PHONEME_UW, "uw" }, - { TALKBACK_PHONEME_ER, "er" }, - { TALKBACK_PHONEME_AX, "ax" }, - { TALKBACK_PHONEME_S, "s" }, - { TALKBACK_PHONEME_SH, "sh" }, - { TALKBACK_PHONEME_Z, "z" }, - { TALKBACK_PHONEME_ZH, "zh" }, - { TALKBACK_PHONEME_F, "f" }, - { TALKBACK_PHONEME_TH, "th" }, - { TALKBACK_PHONEME_V, "v" }, - { TALKBACK_PHONEME_DH, "dh" }, - { TALKBACK_PHONEME_M, "m" }, - { TALKBACK_PHONEME_N, "n" }, - { TALKBACK_PHONEME_NG, "ng" }, - { TALKBACK_PHONEME_L, "l" }, - { TALKBACK_PHONEME_R, "r" }, - { TALKBACK_PHONEME_W, "w" }, - { TALKBACK_PHONEME_Y, "y" }, - { TALKBACK_PHONEME_HH, "hh" }, - { TALKBACK_PHONEME_B, "b" }, - { TALKBACK_PHONEME_D, "d" }, - { TALKBACK_PHONEME_JH, "jh" }, - { TALKBACK_PHONEME_G, "g" }, - { TALKBACK_PHONEME_P, "p" }, - { TALKBACK_PHONEME_T, "t" }, - { TALKBACK_PHONEME_K, "k" }, - { TALKBACK_PHONEME_CH, "ch" }, - { TALKBACK_PHONEME_SIL, "" }, - { -1, NULL } -}; - -char const *TBPhonemeToString( TALKBACK_PHONEME phoneme ) -{ - if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST ) - { - return "Bogus"; - } - - TBPHONEMES_t *item = &g_TBPhonemeList[ phoneme ]; - return item->string; -} - -//----------------------------------------------------------------------------- -// Purpose: -// Input : *analysis - -// time - -// start - -// Output : int -//----------------------------------------------------------------------------- -int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start ) -{ - long count; - - TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - return -1; - } - - if ( count <= 0L ) - return -1; - - // Bogus - if ( count >= 100000L ) - return -1; - - for ( int i = 0; i < (int)count; i++ ) - { - TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID; - err = talkback->TalkBackGetPhonemeEnum( analysis, i, &tbPhoneme ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - continue; - } - - double t; - - if ( start ) - { - err = talkback->TalkBackGetPhonemeStartTime( analysis, i, &t ); - } - else - { - err = talkback->TalkBackGetPhonemeEndTime( analysis, i, &t ); - } - - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - continue; - } - - if ( t == time ) - { - return i; - } - } - - return -1; -} - -//----------------------------------------------------------------------------- -// Purpose: -// Input : *analysis - -// starttime - -// Output : int -//----------------------------------------------------------------------------- -int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ) -{ - return GetPhonemeIndexAtWord( analysis, starttime, true ); -} - -//----------------------------------------------------------------------------- -// Purpose: -// Input : *analysis - -// endtime - -// Output : int -//----------------------------------------------------------------------------- -int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ) -{ - return GetPhonemeIndexAtWord( analysis, endtime, false ); -} - -CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ) -{ - static CAnalyzedPhoneme p; - - memset( &p, 0, sizeof( p ) ); - - TALKBACK_PHONEME tb; - - TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - return NULL; - } - - strcpy( p.phoneme, TBPhonemeToString( tb ) ); - - err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &p.starttime ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - return NULL; - } - err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &p.endtime ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - return NULL; - } - - return &p; -} - -CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ) -{ - static CAnalyzedWord w; - - memset( &w, 0, sizeof( w ) ); - - long chars = sizeof( w.buffer ); - - TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, chars, w.buffer ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - return NULL; - } - - err = talkback->TalkBackGetWordStartTime( analysis, index, &w.starttime ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - return NULL; - } - err = talkback->TalkBackGetWordEndTime( analysis, index, &w.endtime ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - return NULL; - } - - return &w; -} - -//----------------------------------------------------------------------------- -// Purpose: -// Input : *w1 - -// *w2 - -// Output : Returns true on success, false on failure. -//----------------------------------------------------------------------------- -bool FuzzyWordMatch( char const *w1, char const *w2 ) -{ - int len1 = strlen( w1 ); - int len2 = strlen( w2 ); - - int minlen = min( len1, len2 ); - - // Found a match - if ( !strnicmp( w1, w2, minlen ) ) - return true; - - int letterdiff = abs( len1 - len2 ); - // More than three letters different, don't bother - if ( letterdiff > 5 ) - return false; - - // Compute a "delta" - char *p1 = (char *)w1; - char *p2 = (char *)w2; - - CUtlVector word1; - CUtlVector word2; - - while ( *p1 ) - { - if ( V_isalpha( *p1 ) ) - { - word1.AddToTail( *p1 ); - } - p1++; - } - - while ( *p2 ) - { - if ( V_isalpha( *p2 ) ) - { - word2.AddToTail( *p2 ); - } - p2++; - } - - int i; - for ( i = 0; i < word1.Size(); i++ ) - { - char c = word1[ i ]; - - // See if c is in word 2, if so subtract it out - int idx = word2.Find( c ); - - if ( idx != word2.InvalidIndex() ) - { - word2.Remove( idx ); - } - } - - if ( word2.Size() <= letterdiff ) - return true; - - word2.RemoveAll(); - - while ( *p2 ) - { - if ( V_isalpha( *p2 ) ) - { - word2.AddToTail( *p2 ); - } - p2++; - } - - for ( i = 0; i < word2.Size(); i++ ) - { - char c = word2[ i ]; - - // See if c is in word 2, if so subtract it out - int idx = word1.Find( c ); - - if ( idx != word1.InvalidIndex() ) - { - word1.Remove( idx ); - } - } - - if ( word1.Size() <= letterdiff ) - return true; - - return false; -} - -//----------------------------------------------------------------------------- -// Purpose: For foreign language stuff, if inwords is empty, process anyway... -// Input : *analysis - -// outwords - -//----------------------------------------------------------------------------- -void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ) -{ - long count; - - TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - return; - } - - CWordTag *newWord = new CWordTag; - - newWord->SetWord( TEXTLESS_WORDNAME ); - - float starttime = 0.0f; - float endtime = 1.0f; - - - for ( int i = 0; i < count; ++i ) - { - // Get phoneme and timing info - CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i ); - if ( !ph ) - continue; - - CPhonemeTag *ptag = new CPhonemeTag; - - if ( i == 0 || ( ph->starttime < starttime ) ) - { - starttime = ph->starttime; - } - - if ( i == 0 || ( ph->endtime > endtime ) ) - { - endtime = ph->endtime; - } - - ptag->SetStartTime( ph->starttime ); - ptag->SetEndTime( ph->endtime ); - - ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); - ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); - - ptag->SetTag( ph->phoneme ); - ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); - - newWord->m_Phonemes.AddToTail( ptag ); - } - - newWord->m_flStartTime = starttime; - newWord->m_flEndTime = endtime; - - newWord->m_uiStartByte = ComputeByteFromTime( starttime ); - newWord->m_uiEndByte = ComputeByteFromTime( endtime ); - - outwords.Reset(); - outwords.AddWordTag( newWord ); - outwords.SetTextFromWords(); -} - -//----------------------------------------------------------------------------- -// Purpose: -// Input : *analysis - -// inwords - -// outwords - -//----------------------------------------------------------------------------- -void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ) -{ - long count; - - TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count ); - if ( err != TALKBACK_NOERR ) - { - DescribeError( err ); - return; - } - - if ( count <= 0L ) - { - if ( inwords.m_Words.Count() == 0 || - !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) - { - ProcessWordsTextless( analysis, outwords ); - } - return; - } - - // Bogus - if ( count >= 100000L ) - return; - - int inwordpos = 0; - int awordpos = 0; - - outwords.Reset(); - - char previous[ 256 ]; - previous[ 0 ] = 0; - - while ( inwordpos < inwords.m_Words.Size() ) - { - CWordTag *in = inwords.m_Words[ inwordpos ]; - - if ( awordpos >= count ) - { - // Just copy the rest over without phonemes - CWordTag *copy = new CWordTag( *in ); - - outwords.AddWordTag( copy ); - - inwordpos++; - continue; - } - - // Should never fail - CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos ); - if ( !w ) - { - return; - } - - if ( !stricmp( w->buffer, "" ) ) - { - awordpos++; - continue; - } - - char const *check = ApplyTBWordRules( in->GetWord() ); - if ( !FuzzyWordMatch( check, w->buffer ) ) - { - bool advance_input = true; - if ( previous[ 0 ] ) - { - if ( FuzzyWordMatch( previous, w->buffer ) ) - { - advance_input = false; - } - } - - if ( advance_input ) - { - inwordpos++; - } - awordpos++; - continue; - } - strcpy( previous, check ); - - CWordTag *newWord = new CWordTag; - - newWord->SetWord( in->GetWord() ); - - newWord->m_flStartTime = w->starttime; - newWord->m_flEndTime = w->endtime; - - newWord->m_uiStartByte = ComputeByteFromTime( w->starttime ); - newWord->m_uiEndByte = ComputeByteFromTime( w->endtime ); - - int phonemestart, phonemeend; - - phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime ); - phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime ); - - if ( phonemestart >= 0 && phonemeend >= 0 ) - { - for ( ; phonemestart <= phonemeend; phonemestart++ ) - { - // Get phoneme and timing info - CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart ); - if ( !ph ) - continue; - - CPhonemeTag *ptag = new CPhonemeTag; - ptag->SetStartTime( ph->starttime ); - ptag->SetEndTime( ph->endtime ); - - ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); - ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); - - ptag->SetTag( ph->phoneme ); - ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); - - newWord->m_Phonemes.AddToTail( ptag ); - } - } - - outwords.AddWordTag( newWord ); - inwordpos++; - awordpos++; - } -} - -char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word ) -{ - static char outword[ 256 ]; - - char const *in = word; - char *out = outword; - - while ( *in && ( ( out - outword ) <= 255 ) ) - { - if ( *in == '\t' || - *in == ' ' || - *in == '\n' || - *in == '-' || - *in == '.' || - *in == ',' || - *in == ';' || - *in == '?' || - *in == '"' || - *in == ':' || - *in == '(' || - *in == ')' ) - { - in++; - *out++ = ' '; - continue; - } - - if ( !V_isprint( *in ) ) - { - in++; - continue; - } - - if ( *in >= 128 ) - { - in++; - continue; - } - - // Skip numbers - if ( *in >= '0' && *in <= '9' ) - { - in++; - continue; - } - - // Convert all letters to upper case - if ( *in >= 'a' && *in <= 'z' ) - { - *out++ = ( *in++ ) - 'a' + 'A'; - continue; - } - - if ( *in >= 'A' && *in <= 'Z' ) - { - *out++ = *in++; - continue; - } - - if ( *in == '\'' ) - { - *out++ = *in++; - continue; - } - - in++; - } - - *out = 0; - - return outword; -} - -//----------------------------------------------------------------------------- -// Purpose: Given a wavfile and a list of inwords, determines the word/phonene -// sample counts for the sentce -// Output : SR_RESULT -//----------------------------------------------------------------------------- -SR_RESULT CPhonemeExtractorLipSinc::Extract( - const char *wavfile, - int numsamples, - void (*pfnPrint)( const char *fmt, ... ), - CSentence& inwords, - CSentence& outwords ) -{ - // g_enableTalkBackDebuggingOutput = 1; - - m_pfnPrint = pfnPrint; - - if ( !InitLipSinc() ) - { - return SR_RESULT_ERROR; - } - - m_flSampleCount = numsamples; - - if ( !CheckSoundFile( wavfile ) ) - { - FreeLibrary( m_hHelper ); - return SR_RESULT_ERROR; - } - - TALKBACK_ANALYSIS *analysis = NULL; - - if ( !AttemptAnalysis( &analysis, wavfile, inwords ) ) - { - FreeLibrary( m_hHelper ); - return SR_RESULT_FAILED; - } - - if ( strlen( inwords.GetText() ) <= 0 ) - { - inwords.SetTextFromWords(); - } - - outwords = inwords; - - // Examine data - ProcessWords( analysis, inwords, outwords ); - - if ( analysis ) - { - talkback->TalkBackFreeAnalysis( &analysis ); - } - - return SR_RESULT_SUCCESS; -} - +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +//=============================================================================// +#include +#include +#include +#include +#include +#include +#include +#include + +#include "phonemeextractor/PhonemeExtractor.h" +#include "ims_helper/ims_helper.h" + +#include "tier0/dbg.h" +#include "sentence.h" +#include "PhonemeConverter.h" +#include "tier1/strtools.h" + +#define TEXTLESS_WORDNAME "[Textless]" + +static IImsHelper *talkback = NULL; + +//----------------------------------------------------------------------------- +// Purpose: Expose the interface +//----------------------------------------------------------------------------- +class CPhonemeExtractorLipSinc : public IPhonemeExtractor +{ +public: + virtual PE_APITYPE GetAPIType() const + { + return SPEECH_API_LIPSINC; + } + + // Used for menus, etc + virtual char const *GetName() const + { + return "IMS (LipSinc)"; + } + + SR_RESULT Extract( + const char *wavfile, + int numsamples, + void (*pfnPrint)( const char *fmt, ... ), + CSentence& inwords, + CSentence& outwords ); + + + CPhonemeExtractorLipSinc( void ); + ~CPhonemeExtractorLipSinc( void ); + + enum + { + MAX_WORD_LENGTH = 128, + }; +private: + + + class CAnalyzedWord + { + public: + char buffer[ MAX_WORD_LENGTH ]; + double starttime; + double endtime; + }; + + class CAnalyzedPhoneme + { + public: + char phoneme[ 32 ]; + double starttime; + double endtime; + }; + + bool InitLipSinc( void ); + void ShutdownLipSinc( void ); + + void DescribeError( TALKBACK_ERR err ); + void Printf( char const *fmt, ... ); + + bool CheckSoundFile( char const *filename ); + bool GetInitialized( void ); + void SetInitialized( bool init ); + + void (*m_pfnPrint)( const char *fmt, ... ); + + char const *ConstructInputSentence( CSentence& inwords ); + bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ); + + char const *ApplyTBWordRules( char const *word ); + + void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ); + void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ); + + int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart ); + + int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ); + int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ); + + CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ); + CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ); + + int ComputeByteFromTime( float time ); + + bool m_bInitialized; + + float m_flSampleCount; + float m_flDuration; + + float m_flSamplesPerSecond; + + int m_nBytesPerSample; + + HMODULE m_hHelper; +}; + +CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void ) +{ + m_hHelper = (HMODULE)0; + m_pfnPrint = NULL; + + m_bInitialized = false; + + m_flSampleCount = 0.0f; + m_flDuration = 0.0f; + + m_flSamplesPerSecond = 0.0f; + + m_nBytesPerSample = 0; +} + +CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void ) +{ + if ( GetInitialized() ) + { + ShutdownLipSinc(); + } +} + +bool CPhonemeExtractorLipSinc::GetInitialized( void ) +{ + return m_bInitialized; +} + +void CPhonemeExtractorLipSinc::SetInitialized( bool init ) +{ + m_bInitialized = init; +} + +int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time ) +{ + if ( !m_flDuration ) + return 0; + + float frac = time / m_flDuration; + + float sampleNumber = frac * m_flSampleCount; + + int bytenumber = sampleNumber * m_nBytesPerSample; + + return bytenumber; +} + +void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err ) +{ + Assert( m_pfnPrint ); + + // Get the error description. + char errorDesc[256] = ""; + if ( err != TALKBACK_NOERR ) + { + talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc ); + } + + // Report or log the error... + (*m_pfnPrint)( "LIPSINC ERROR: %s\n", errorDesc ); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *fmt - +// .. - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... ) +{ + Assert( m_pfnPrint ); + + char string[ 4096 ]; + + va_list argptr; + va_start( argptr, fmt ); + vsprintf( string, fmt, argptr ); + va_end( argptr ); + + (*m_pfnPrint)( "%s", string ); +} + +bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename ) +{ + TALKBACK_SOUND_FILE_METRICS fm; + memset( &fm, 0, sizeof( fm ) ); + fm.m_size = sizeof( fm ); + + TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return false; + } + + if ( fm.m_canBeAnalyzed ) + { + Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n", + filename, + fm.m_duration, + fm.m_sampleRate, + fm.m_bitsPerSample, + fm.m_channelCount ); + } + + m_flDuration = fm.m_duration; + if ( m_flDuration > 0 ) + { + m_flSamplesPerSecond = m_flSampleCount / m_flDuration; + } + else + { + m_flSamplesPerSecond = 0.0f; + } + + m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 ); + + m_flSampleCount /= m_nBytesPerSample; + + m_nBytesPerSample /= fm.m_channelCount; + + return fm.m_canBeAnalyzed ? true : false; +} + +typedef IImsHelper *(*pfnImsHelper)(void); + +//----------------------------------------------------------------------------- +// Purpose: +// Output : Returns true on success, false on failure. +//----------------------------------------------------------------------------- +bool CPhonemeExtractorLipSinc::InitLipSinc( void ) +{ + if ( GetInitialized() ) + { + return true; + } + + m_hHelper = LoadLibrary( "ims_helper.dll" ); + if ( !m_hHelper ) + { + return false; + } + + pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" ); + if ( !factory ) + { + FreeLibrary( m_hHelper ); + return false; + } + + talkback = reinterpret_cast< IImsHelper * >( (*factory)() ); + if ( !talkback ) + { + FreeLibrary( m_hHelper ); + return false; + } + + char szExeName[ MAX_PATH ]; + szExeName[0] = 0; + GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) ); + + char szBaseDir[ MAX_PATH ]; + Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) ); + + Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) ); + Q_StripTrailingSlash( szBaseDir ); + Q_strlower( szBaseDir ); + + char coreDataDir[ 512 ]; + Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\", + szBaseDir ); + Q_FixSlashes( coreDataDir ); + + char szCheck[ 512 ]; + Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); + struct __stat64 buf; + + if ( _stat64( szCheck, &buf ) != 0 ) + { + Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\", + szBaseDir ); + Q_FixSlashes( coreDataDir ); + Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); + + if ( _stat64( szCheck, &buf ) != 0 ) + { + Error( "Unable to find talkback data files in %s.", coreDataDir ); + } + } + + TALKBACK_ERR err; + + err = talkback->TalkBackStartupLibrary( coreDataDir ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + FreeLibrary( m_hHelper ); + return false; + } + + long verMajor = 0; + long verMinor = 0; + long verRevision = 0; + + err = talkback->TalkBackGetVersion( + &verMajor, + &verMinor, + &verRevision); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + FreeLibrary( m_hHelper ); + return false; + } + + Printf( "Lipsinc TalkBack Version %i.%i.%i\n", verMajor, verMinor, verRevision ); + + m_bInitialized = true; + + return true; +} + +//----------------------------------------------------------------------------- +// Purpose: +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ShutdownLipSinc( void ) +{ + // HACK HACK: This seems to crash on exit sometimes + __try + { + talkback->TalkBackShutdownLibrary(); + + FreeLibrary( m_hHelper ); + } + __except(EXCEPTION_EXECUTE_HANDLER ) + { + OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" ); + } +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : inwords - +// Output : char const +//----------------------------------------------------------------------------- +char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords ) +{ + static char sentence[ 16384 ]; + + sentence[ 0 ] = 0; + + int last = inwords.m_Words.Size() - 1; + + for ( int i = 0 ; i <= last; i++ ) + { + CWordTag *w = inwords.m_Words[ i ]; + + strcat( sentence, w->GetWord() ); + if ( i != last ) + { + strcat( sentence, " " ); + } + } + + if ( inwords.m_Words.Count() == 1 && + !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) + { + sentence[ 0 ] = 0; + } + + return sentence; +} + +bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ) +{ + *ppAnalysis = NULL; + + TALKBACK_ANALYSIS_SETTINGS settings; + memset( &settings, 0, sizeof( settings ) ); + + // Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the + // structure. + settings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS ); + + + // Default value: 30 (frames per second). + settings.fFrameRate = 100; + // Set this to 1 to optimize for flipbook output, 0 to do analysis normally. + // + // Default value: 0 (normal analysis). + settings.fOptimizeForFlipbook = 0; + // Set this to -1 to seed the random number generator with the current time. + // Any other number will be used directly for the random number seed, which + // is useful if you want repeatable speech gestures. This value does not + // influence lip-synching at all. + // + // Default value: -1 (use current time). + settings.fRandomSeed = -1; + // Path to the configuration (.INI) file with phoneme-to-speech-target + // mapping. Set this to NULL to use the default mapping. + // + // Default value: NULL (use default mapping). + settings.fConfigFile = NULL; + + char const *text = ConstructInputSentence( inwords ); + + Printf( "Analyzing: \"%s\"\n", text[ 0 ] ? text : TEXTLESS_WORDNAME ); + + TALKBACK_ERR err = talkback->TalkBackGetAnalysis( + ppAnalysis, + wavfile, + text, + &settings ); + + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return false; + } + + Printf( "Analysis successful...\n" ); + + return true; +} + +typedef struct +{ + TALKBACK_PHONEME phoneme; + char const *string; +} TBPHONEMES_t; + +static TBPHONEMES_t g_TBPhonemeList[]= +{ + { TALKBACK_PHONEME_IY, "iy" }, + { TALKBACK_PHONEME_IH, "ih" }, + { TALKBACK_PHONEME_EH, "eh" }, + { TALKBACK_PHONEME_EY, "ey" }, + { TALKBACK_PHONEME_AE, "ae" }, + { TALKBACK_PHONEME_AA, "aa" }, + { TALKBACK_PHONEME_AW, "aw" }, + { TALKBACK_PHONEME_AY, "ay" }, + { TALKBACK_PHONEME_AH, "ah" }, + { TALKBACK_PHONEME_AO, "ao" }, + { TALKBACK_PHONEME_OY, "oy" }, + { TALKBACK_PHONEME_OW, "ow" }, + { TALKBACK_PHONEME_UH, "uh" }, + { TALKBACK_PHONEME_UW, "uw" }, + { TALKBACK_PHONEME_ER, "er" }, + { TALKBACK_PHONEME_AX, "ax" }, + { TALKBACK_PHONEME_S, "s" }, + { TALKBACK_PHONEME_SH, "sh" }, + { TALKBACK_PHONEME_Z, "z" }, + { TALKBACK_PHONEME_ZH, "zh" }, + { TALKBACK_PHONEME_F, "f" }, + { TALKBACK_PHONEME_TH, "th" }, + { TALKBACK_PHONEME_V, "v" }, + { TALKBACK_PHONEME_DH, "dh" }, + { TALKBACK_PHONEME_M, "m" }, + { TALKBACK_PHONEME_N, "n" }, + { TALKBACK_PHONEME_NG, "ng" }, + { TALKBACK_PHONEME_L, "l" }, + { TALKBACK_PHONEME_R, "r" }, + { TALKBACK_PHONEME_W, "w" }, + { TALKBACK_PHONEME_Y, "y" }, + { TALKBACK_PHONEME_HH, "hh" }, + { TALKBACK_PHONEME_B, "b" }, + { TALKBACK_PHONEME_D, "d" }, + { TALKBACK_PHONEME_JH, "jh" }, + { TALKBACK_PHONEME_G, "g" }, + { TALKBACK_PHONEME_P, "p" }, + { TALKBACK_PHONEME_T, "t" }, + { TALKBACK_PHONEME_K, "k" }, + { TALKBACK_PHONEME_CH, "ch" }, + { TALKBACK_PHONEME_SIL, "" }, + { -1, NULL } +}; + +char const *TBPhonemeToString( TALKBACK_PHONEME phoneme ) +{ + if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST ) + { + return "Bogus"; + } + + TBPHONEMES_t *item = &g_TBPhonemeList[ phoneme ]; + return item->string; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// time - +// start - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return -1; + } + + if ( count <= 0L ) + return -1; + + // Bogus + if ( count >= 100000L ) + return -1; + + for ( int i = 0; i < (int)count; i++ ) + { + TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID; + err = talkback->TalkBackGetPhonemeEnum( analysis, i, &tbPhoneme ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + continue; + } + + double t; + + if ( start ) + { + err = talkback->TalkBackGetPhonemeStartTime( analysis, i, &t ); + } + else + { + err = talkback->TalkBackGetPhonemeEndTime( analysis, i, &t ); + } + + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + continue; + } + + if ( t == time ) + { + return i; + } + } + + return -1; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// starttime - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ) +{ + return GetPhonemeIndexAtWord( analysis, starttime, true ); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// endtime - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ) +{ + return GetPhonemeIndexAtWord( analysis, endtime, false ); +} + +CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ) +{ + static CAnalyzedPhoneme p; + + memset( &p, 0, sizeof( p ) ); + + TALKBACK_PHONEME tb; + + TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + strcpy( p.phoneme, TBPhonemeToString( tb ) ); + + err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &p.starttime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &p.endtime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + return &p; +} + +CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ) +{ + static CAnalyzedWord w; + + memset( &w, 0, sizeof( w ) ); + + long chars = sizeof( w.buffer ); + + TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, chars, w.buffer ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + err = talkback->TalkBackGetWordStartTime( analysis, index, &w.starttime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + err = talkback->TalkBackGetWordEndTime( analysis, index, &w.endtime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + return &w; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *w1 - +// *w2 - +// Output : Returns true on success, false on failure. +//----------------------------------------------------------------------------- +bool FuzzyWordMatch( char const *w1, char const *w2 ) +{ + int len1 = strlen( w1 ); + int len2 = strlen( w2 ); + + int minlen = min( len1, len2 ); + + // Found a match + if ( !strnicmp( w1, w2, minlen ) ) + return true; + + int letterdiff = abs( len1 - len2 ); + // More than three letters different, don't bother + if ( letterdiff > 5 ) + return false; + + // Compute a "delta" + char *p1 = (char *)w1; + char *p2 = (char *)w2; + + CUtlVector word1; + CUtlVector word2; + + while ( *p1 ) + { + if ( V_isalpha( *p1 ) ) + { + word1.AddToTail( *p1 ); + } + p1++; + } + + while ( *p2 ) + { + if ( V_isalpha( *p2 ) ) + { + word2.AddToTail( *p2 ); + } + p2++; + } + + int i; + for ( i = 0; i < word1.Size(); i++ ) + { + char c = word1[ i ]; + + // See if c is in word 2, if so subtract it out + int idx = word2.Find( c ); + + if ( idx != word2.InvalidIndex() ) + { + word2.Remove( idx ); + } + } + + if ( word2.Size() <= letterdiff ) + return true; + + word2.RemoveAll(); + + while ( *p2 ) + { + if ( V_isalpha( *p2 ) ) + { + word2.AddToTail( *p2 ); + } + p2++; + } + + for ( i = 0; i < word2.Size(); i++ ) + { + char c = word2[ i ]; + + // See if c is in word 2, if so subtract it out + int idx = word1.Find( c ); + + if ( idx != word1.InvalidIndex() ) + { + word1.Remove( idx ); + } + } + + if ( word1.Size() <= letterdiff ) + return true; + + return false; +} + +//----------------------------------------------------------------------------- +// Purpose: For foreign language stuff, if inwords is empty, process anyway... +// Input : *analysis - +// outwords - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return; + } + + CWordTag *newWord = new CWordTag; + + newWord->SetWord( TEXTLESS_WORDNAME ); + + float starttime = 0.0f; + float endtime = 1.0f; + + + for ( int i = 0; i < count; ++i ) + { + // Get phoneme and timing info + CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i ); + if ( !ph ) + continue; + + CPhonemeTag *ptag = new CPhonemeTag; + + if ( i == 0 || ( ph->starttime < starttime ) ) + { + starttime = ph->starttime; + } + + if ( i == 0 || ( ph->endtime > endtime ) ) + { + endtime = ph->endtime; + } + + ptag->SetStartTime( ph->starttime ); + ptag->SetEndTime( ph->endtime ); + + ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); + ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); + + ptag->SetTag( ph->phoneme ); + ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); + + newWord->m_Phonemes.AddToTail( ptag ); + } + + newWord->m_flStartTime = starttime; + newWord->m_flEndTime = endtime; + + newWord->m_uiStartByte = ComputeByteFromTime( starttime ); + newWord->m_uiEndByte = ComputeByteFromTime( endtime ); + + outwords.Reset(); + outwords.AddWordTag( newWord ); + outwords.SetTextFromWords(); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// inwords - +// outwords - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return; + } + + if ( count <= 0L ) + { + if ( inwords.m_Words.Count() == 0 || + !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) + { + ProcessWordsTextless( analysis, outwords ); + } + return; + } + + // Bogus + if ( count >= 100000L ) + return; + + int inwordpos = 0; + int awordpos = 0; + + outwords.Reset(); + + char previous[ 256 ]; + previous[ 0 ] = 0; + + while ( inwordpos < inwords.m_Words.Size() ) + { + CWordTag *in = inwords.m_Words[ inwordpos ]; + + if ( awordpos >= count ) + { + // Just copy the rest over without phonemes + CWordTag *copy = new CWordTag( *in ); + + outwords.AddWordTag( copy ); + + inwordpos++; + continue; + } + + // Should never fail + CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos ); + if ( !w ) + { + return; + } + + if ( !stricmp( w->buffer, "" ) ) + { + awordpos++; + continue; + } + + char const *check = ApplyTBWordRules( in->GetWord() ); + if ( !FuzzyWordMatch( check, w->buffer ) ) + { + bool advance_input = true; + if ( previous[ 0 ] ) + { + if ( FuzzyWordMatch( previous, w->buffer ) ) + { + advance_input = false; + } + } + + if ( advance_input ) + { + inwordpos++; + } + awordpos++; + continue; + } + strcpy( previous, check ); + + CWordTag *newWord = new CWordTag; + + newWord->SetWord( in->GetWord() ); + + newWord->m_flStartTime = w->starttime; + newWord->m_flEndTime = w->endtime; + + newWord->m_uiStartByte = ComputeByteFromTime( w->starttime ); + newWord->m_uiEndByte = ComputeByteFromTime( w->endtime ); + + int phonemestart, phonemeend; + + phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime ); + phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime ); + + if ( phonemestart >= 0 && phonemeend >= 0 ) + { + for ( ; phonemestart <= phonemeend; phonemestart++ ) + { + // Get phoneme and timing info + CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart ); + if ( !ph ) + continue; + + CPhonemeTag *ptag = new CPhonemeTag; + ptag->SetStartTime( ph->starttime ); + ptag->SetEndTime( ph->endtime ); + + ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); + ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); + + ptag->SetTag( ph->phoneme ); + ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); + + newWord->m_Phonemes.AddToTail( ptag ); + } + } + + outwords.AddWordTag( newWord ); + inwordpos++; + awordpos++; + } +} + +char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word ) +{ + static char outword[ 256 ]; + + char const *in = word; + char *out = outword; + + while ( *in && ( ( out - outword ) <= 255 ) ) + { + if ( *in == '\t' || + *in == ' ' || + *in == '\n' || + *in == '-' || + *in == '.' || + *in == ',' || + *in == ';' || + *in == '?' || + *in == '"' || + *in == ':' || + *in == '(' || + *in == ')' ) + { + in++; + *out++ = ' '; + continue; + } + + if ( !V_isprint( *in ) ) + { + in++; + continue; + } + + if ( *in >= 128 ) + { + in++; + continue; + } + + // Skip numbers + if ( *in >= '0' && *in <= '9' ) + { + in++; + continue; + } + + // Convert all letters to upper case + if ( *in >= 'a' && *in <= 'z' ) + { + *out++ = ( *in++ ) - 'a' + 'A'; + continue; + } + + if ( *in >= 'A' && *in <= 'Z' ) + { + *out++ = *in++; + continue; + } + + if ( *in == '\'' ) + { + *out++ = *in++; + continue; + } + + in++; + } + + *out = 0; + + return outword; +} + +//----------------------------------------------------------------------------- +// Purpose: Given a wavfile and a list of inwords, determines the word/phonene +// sample counts for the sentce +// Output : SR_RESULT +//----------------------------------------------------------------------------- +SR_RESULT CPhonemeExtractorLipSinc::Extract( + const char *wavfile, + int numsamples, + void (*pfnPrint)( const char *fmt, ... ), + CSentence& inwords, + CSentence& outwords ) +{ + // g_enableTalkBackDebuggingOutput = 1; + + m_pfnPrint = pfnPrint; + + if ( !InitLipSinc() ) + { + return SR_RESULT_ERROR; + } + + m_flSampleCount = numsamples; + + if ( !CheckSoundFile( wavfile ) ) + { + FreeLibrary( m_hHelper ); + return SR_RESULT_ERROR; + } + + TALKBACK_ANALYSIS *analysis = NULL; + + if ( !AttemptAnalysis( &analysis, wavfile, inwords ) ) + { + FreeLibrary( m_hHelper ); + return SR_RESULT_FAILED; + } + + if ( strlen( inwords.GetText() ) <= 0 ) + { + inwords.SetTextFromWords(); + } + + outwords = inwords; + + // Examine data + ProcessWords( analysis, inwords, outwords ); + + if ( analysis ) + { + talkback->TalkBackFreeAnalysis( &analysis ); + } + + return SR_RESULT_SUCCESS; +} + EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorLipSinc, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE ); \ No newline at end of file -- cgit v1.2.3