From 39ed87570bdb2f86969d4be821c94b722dc71179 Mon Sep 17 00:00:00 2001 From: Joe Ludwig Date: Wed, 26 Jun 2013 15:22:04 -0700 Subject: First version of the SOurce SDK 2013 --- .../phonemeextractor/phonemeextractor_ims.cpp | 1075 ++++++++++++++++++++ 1 file changed, 1075 insertions(+) create mode 100644 mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp (limited to 'mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp') diff --git a/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp new file mode 100644 index 00000000..70819f8e --- /dev/null +++ b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp @@ -0,0 +1,1075 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +//=============================================================================// +#include +#include +#include +#include +#include +#include +#include +#include + +#include "phonemeextractor/PhonemeExtractor.h" +#include "ims_helper/ims_helper.h" + +#include "tier0/dbg.h" +#include "sentence.h" +#include "PhonemeConverter.h" +#include "tier1/strtools.h" + +#define TEXTLESS_WORDNAME "[Textless]" + +static IImsHelper *talkback = NULL; + +//----------------------------------------------------------------------------- +// Purpose: Expose the interface +//----------------------------------------------------------------------------- +class CPhonemeExtractorLipSinc : public IPhonemeExtractor +{ +public: + virtual PE_APITYPE GetAPIType() const + { + return SPEECH_API_LIPSINC; + } + + // Used for menus, etc + virtual char const *GetName() const + { + return "IMS (LipSinc)"; + } + + SR_RESULT Extract( + const char *wavfile, + int numsamples, + void (*pfnPrint)( const char *fmt, ... ), + CSentence& inwords, + CSentence& outwords ); + + + CPhonemeExtractorLipSinc( void ); + ~CPhonemeExtractorLipSinc( void ); + + enum + { + MAX_WORD_LENGTH = 128, + }; +private: + + + class CAnalyzedWord + { + public: + char buffer[ MAX_WORD_LENGTH ]; + double starttime; + double endtime; + }; + + class CAnalyzedPhoneme + { + public: + char phoneme[ 32 ]; + double starttime; + double endtime; + }; + + bool InitLipSinc( void ); + void ShutdownLipSinc( void ); + + void DescribeError( TALKBACK_ERR err ); + void Printf( char const *fmt, ... ); + + bool CheckSoundFile( char const *filename ); + bool GetInitialized( void ); + void SetInitialized( bool init ); + + void (*m_pfnPrint)( const char *fmt, ... ); + + char const *ConstructInputSentence( CSentence& inwords ); + bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ); + + char const *ApplyTBWordRules( char const *word ); + + void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ); + void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ); + + int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart ); + + int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ); + int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ); + + CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ); + CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ); + + int ComputeByteFromTime( float time ); + + bool m_bInitialized; + + float m_flSampleCount; + float m_flDuration; + + float m_flSamplesPerSecond; + + int m_nBytesPerSample; + + HMODULE m_hHelper; +}; + +CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void ) +{ + m_hHelper = (HMODULE)0; + m_pfnPrint = NULL; + + m_bInitialized = false; + + m_flSampleCount = 0.0f; + m_flDuration = 0.0f; + + m_flSamplesPerSecond = 0.0f; + + m_nBytesPerSample = 0; +} + +CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void ) +{ + if ( GetInitialized() ) + { + ShutdownLipSinc(); + } +} + +bool CPhonemeExtractorLipSinc::GetInitialized( void ) +{ + return m_bInitialized; +} + +void CPhonemeExtractorLipSinc::SetInitialized( bool init ) +{ + m_bInitialized = init; +} + +int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time ) +{ + if ( !m_flDuration ) + return 0; + + float frac = time / m_flDuration; + + float sampleNumber = frac * m_flSampleCount; + + int bytenumber = sampleNumber * m_nBytesPerSample; + + return bytenumber; +} + +void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err ) +{ + Assert( m_pfnPrint ); + + // Get the error description. + char errorDesc[256] = ""; + if ( err != TALKBACK_NOERR ) + { + talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc ); + } + + // Report or log the error... + (*m_pfnPrint)( "LIPSINC ERROR: %s\n", errorDesc ); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *fmt - +// .. - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... ) +{ + Assert( m_pfnPrint ); + + char string[ 4096 ]; + + va_list argptr; + va_start( argptr, fmt ); + vsprintf( string, fmt, argptr ); + va_end( argptr ); + + (*m_pfnPrint)( "%s", string ); +} + +bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename ) +{ + TALKBACK_SOUND_FILE_METRICS fm; + memset( &fm, 0, sizeof( fm ) ); + fm.m_size = sizeof( fm ); + + TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return false; + } + + if ( fm.m_canBeAnalyzed ) + { + Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n", + filename, + fm.m_duration, + fm.m_sampleRate, + fm.m_bitsPerSample, + fm.m_channelCount ); + } + + m_flDuration = fm.m_duration; + if ( m_flDuration > 0 ) + { + m_flSamplesPerSecond = m_flSampleCount / m_flDuration; + } + else + { + m_flSamplesPerSecond = 0.0f; + } + + m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 ); + + m_flSampleCount /= m_nBytesPerSample; + + m_nBytesPerSample /= fm.m_channelCount; + + return fm.m_canBeAnalyzed ? true : false; +} + +typedef IImsHelper *(*pfnImsHelper)(void); + +//----------------------------------------------------------------------------- +// Purpose: +// Output : Returns true on success, false on failure. +//----------------------------------------------------------------------------- +bool CPhonemeExtractorLipSinc::InitLipSinc( void ) +{ + if ( GetInitialized() ) + { + return true; + } + + m_hHelper = LoadLibrary( "ims_helper.dll" ); + if ( !m_hHelper ) + { + return false; + } + + pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" ); + if ( !factory ) + { + FreeLibrary( m_hHelper ); + return false; + } + + talkback = reinterpret_cast< IImsHelper * >( (*factory)() ); + if ( !talkback ) + { + FreeLibrary( m_hHelper ); + return false; + } + + char szExeName[ MAX_PATH ]; + szExeName[0] = 0; + GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) ); + + char szBaseDir[ MAX_PATH ]; + Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) ); + + Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) ); + Q_StripTrailingSlash( szBaseDir ); + Q_strlower( szBaseDir ); + + char coreDataDir[ 512 ]; + Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\", + szBaseDir ); + Q_FixSlashes( coreDataDir ); + + char szCheck[ 512 ]; + Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); + struct __stat64 buf; + + if ( _stat64( szCheck, &buf ) != 0 ) + { + Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\", + szBaseDir ); + Q_FixSlashes( coreDataDir ); + Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); + + if ( _stat64( szCheck, &buf ) != 0 ) + { + Error( "Unable to find talkback data files in %s.", coreDataDir ); + } + } + + TALKBACK_ERR err; + + err = talkback->TalkBackStartupLibrary( coreDataDir ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + FreeLibrary( m_hHelper ); + return false; + } + + long verMajor = 0; + long verMinor = 0; + long verRevision = 0; + + err = talkback->TalkBackGetVersion( + &verMajor, + &verMinor, + &verRevision); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + FreeLibrary( m_hHelper ); + return false; + } + + Printf( "Lipsinc TalkBack Version %i.%i.%i\n", verMajor, verMinor, verRevision ); + + m_bInitialized = true; + + return true; +} + +//----------------------------------------------------------------------------- +// Purpose: +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ShutdownLipSinc( void ) +{ + // HACK HACK: This seems to crash on exit sometimes + __try + { + talkback->TalkBackShutdownLibrary(); + + FreeLibrary( m_hHelper ); + } + __except(EXCEPTION_EXECUTE_HANDLER ) + { + OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" ); + } +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : inwords - +// Output : char const +//----------------------------------------------------------------------------- +char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords ) +{ + static char sentence[ 16384 ]; + + sentence[ 0 ] = 0; + + int last = inwords.m_Words.Size() - 1; + + for ( int i = 0 ; i <= last; i++ ) + { + CWordTag *w = inwords.m_Words[ i ]; + + strcat( sentence, w->GetWord() ); + if ( i != last ) + { + strcat( sentence, " " ); + } + } + + if ( inwords.m_Words.Count() == 1 && + !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) + { + sentence[ 0 ] = 0; + } + + return sentence; +} + +bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ) +{ + *ppAnalysis = NULL; + + TALKBACK_ANALYSIS_SETTINGS settings; + memset( &settings, 0, sizeof( settings ) ); + + // Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the + // structure. + settings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS ); + + + // Default value: 30 (frames per second). + settings.fFrameRate = 100; + // Set this to 1 to optimize for flipbook output, 0 to do analysis normally. + // + // Default value: 0 (normal analysis). + settings.fOptimizeForFlipbook = 0; + // Set this to -1 to seed the random number generator with the current time. + // Any other number will be used directly for the random number seed, which + // is useful if you want repeatable speech gestures. This value does not + // influence lip-synching at all. + // + // Default value: -1 (use current time). + settings.fRandomSeed = -1; + // Path to the configuration (.INI) file with phoneme-to-speech-target + // mapping. Set this to NULL to use the default mapping. + // + // Default value: NULL (use default mapping). + settings.fConfigFile = NULL; + + char const *text = ConstructInputSentence( inwords ); + + Printf( "Analyzing: \"%s\"\n", text[ 0 ] ? text : TEXTLESS_WORDNAME ); + + TALKBACK_ERR err = talkback->TalkBackGetAnalysis( + ppAnalysis, + wavfile, + text, + &settings ); + + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return false; + } + + Printf( "Analysis successful...\n" ); + + return true; +} + +typedef struct +{ + TALKBACK_PHONEME phoneme; + char const *string; +} TBPHONEMES_t; + +static TBPHONEMES_t g_TBPhonemeList[]= +{ + { TALKBACK_PHONEME_IY, "iy" }, + { TALKBACK_PHONEME_IH, "ih" }, + { TALKBACK_PHONEME_EH, "eh" }, + { TALKBACK_PHONEME_EY, "ey" }, + { TALKBACK_PHONEME_AE, "ae" }, + { TALKBACK_PHONEME_AA, "aa" }, + { TALKBACK_PHONEME_AW, "aw" }, + { TALKBACK_PHONEME_AY, "ay" }, + { TALKBACK_PHONEME_AH, "ah" }, + { TALKBACK_PHONEME_AO, "ao" }, + { TALKBACK_PHONEME_OY, "oy" }, + { TALKBACK_PHONEME_OW, "ow" }, + { TALKBACK_PHONEME_UH, "uh" }, + { TALKBACK_PHONEME_UW, "uw" }, + { TALKBACK_PHONEME_ER, "er" }, + { TALKBACK_PHONEME_AX, "ax" }, + { TALKBACK_PHONEME_S, "s" }, + { TALKBACK_PHONEME_SH, "sh" }, + { TALKBACK_PHONEME_Z, "z" }, + { TALKBACK_PHONEME_ZH, "zh" }, + { TALKBACK_PHONEME_F, "f" }, + { TALKBACK_PHONEME_TH, "th" }, + { TALKBACK_PHONEME_V, "v" }, + { TALKBACK_PHONEME_DH, "dh" }, + { TALKBACK_PHONEME_M, "m" }, + { TALKBACK_PHONEME_N, "n" }, + { TALKBACK_PHONEME_NG, "ng" }, + { TALKBACK_PHONEME_L, "l" }, + { TALKBACK_PHONEME_R, "r" }, + { TALKBACK_PHONEME_W, "w" }, + { TALKBACK_PHONEME_Y, "y" }, + { TALKBACK_PHONEME_HH, "hh" }, + { TALKBACK_PHONEME_B, "b" }, + { TALKBACK_PHONEME_D, "d" }, + { TALKBACK_PHONEME_JH, "jh" }, + { TALKBACK_PHONEME_G, "g" }, + { TALKBACK_PHONEME_P, "p" }, + { TALKBACK_PHONEME_T, "t" }, + { TALKBACK_PHONEME_K, "k" }, + { TALKBACK_PHONEME_CH, "ch" }, + { TALKBACK_PHONEME_SIL, "" }, + { -1, NULL } +}; + +char const *TBPhonemeToString( TALKBACK_PHONEME phoneme ) +{ + if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST ) + { + return "Bogus"; + } + + TBPHONEMES_t *item = &g_TBPhonemeList[ phoneme ]; + return item->string; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// time - +// start - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return -1; + } + + if ( count <= 0L ) + return -1; + + // Bogus + if ( count >= 100000L ) + return -1; + + for ( int i = 0; i < (int)count; i++ ) + { + TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID; + err = talkback->TalkBackGetPhonemeEnum( analysis, i, &tbPhoneme ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + continue; + } + + double t; + + if ( start ) + { + err = talkback->TalkBackGetPhonemeStartTime( analysis, i, &t ); + } + else + { + err = talkback->TalkBackGetPhonemeEndTime( analysis, i, &t ); + } + + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + continue; + } + + if ( t == time ) + { + return i; + } + } + + return -1; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// starttime - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ) +{ + return GetPhonemeIndexAtWord( analysis, starttime, true ); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// endtime - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ) +{ + return GetPhonemeIndexAtWord( analysis, endtime, false ); +} + +CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ) +{ + static CAnalyzedPhoneme p; + + memset( &p, 0, sizeof( p ) ); + + TALKBACK_PHONEME tb; + + TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + strcpy( p.phoneme, TBPhonemeToString( tb ) ); + + err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &p.starttime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &p.endtime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + return &p; +} + +CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ) +{ + static CAnalyzedWord w; + + memset( &w, 0, sizeof( w ) ); + + long chars = sizeof( w.buffer ); + + TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, chars, w.buffer ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + err = talkback->TalkBackGetWordStartTime( analysis, index, &w.starttime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + err = talkback->TalkBackGetWordEndTime( analysis, index, &w.endtime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + return &w; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *w1 - +// *w2 - +// Output : Returns true on success, false on failure. +//----------------------------------------------------------------------------- +bool FuzzyWordMatch( char const *w1, char const *w2 ) +{ + int len1 = strlen( w1 ); + int len2 = strlen( w2 ); + + int minlen = min( len1, len2 ); + + // Found a match + if ( !strnicmp( w1, w2, minlen ) ) + return true; + + int letterdiff = abs( len1 - len2 ); + // More than three letters different, don't bother + if ( letterdiff > 5 ) + return false; + + // Compute a "delta" + char *p1 = (char *)w1; + char *p2 = (char *)w2; + + CUtlVector word1; + CUtlVector word2; + + while ( *p1 ) + { + if ( V_isalpha( *p1 ) ) + { + word1.AddToTail( *p1 ); + } + p1++; + } + + while ( *p2 ) + { + if ( V_isalpha( *p2 ) ) + { + word2.AddToTail( *p2 ); + } + p2++; + } + + int i; + for ( i = 0; i < word1.Size(); i++ ) + { + char c = word1[ i ]; + + // See if c is in word 2, if so subtract it out + int idx = word2.Find( c ); + + if ( idx != word2.InvalidIndex() ) + { + word2.Remove( idx ); + } + } + + if ( word2.Size() <= letterdiff ) + return true; + + word2.RemoveAll(); + + while ( *p2 ) + { + if ( V_isalpha( *p2 ) ) + { + word2.AddToTail( *p2 ); + } + p2++; + } + + for ( i = 0; i < word2.Size(); i++ ) + { + char c = word2[ i ]; + + // See if c is in word 2, if so subtract it out + int idx = word1.Find( c ); + + if ( idx != word1.InvalidIndex() ) + { + word1.Remove( idx ); + } + } + + if ( word1.Size() <= letterdiff ) + return true; + + return false; +} + +//----------------------------------------------------------------------------- +// Purpose: For foreign language stuff, if inwords is empty, process anyway... +// Input : *analysis - +// outwords - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return; + } + + CWordTag *newWord = new CWordTag; + + newWord->SetWord( TEXTLESS_WORDNAME ); + + float starttime = 0.0f; + float endtime = 1.0f; + + + for ( int i = 0; i < count; ++i ) + { + // Get phoneme and timing info + CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i ); + if ( !ph ) + continue; + + CPhonemeTag *ptag = new CPhonemeTag; + + if ( i == 0 || ( ph->starttime < starttime ) ) + { + starttime = ph->starttime; + } + + if ( i == 0 || ( ph->endtime > endtime ) ) + { + endtime = ph->endtime; + } + + ptag->SetStartTime( ph->starttime ); + ptag->SetEndTime( ph->endtime ); + + ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); + ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); + + ptag->SetTag( ph->phoneme ); + ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); + + newWord->m_Phonemes.AddToTail( ptag ); + } + + newWord->m_flStartTime = starttime; + newWord->m_flEndTime = endtime; + + newWord->m_uiStartByte = ComputeByteFromTime( starttime ); + newWord->m_uiEndByte = ComputeByteFromTime( endtime ); + + outwords.Reset(); + outwords.AddWordTag( newWord ); + outwords.SetTextFromWords(); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// inwords - +// outwords - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return; + } + + if ( count <= 0L ) + { + if ( inwords.m_Words.Count() == 0 || + !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) + { + ProcessWordsTextless( analysis, outwords ); + } + return; + } + + // Bogus + if ( count >= 100000L ) + return; + + int inwordpos = 0; + int awordpos = 0; + + outwords.Reset(); + + char previous[ 256 ]; + previous[ 0 ] = 0; + + while ( inwordpos < inwords.m_Words.Size() ) + { + CWordTag *in = inwords.m_Words[ inwordpos ]; + + if ( awordpos >= count ) + { + // Just copy the rest over without phonemes + CWordTag *copy = new CWordTag( *in ); + + outwords.AddWordTag( copy ); + + inwordpos++; + continue; + } + + // Should never fail + CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos ); + if ( !w ) + { + return; + } + + if ( !stricmp( w->buffer, "" ) ) + { + awordpos++; + continue; + } + + char const *check = ApplyTBWordRules( in->GetWord() ); + if ( !FuzzyWordMatch( check, w->buffer ) ) + { + bool advance_input = true; + if ( previous[ 0 ] ) + { + if ( FuzzyWordMatch( previous, w->buffer ) ) + { + advance_input = false; + } + } + + if ( advance_input ) + { + inwordpos++; + } + awordpos++; + continue; + } + strcpy( previous, check ); + + CWordTag *newWord = new CWordTag; + + newWord->SetWord( in->GetWord() ); + + newWord->m_flStartTime = w->starttime; + newWord->m_flEndTime = w->endtime; + + newWord->m_uiStartByte = ComputeByteFromTime( w->starttime ); + newWord->m_uiEndByte = ComputeByteFromTime( w->endtime ); + + int phonemestart, phonemeend; + + phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime ); + phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime ); + + if ( phonemestart >= 0 && phonemeend >= 0 ) + { + for ( ; phonemestart <= phonemeend; phonemestart++ ) + { + // Get phoneme and timing info + CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart ); + if ( !ph ) + continue; + + CPhonemeTag *ptag = new CPhonemeTag; + ptag->SetStartTime( ph->starttime ); + ptag->SetEndTime( ph->endtime ); + + ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); + ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); + + ptag->SetTag( ph->phoneme ); + ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); + + newWord->m_Phonemes.AddToTail( ptag ); + } + } + + outwords.AddWordTag( newWord ); + inwordpos++; + awordpos++; + } +} + +char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word ) +{ + static char outword[ 256 ]; + + char const *in = word; + char *out = outword; + + while ( *in && ( ( out - outword ) <= 255 ) ) + { + if ( *in == '\t' || + *in == ' ' || + *in == '\n' || + *in == '-' || + *in == '.' || + *in == ',' || + *in == ';' || + *in == '?' || + *in == '"' || + *in == ':' || + *in == '(' || + *in == ')' ) + { + in++; + *out++ = ' '; + continue; + } + + if ( !V_isprint( *in ) ) + { + in++; + continue; + } + + if ( *in >= 128 ) + { + in++; + continue; + } + + // Skip numbers + if ( *in >= '0' && *in <= '9' ) + { + in++; + continue; + } + + // Convert all letters to upper case + if ( *in >= 'a' && *in <= 'z' ) + { + *out++ = ( *in++ ) - 'a' + 'A'; + continue; + } + + if ( *in >= 'A' && *in <= 'Z' ) + { + *out++ = *in++; + continue; + } + + if ( *in == '\'' ) + { + *out++ = *in++; + continue; + } + + in++; + } + + *out = 0; + + return outword; +} + +//----------------------------------------------------------------------------- +// Purpose: Given a wavfile and a list of inwords, determines the word/phonene +// sample counts for the sentce +// Output : SR_RESULT +//----------------------------------------------------------------------------- +SR_RESULT CPhonemeExtractorLipSinc::Extract( + const char *wavfile, + int numsamples, + void (*pfnPrint)( const char *fmt, ... ), + CSentence& inwords, + CSentence& outwords ) +{ + // g_enableTalkBackDebuggingOutput = 1; + + m_pfnPrint = pfnPrint; + + if ( !InitLipSinc() ) + { + return SR_RESULT_ERROR; + } + + m_flSampleCount = numsamples; + + if ( !CheckSoundFile( wavfile ) ) + { + FreeLibrary( m_hHelper ); + return SR_RESULT_ERROR; + } + + TALKBACK_ANALYSIS *analysis = NULL; + + if ( !AttemptAnalysis( &analysis, wavfile, inwords ) ) + { + FreeLibrary( m_hHelper ); + return SR_RESULT_FAILED; + } + + if ( strlen( inwords.GetText() ) <= 0 ) + { + inwords.SetTextFromWords(); + } + + outwords = inwords; + + // Examine data + ProcessWords( analysis, inwords, outwords ); + + if ( analysis ) + { + talkback->TalkBackFreeAnalysis( &analysis ); + } + + return SR_RESULT_SUCCESS; +} + +EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorLipSinc, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE ); \ No newline at end of file -- cgit v1.2.3