aboutsummaryrefslogtreecommitdiff
path: root/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp
diff options
context:
space:
mode:
authorJoe Ludwig <[email protected]>2013-06-26 15:22:04 -0700
committerJoe Ludwig <[email protected]>2013-06-26 15:22:04 -0700
commit39ed87570bdb2f86969d4be821c94b722dc71179 (patch)
treeabc53757f75f40c80278e87650ea92808274aa59 /mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp
downloadsource-sdk-2013-39ed87570bdb2f86969d4be821c94b722dc71179.tar.xz
source-sdk-2013-39ed87570bdb2f86969d4be821c94b722dc71179.zip
First version of the Source SDK 2013
Diffstat (limited to 'mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp')
-rw-r--r--mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp1075
1 files changed, 1075 insertions, 0 deletions
diff --git a/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp
new file mode 100644
index 00000000..70819f8e
--- /dev/null
+++ b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp
@@ -0,0 +1,1075 @@
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $NoKeywords: $
+//=============================================================================//
+#include <stdio.h>
+#include <stdarg.h>
+#include <memory.h>
+#include <windows.h>
+#include <mmsystem.h>
+#include <mmreg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "phonemeextractor/PhonemeExtractor.h"
+#include "ims_helper/ims_helper.h"
+
+#include "tier0/dbg.h"
+#include "sentence.h"
+#include "PhonemeConverter.h"
+#include "tier1/strtools.h"
+
+#define TEXTLESS_WORDNAME "[Textless]" // Placeholder word: a sentence consisting of only this word is analyzed with no text
+
+static IImsHelper *talkback = NULL; // Helper interface fetched from ims_helper.dll by InitLipSinc(); NULL until then
+
+//-----------------------------------------------------------------------------
+// Purpose: IPhonemeExtractor implementation that drives the LipSinc TalkBack
+//  library through the IImsHelper interface
+//-----------------------------------------------------------------------------
+class CPhonemeExtractorLipSinc : public IPhonemeExtractor
+{
+public:
+    // Identifies which speech API this extractor represents
+    virtual PE_APITYPE GetAPIType() const { return SPEECH_API_LIPSINC; }
+
+    // Display name (used for menus, etc)
+    virtual char const *GetName() const { return "IMS (LipSinc)"; }
+
+    // Analyzes wavfile against the words in inwords and stores the timed
+    //  words/phonemes in outwords; progress and errors go through pfnPrint
+    SR_RESULT Extract( const char *wavfile, int numsamples, void (*pfnPrint)( const char *fmt, ... ), CSentence& inwords, CSentence& outwords );
+
+    CPhonemeExtractorLipSinc();
+    ~CPhonemeExtractorLipSinc();
+
+    enum
+    {
+        // Size of the buffer a single analyzed word is read into
+        MAX_WORD_LENGTH = 128,
+    };
+
+private:
+    // One word as reported by the analysis: text plus start/end time
+    class CAnalyzedWord
+    {
+    public:
+        char buffer[ MAX_WORD_LENGTH ];
+        double starttime;
+        double endtime;
+    };
+
+    // One phoneme as reported by the analysis: tag plus start/end time
+    class CAnalyzedPhoneme
+    {
+    public:
+        char phoneme[ 32 ];
+        double starttime;
+        double endtime;
+    };
+
+    // Library lifetime
+    bool InitLipSinc();
+    void ShutdownLipSinc();
+
+    // Output through the caller supplied print callback
+    void DescribeError( TALKBACK_ERR err );
+    void Printf( char const *fmt, ... );
+
+    bool CheckSoundFile( char const *filename );
+    bool GetInitialized();
+    void SetInitialized( bool init );
+
+    // Print callback handed to the most recent Extract() call
+    void (*m_pfnPrint)( const char *fmt, ... );
+
+    // Running the analysis
+    char const *ConstructInputSentence( CSentence& inwords );
+    bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords );
+
+    char const *ApplyTBWordRules( char const *word );
+
+    // Converting the analysis into CSentence data
+    void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords );
+    void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords );
+
+    int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart );
+
+    int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime );
+    int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime );
+
+    CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index );
+    CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index );
+
+    int ComputeByteFromTime( float time );
+
+    // True once InitLipSinc() has succeeded
+    bool m_bInitialized;
+
+    // Metrics of the sound file being processed (see Extract/CheckSoundFile)
+    float m_flSampleCount;
+    float m_flDuration;
+
+    float m_flSamplesPerSecond;
+
+    int m_nBytesPerSample;
+
+    // Handle of the loaded ims_helper.dll
+    HMODULE m_hHelper;
+};
+
+// Starts with no helper DLL loaded, no print callback and zeroed sound metrics.
+CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void ) :
+    m_pfnPrint( NULL ),
+    m_bInitialized( false ),
+    m_flSampleCount( 0.0f ),
+    m_flDuration( 0.0f ),
+    m_flSamplesPerSecond( 0.0f ),
+    m_nBytesPerSample( 0 ),
+    m_hHelper( (HMODULE)0 )
+{
+}
+
+// Shuts the library down, but only if InitLipSinc() ever succeeded.
+CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void )
+{
+    if ( !GetInitialized() )
+        return;
+
+    ShutdownLipSinc();
+}
+
+// Returns the initialized flag (set to true at the end of a successful InitLipSinc).
+bool CPhonemeExtractorLipSinc::GetInitialized( void )
+{
+    const bool bReady = m_bInitialized;
+    return bReady;
+}
+
+// Overwrites the initialized flag with the given value.
+void CPhonemeExtractorLipSinc::SetInitialized( bool init )
+{
+    this->m_bInitialized = init;
+}
+
+// Scales a time by the recorded duration to get a sample number, then
+//  multiplies by m_nBytesPerSample to get a byte position.
+// Returns 0 while no duration has been recorded.
+int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time )
+{
+    if ( m_flDuration )
+    {
+        // Fraction of the way through the sound, then the sample at that fraction
+        const float fraction = time / m_flDuration;
+        const float sample = fraction * m_flSampleCount;
+
+        // Truncates toward zero
+        return (int)( sample * m_nBytesPerSample );
+    }
+
+    return 0;
+}
+
+void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err )
+{
+ Assert( m_pfnPrint );
+
+ // Get the error description.
+ char errorDesc[256] = "";
+ if ( err != TALKBACK_NOERR )
+ {
+ talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc );
+ }
+
+ // Report or log the error...
+ (*m_pfnPrint)( "LIPSINC ERROR: %s\n", errorDesc );
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: printf-style output routed through the print callback
+// Input  : *fmt - printf format string
+//          ... - arguments for fmt
+// Note   : the formatted text is truncated to fit a 4096 byte buffer
+//-----------------------------------------------------------------------------
+void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... )
+{
+    Assert( m_pfnPrint );
+
+    // Assert compiles out in release builds, so guard the call explicitly
+    if ( !m_pfnPrint )
+        return;
+
+    char string[ 4096 ];
+
+    va_list argptr;
+    va_start( argptr, fmt );
+    // Bounded formatting: long file names or sentences must not overrun the stack buffer
+    Q_vsnprintf( string, sizeof( string ), fmt, argptr );
+    va_end( argptr );
+
+    // Make sure the result is terminated even when it was truncated
+    string[ sizeof( string ) - 1 ] = 0;
+
+    (*m_pfnPrint)( "%s", string );
+}
+
+// Queries the metrics of a sound file, prints them when the file can be
+//  analyzed, and records duration / sample metrics in the member variables.
+// Input  : *filename - sound file to inspect
+// Output : true if the library reports the file can be analyzed; false if it
+//          cannot or if the metrics query failed (the error is printed)
+bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename )
+{
+    TALKBACK_SOUND_FILE_METRICS fm;
+    memset( &fm, 0, sizeof( fm ) );
+    fm.m_size = sizeof( fm );
+
+    TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm );
+    if ( err != TALKBACK_NOERR )
+    {
+        DescribeError( err );
+        return false;
+    }
+
+    if ( fm.m_canBeAnalyzed )
+    {
+        Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n",
+            filename,
+            fm.m_duration,
+            fm.m_sampleRate,
+            fm.m_bitsPerSample,
+            fm.m_channelCount );
+    }
+
+    m_flDuration = fm.m_duration;
+    if ( m_flDuration > 0 )
+    {
+        // NOTE(review): uses the sample count as passed to Extract(), before the
+        //  adjustment below - confirm that is the intended value
+        m_flSamplesPerSecond = m_flSampleCount / m_flDuration;
+    }
+    else
+    {
+        m_flSamplesPerSecond = 0.0f;
+    }
+
+    m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 );
+
+    // The metrics were zero-filled before the query; never divide by a field
+    //  the library left at zero
+    if ( m_nBytesPerSample > 0 )
+    {
+        m_flSampleCount /= m_nBytesPerSample;
+    }
+
+    if ( fm.m_channelCount > 0 )
+    {
+        m_nBytesPerSample /= fm.m_channelCount;
+    }
+
+    return fm.m_canBeAnalyzed ? true : false;
+}
+
+// Signature of the "GetImsHelper" export of ims_helper.dll
+typedef IImsHelper *(*pfnImsHelper)(void);
+
+//-----------------------------------------------------------------------------
+// Purpose: Unloads the helper DLL and clears the handle and the global helper
+//  pointer, so nothing keeps referring into the unloaded module
+// Input  : hHelper - module handle to release; set to 0 on return
+//-----------------------------------------------------------------------------
+static void ReleaseImsHelper( HMODULE &hHelper )
+{
+    talkback = NULL;
+
+    if ( hHelper )
+    {
+        FreeLibrary( hHelper );
+        hHelper = (HMODULE)0;
+    }
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Loads ims_helper.dll, fetches the helper interface, locates the
+//  lipsinc_data directory next to (or under bin\ of) the executable's parent
+//  directory and starts the TalkBack library with it
+// Output : Returns true on success (or if already initialized), false on failure.
+//-----------------------------------------------------------------------------
+bool CPhonemeExtractorLipSinc::InitLipSinc( void )
+{
+    if ( GetInitialized() )
+    {
+        return true;
+    }
+
+    m_hHelper = LoadLibrary( "ims_helper.dll" );
+    if ( !m_hHelper )
+    {
+        return false;
+    }
+
+    pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" );
+    if ( !factory )
+    {
+        ReleaseImsHelper( m_hHelper );
+        return false;
+    }
+
+    talkback = reinterpret_cast< IImsHelper * >( (*factory)() );
+    if ( !talkback )
+    {
+        ReleaseImsHelper( m_hHelper );
+        return false;
+    }
+
+    // Base directory = directory of the executable with its last component stripped
+    char szExeName[ MAX_PATH ];
+    szExeName[0] = 0;
+    GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) );
+
+    char szBaseDir[ MAX_PATH ];
+    Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) );
+
+    Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) );
+    Q_StripTrailingSlash( szBaseDir );
+    Q_strlower( szBaseDir );
+
+    // First candidate: <base>\lipsinc_data\ 
+    char coreDataDir[ 512 ];
+    Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\",
+        szBaseDir );
+    Q_FixSlashes( coreDataDir );
+
+    // A known data file is used to probe whether the directory is the right one
+    char szCheck[ 512 ];
+    Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
+    struct __stat64 buf;
+
+    if ( _stat64( szCheck, &buf ) != 0 )
+    {
+        // Second candidate: <base>\bin\lipsinc_data\ 
+        Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\",
+            szBaseDir );
+        Q_FixSlashes( coreDataDir );
+        Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
+
+        if ( _stat64( szCheck, &buf ) != 0 )
+        {
+            Error( "Unable to find talkback data files in %s.", coreDataDir );
+        }
+    }
+
+    TALKBACK_ERR err;
+
+    err = talkback->TalkBackStartupLibrary( coreDataDir );
+    if ( err != TALKBACK_NOERR )
+    {
+        // Describe first: DescribeError needs the helper interface
+        DescribeError( err );
+        ReleaseImsHelper( m_hHelper );
+        return false;
+    }
+
+    long verMajor = 0;
+    long verMinor = 0;
+    long verRevision = 0;
+
+    err = talkback->TalkBackGetVersion(
+        &verMajor,
+        &verMinor,
+        &verRevision);
+    if ( err != TALKBACK_NOERR )
+    {
+        DescribeError( err );
+        ReleaseImsHelper( m_hHelper );
+        return false;
+    }
+
+    // The version numbers are longs, so print them as such
+    Printf( "Lipsinc TalkBack Version %ld.%ld.%ld\n", verMajor, verMinor, verRevision );
+
+    m_bInitialized = true;
+
+    return true;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Shuts the TalkBack library down, unloads the helper DLL and puts
+//  the object back into its uninitialized state. Structured exceptions raised
+//  while shutting down are caught and only reported to the debugger.
+//-----------------------------------------------------------------------------
+void CPhonemeExtractorLipSinc::ShutdownLipSinc( void )
+{
+    // HACK HACK: This seems to crash on exit sometimes
+    __try
+    {
+        if ( talkback )
+        {
+            talkback->TalkBackShutdownLibrary();
+        }
+
+        if ( m_hHelper )
+        {
+            FreeLibrary( m_hHelper );
+        }
+    }
+    __except(EXCEPTION_EXECUTE_HANDLER )
+    {
+        OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" );
+    }
+
+    // Nothing may refer into the unloaded module any more; a later
+    //  InitLipSinc() has to start from scratch
+    talkback = NULL;
+    m_hHelper = (HMODULE)0;
+    SetInitialized( false );
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Joins the words of a sentence with single spaces into the text
+//  that is handed to the analysis
+// Input  : inwords - sentence whose m_Words are joined
+// Output : pointer to a static buffer (overwritten by the next call); empty
+//          when the sentence is a single word whose text starts with
+//          TEXTLESS_WORDNAME. Text that does not fit the buffer is truncated.
+//-----------------------------------------------------------------------------
+char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords )
+{
+    static char sentence[ 16384 ];
+
+    sentence[ 0 ] = 0;
+
+    const int wordCount = inwords.m_Words.Count();
+
+    // A lone textless placeholder means "analyze without text"
+    if ( wordCount == 1 &&
+        !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
+    {
+        return sentence;
+    }
+
+    const int last = wordCount - 1;
+
+    for ( int i = 0 ; i <= last; i++ )
+    {
+        CWordTag *w = inwords.m_Words[ i ];
+
+        // Bounded appends: the word list is caller supplied and must not be
+        //  able to overrun the static buffer
+        if ( w )
+        {
+            Q_strncat( sentence, w->GetWord(), sizeof( sentence ), COPY_ALL_CHARACTERS );
+        }
+
+        if ( i != last )
+        {
+            Q_strncat( sentence, " ", sizeof( sentence ), COPY_ALL_CHARACTERS );
+        }
+    }
+
+    return sentence;
+}
+
+// Runs the TalkBack analysis of wavfile against the text built from inwords.
+// Input  : **ppAnalysis - receives the analysis; set to NULL before the call
+//          *wavfile - sound file to analyze
+//          inwords - sentence providing the text
+// Output : true on success; on failure the error is printed and false returned
+bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords )
+{
+    *ppAnalysis = NULL;
+
+    // The structure must be zeroed and carry its own size before use
+    TALKBACK_ANALYSIS_SETTINGS settings;
+    memset( &settings, 0, sizeof( settings ) );
+    settings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS );
+
+    // Frame rate: the library default is noted as 30 frames per second; 100 is requested here
+    settings.fFrameRate = 100;
+
+    // 1 would optimize for flipbook output; 0 (the default) is normal analysis
+    settings.fOptimizeForFlipbook = 0;
+
+    // -1 (the default) seeds the random number generator from the current time;
+    //  any other value is used as the seed itself, giving repeatable speech
+    //  gestures. Lip-synching is not influenced by this value.
+    settings.fRandomSeed = -1;
+
+    // Path of an .INI file with a phoneme-to-speech-target mapping;
+    //  NULL (the default) selects the default mapping
+    settings.fConfigFile = NULL;
+
+    char const *pszSentence = ConstructInputSentence( inwords );
+
+    // Show the placeholder name when there is no text to analyze against
+    char const *pszShown = TEXTLESS_WORDNAME;
+    if ( pszSentence[ 0 ] )
+    {
+        pszShown = pszSentence;
+    }
+    Printf( "Analyzing: \"%s\"\n", pszShown );
+
+    const TALKBACK_ERR err = talkback->TalkBackGetAnalysis( ppAnalysis, wavfile, pszSentence, &settings );
+    if ( err == TALKBACK_NOERR )
+    {
+        Printf( "Analysis successful...\n" );
+        return true;
+    }
+
+    DescribeError( err );
+    return false;
+}
+
+// One row of the phoneme name table: a TalkBack phoneme value and its text tag
+struct TBPHONEMES_t
+{
+    TALKBACK_PHONEME phoneme;
+    char const *string;
+};
+
+// Text tags for the TalkBack phonemes. TBPhonemeToString() indexes this table
+//  directly with the phoneme value, so the row order matters.
+// The last row ( -1, NULL ) marks the end of the table.
+static TBPHONEMES_t g_TBPhonemeList[]=
+{
+    { TALKBACK_PHONEME_IY,  "iy" },
+    { TALKBACK_PHONEME_IH,  "ih" },
+    { TALKBACK_PHONEME_EH,  "eh" },
+    { TALKBACK_PHONEME_EY,  "ey" },
+    { TALKBACK_PHONEME_AE,  "ae" },
+    { TALKBACK_PHONEME_AA,  "aa" },
+    { TALKBACK_PHONEME_AW,  "aw" },
+    { TALKBACK_PHONEME_AY,  "ay" },
+    { TALKBACK_PHONEME_AH,  "ah" },
+    { TALKBACK_PHONEME_AO,  "ao" },
+    { TALKBACK_PHONEME_OY,  "oy" },
+    { TALKBACK_PHONEME_OW,  "ow" },
+    { TALKBACK_PHONEME_UH,  "uh" },
+    { TALKBACK_PHONEME_UW,  "uw" },
+    { TALKBACK_PHONEME_ER,  "er" },
+    { TALKBACK_PHONEME_AX,  "ax" },
+    { TALKBACK_PHONEME_S,   "s" },
+    { TALKBACK_PHONEME_SH,  "sh" },
+    { TALKBACK_PHONEME_Z,   "z" },
+    { TALKBACK_PHONEME_ZH,  "zh" },
+    { TALKBACK_PHONEME_F,   "f" },
+    { TALKBACK_PHONEME_TH,  "th" },
+    { TALKBACK_PHONEME_V,   "v" },
+    { TALKBACK_PHONEME_DH,  "dh" },
+    { TALKBACK_PHONEME_M,   "m" },
+    { TALKBACK_PHONEME_N,   "n" },
+    { TALKBACK_PHONEME_NG,  "ng" },
+    { TALKBACK_PHONEME_L,   "l" },
+    { TALKBACK_PHONEME_R,   "r" },
+    { TALKBACK_PHONEME_W,   "w" },
+    { TALKBACK_PHONEME_Y,   "y" },
+    { TALKBACK_PHONEME_HH,  "hh" },
+    { TALKBACK_PHONEME_B,   "b" },
+    { TALKBACK_PHONEME_D,   "d" },
+    { TALKBACK_PHONEME_JH,  "jh" },
+    { TALKBACK_PHONEME_G,   "g" },
+    { TALKBACK_PHONEME_P,   "p" },
+    { TALKBACK_PHONEME_T,   "t" },
+    { TALKBACK_PHONEME_K,   "k" },
+    { TALKBACK_PHONEME_CH,  "ch" },
+    { TALKBACK_PHONEME_SIL, "<sil>" },
+    { -1,                   NULL }
+};
+
+// Returns the text tag of a TalkBack phoneme, or "Bogus" for values outside
+//  TALKBACK_PHONEME_FIRST..TALKBACK_PHONEME_LAST.
+char const *TBPhonemeToString( TALKBACK_PHONEME phoneme )
+{
+    const bool bInRange = ( phoneme >= TALKBACK_PHONEME_FIRST ) && ( phoneme <= TALKBACK_PHONEME_LAST );
+    if ( !bInRange )
+    {
+        return "Bogus";
+    }
+
+    // The table is indexed directly by the phoneme value
+    return g_TBPhonemeList[ phoneme ].string;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Finds the first phoneme whose start (or end) time equals a time
+// Input  : *analysis - analysis to search
+//          time - time to look for (compared for exact equality)
+//          start - true to compare phoneme start times, false for end times
+// Output : int - phoneme index, or -1 if none matches, the phoneme count
+//          cannot be read, or the count is not in 1..99999
+//-----------------------------------------------------------------------------
+int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start )
+{
+    long numPhonemes;
+
+    TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &numPhonemes );
+    if ( err != TALKBACK_NOERR )
+    {
+        DescribeError( err );
+        return -1;
+    }
+
+    // Nothing to search, or a bogus count
+    if ( numPhonemes <= 0L || numPhonemes >= 100000L )
+    {
+        return -1;
+    }
+
+    const int limit = (int)numPhonemes;
+    for ( int idx = 0; idx < limit; idx++ )
+    {
+        // Phonemes whose enum cannot be read are reported and skipped
+        TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID;
+        err = talkback->TalkBackGetPhonemeEnum( analysis, idx, &tbPhoneme );
+        if ( err != TALKBACK_NOERR )
+        {
+            DescribeError( err );
+            continue;
+        }
+
+        double boundary;
+        err = start
+            ? talkback->TalkBackGetPhonemeStartTime( analysis, idx, &boundary )
+            : talkback->TalkBackGetPhonemeEndTime( analysis, idx, &boundary );
+        if ( err != TALKBACK_NOERR )
+        {
+            DescribeError( err );
+            continue;
+        }
+
+        if ( boundary == time )
+        {
+            return idx;
+        }
+    }
+
+    return -1;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Finds the phoneme that starts exactly at a word's start time
+// Input  : *analysis - analysis to search
+//          starttime - start time of the word
+// Output : int - phoneme index, or -1 if there is none
+//-----------------------------------------------------------------------------
+int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime )
+{
+    const bool bCompareStartTimes = true;
+    return GetPhonemeIndexAtWord( analysis, starttime, bCompareStartTimes );
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Finds the phoneme that ends exactly at a word's end time
+// Input  : *analysis - analysis to search
+//          endtime - end time of the word
+// Output : int - phoneme index, or -1 if there is none
+//-----------------------------------------------------------------------------
+int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime )
+{
+    const bool bCompareStartTimes = false;
+    return GetPhonemeIndexAtWord( analysis, endtime, bCompareStartTimes );
+}
+
+// Reads tag, start time and end time of one phoneme of the analysis.
+// Output : pointer to a static record that the next call overwrites, or NULL
+//          if any of the three queries fails (the error is printed)
+CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index )
+{
+    static CAnalyzedPhoneme result;
+
+    memset( &result, 0, sizeof( result ) );
+
+    TALKBACK_PHONEME code;
+
+    // Each step only runs while everything before it succeeded
+    TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &code );
+    if ( err == TALKBACK_NOERR )
+    {
+        strcpy( result.phoneme, TBPhonemeToString( code ) );
+
+        err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &result.starttime );
+    }
+
+    if ( err == TALKBACK_NOERR )
+    {
+        err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &result.endtime );
+    }
+
+    if ( err != TALKBACK_NOERR )
+    {
+        DescribeError( err );
+        return NULL;
+    }
+
+    return &result;
+}
+
+// Reads text, start time and end time of one word of the analysis.
+// Output : pointer to a static record that the next call overwrites, or NULL
+//          if any of the three queries fails (the error is printed)
+CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index )
+{
+    static CAnalyzedWord result;
+
+    memset( &result, 0, sizeof( result ) );
+
+    const long capacity = sizeof( result.buffer );
+
+    // Each step only runs while everything before it succeeded
+    TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, capacity, result.buffer );
+    if ( err == TALKBACK_NOERR )
+    {
+        err = talkback->TalkBackGetWordStartTime( analysis, index, &result.starttime );
+    }
+
+    if ( err == TALKBACK_NOERR )
+    {
+        err = talkback->TalkBackGetWordEndTime( analysis, index, &result.endtime );
+    }
+
+    if ( err != TALKBACK_NOERR )
+    {
+        DescribeError( err );
+        return NULL;
+    }
+
+    return &result;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Counts the letters of target that are left over after every letter
+//  of source has cancelled (at most) one equal letter of target
+// Input  : *source - word whose letters do the cancelling
+//          *target - word whose leftover letters are counted
+// Output : number of alphabetic characters of target without a partner
+//-----------------------------------------------------------------------------
+static int CountUnmatchedLetters( char const *source, char const *target )
+{
+    CUtlVector <char> remaining;
+
+    char const *p;
+    for ( p = target; *p; p++ )
+    {
+        if ( V_isalpha( *p ) )
+        {
+            remaining.AddToTail( *p );
+        }
+    }
+
+    for ( p = source; *p; p++ )
+    {
+        if ( !V_isalpha( *p ) )
+            continue;
+
+        // See if the letter is still available in target, if so subtract it out
+        int idx = remaining.Find( *p );
+        if ( idx != remaining.InvalidIndex() )
+        {
+            remaining.Remove( idx );
+        }
+    }
+
+    return remaining.Size();
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Decides whether two words are close enough to be treated as the same
+// Input  : *w1 -
+//          *w2 -
+// Output : true if one word is a case-insensitive prefix of the other, or if
+//          the lengths differ by at most five and, in either direction, the
+//          letters of one word not found in the other number no more than the
+//          length difference; false otherwise.
+//-----------------------------------------------------------------------------
+bool FuzzyWordMatch( char const *w1, char const *w2 )
+{
+    int len1 = strlen( w1 );
+    int len2 = strlen( w2 );
+
+    int minlen = min( len1, len2 );
+
+    // Found a match
+    if ( !strnicmp( w1, w2, minlen ) )
+        return true;
+
+    int letterdiff = abs( len1 - len2 );
+    // More than five letters different, don't bother
+    if ( letterdiff > 5 )
+        return false;
+
+    // Compute a "delta": letters of w2 that w1 cannot account for...
+    if ( CountUnmatchedLetters( w1, w2 ) <= letterdiff )
+        return true;
+
+    // ...and the other way round. Both words are scanned from their start
+    //  again here; the reverse comparison must not reuse an exhausted cursor.
+    if ( CountUnmatchedLetters( w2, w1 ) <= letterdiff )
+        return true;
+
+    return false;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: For foreign language stuff, if inwords is empty, process anyway...
+//  Replaces the contents of outwords with a single TEXTLESS_WORDNAME word
+//  that holds every phoneme of the analysis and spans their time range.
+// Input  : *analysis - analysis to read the phonemes from
+//          outwords - sentence to overwrite; left untouched if the phoneme
+//              count cannot be read
+//-----------------------------------------------------------------------------
+void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords )
+{
+    long count;
+
+    TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
+    if ( err != TALKBACK_NOERR )
+    {
+        DescribeError( err );
+        return;
+    }
+
+    CWordTag *newWord = new CWordTag;
+
+    newWord->SetWord( TEXTLESS_WORDNAME );
+
+    // Range used when no phoneme can be read at all
+    float starttime = 0.0f;
+    float endtime = 1.0f;
+
+    // The range is seeded from the first phoneme that could actually be read,
+    //  which is not necessarily index 0
+    bool bHavePhoneme = false;
+
+    for ( int i = 0; i < count; ++i )
+    {
+        // Get phoneme and timing info
+        CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i );
+        if ( !ph )
+            continue;
+
+        if ( !bHavePhoneme || ( ph->starttime < starttime ) )
+        {
+            starttime = ph->starttime;
+        }
+
+        if ( !bHavePhoneme || ( ph->endtime > endtime ) )
+        {
+            endtime = ph->endtime;
+        }
+
+        bHavePhoneme = true;
+
+        CPhonemeTag *ptag = new CPhonemeTag;
+
+        ptag->SetStartTime( ph->starttime );
+        ptag->SetEndTime( ph->endtime );
+
+        ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
+        ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
+
+        ptag->SetTag( ph->phoneme );
+        ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
+
+        newWord->m_Phonemes.AddToTail( ptag );
+    }
+
+    newWord->m_flStartTime = starttime;
+    newWord->m_flEndTime = endtime;
+
+    newWord->m_uiStartByte = ComputeByteFromTime( starttime );
+    newWord->m_uiEndByte = ComputeByteFromTime( endtime );
+
+    outwords.Reset();
+    outwords.AddWordTag( newWord );
+    outwords.SetTextFromWords();
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Walks the input words and the analyzed words side by side and
+// rebuilds outwords from the input words that fuzzy-match an analyzed word,
+// giving each that word's times, byte offsets and phonemes. Input words left
+// once the analyzed words run out are copied over as they are.
+//-----------------------------------------------------------------------------
+void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords )
+{
+ long count;
+
+ TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count );
+ if ( err != TALKBACK_NOERR )
+ {
+ DescribeError( err );
+ return;
+ }
+
+ if ( count <= 0L ) // the analysis reported no words
+ {
+ if ( inwords.m_Words.Count() == 0 ||
+ !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
+ {
+ ProcessWordsTextless( analysis, outwords ); // no input text either: emit one placeholder word with all phonemes
+ }
+ return;
+ }
+
+ // Treat an implausibly large word count as bogus
+ if ( count >= 100000L )
+ return;
+
+ int inwordpos = 0; // cursor into inwords.m_Words
+ int awordpos = 0; // cursor into the analyzed words
+
+ outwords.Reset();
+
+ char previous[ 256 ]; // normalized text of the last input word that matched
+ previous[ 0 ] = 0;
+
+ while ( inwordpos < inwords.m_Words.Size() )
+ {
+ CWordTag *in = inwords.m_Words[ inwordpos ];
+
+ if ( awordpos >= count )
+ {
+ // Analyzed words are used up: just copy the rest of the input words over
+ CWordTag *copy = new CWordTag( *in );
+
+ outwords.AddWordTag( copy );
+
+ inwordpos++;
+ continue;
+ }
+
+ // Should never fail; if it does, stop with whatever outwords holds so far
+ CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos );
+ if ( !w )
+ {
+ return;
+ }
+
+ if ( !stricmp( w->buffer, "<SIL>" ) ) // silence entry: consume it without touching the input cursor
+ {
+ awordpos++;
+ continue;
+ }
+
+ char const *check = ApplyTBWordRules( in->GetWord() ); // input word normalized for comparison
+ if ( !FuzzyWordMatch( check, w->buffer ) )
+ {
+ bool advance_input = true;
+ if ( previous[ 0 ] )
+ {
+ if ( FuzzyWordMatch( previous, w->buffer ) ) // analyzed word still matches the previously matched input word
+ {
+ advance_input = false; // so keep the current input word for the next analyzed word
+ }
+ }
+
+ if ( advance_input )
+ {
+ inwordpos++; // the skipped input word is not added to outwords
+ }
+ awordpos++;
+ continue;
+ }
+ strcpy( previous, check ); // remember the match for the check above
+
+ CWordTag *newWord = new CWordTag;
+
+ newWord->SetWord( in->GetWord() ); // keep the input spelling, take the timing from the analysis
+
+ newWord->m_flStartTime = w->starttime;
+ newWord->m_flEndTime = w->endtime;
+
+ newWord->m_uiStartByte = ComputeByteFromTime( w->starttime );
+ newWord->m_uiEndByte = ComputeByteFromTime( w->endtime );
+
+ int phonemestart, phonemeend;
+
+ phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime ); // phoneme starting exactly at the word start, or -1
+ phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime ); // phoneme ending exactly at the word end, or -1
+
+ if ( phonemestart >= 0 && phonemeend >= 0 ) // otherwise the word is added without phonemes
+ {
+ for ( ; phonemestart <= phonemeend; phonemestart++ )
+ {
+ // Get phoneme and timing info; phonemes that cannot be read are skipped
+ CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart );
+ if ( !ph )
+ continue;
+
+ CPhonemeTag *ptag = new CPhonemeTag;
+ ptag->SetStartTime( ph->starttime );
+ ptag->SetEndTime( ph->endtime );
+
+ ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
+ ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
+
+ ptag->SetTag( ph->phoneme );
+ ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
+
+ newWord->m_Phonemes.AddToTail( ptag );
+ }
+ }
+
+ outwords.AddWordTag( newWord );
+ inwordpos++;
+ awordpos++;
+ }
+}
+
+// Normalizes a word for comparison against the analyzed words:
+//  - whitespace and the punctuation - . , ; ? " : ( ) become a space
+//  - letters are kept, lower case converted to upper case
+//  - apostrophes are kept
+//  - everything else (digits, high-bit and non-printable characters) is dropped
+// Input  : *word - text to normalize; NULL is treated as an empty string
+// Output : pointer to a static buffer that the next call overwrites; at most
+//          255 characters are produced
+char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word )
+{
+    static char outword[ 256 ];
+
+    char const *in = word ? word : "";
+    char *out = outword;
+
+    // Last position that may hold a character; the slot after it is reserved
+    //  for the terminator written below
+    char * const outEnd = outword + sizeof( outword ) - 1;
+
+    while ( *in && out < outEnd )
+    {
+        const char c = *in++;
+
+        // Drop high-bit characters first: char may be signed, and negative
+        //  values must not reach the character classification below
+        if ( (unsigned char)c >= 128 )
+            continue;
+
+        switch ( c )
+        {
+        case '\t':
+        case ' ':
+        case '\n':
+        case '-':
+        case '.':
+        case ',':
+        case ';':
+        case '?':
+        case '"':
+        case ':':
+        case '(':
+        case ')':
+            *out++ = ' ';
+            continue;
+        default:
+            break;
+        }
+
+        if ( !V_isprint( c ) )
+            continue;
+
+        // Convert all letters to upper case
+        if ( c >= 'a' && c <= 'z' )
+        {
+            *out++ = c - 'a' + 'A';
+            continue;
+        }
+
+        if ( ( c >= 'A' && c <= 'Z' ) || c == '\'' )
+        {
+            *out++ = c;
+        }
+
+        // Numbers and any other character are skipped
+    }
+
+    *out = 0;
+
+    return outword;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Given a wavfile and a list of inwords, determines the word/phoneme
+//  sample counts for the sentence
+// Input  : *wavfile - sound file to analyze
+//          numsamples - sample count of the sound, used to turn times into
+//              byte offsets
+//          pfnPrint - callback that receives progress and error text
+//          inwords - words expected in the sound; its text is filled in from
+//              the words if empty
+//          outwords - receives a copy of inwords, then the analyzed result
+// Output : SR_RESULT - SR_RESULT_ERROR if the library cannot be initialized or
+//          the file cannot be analyzed, SR_RESULT_FAILED if the analysis
+//          fails, SR_RESULT_SUCCESS otherwise
+//-----------------------------------------------------------------------------
+SR_RESULT CPhonemeExtractorLipSinc::Extract(
+    const char *wavfile,
+    int numsamples,
+    void (*pfnPrint)( const char *fmt, ... ),
+    CSentence& inwords,
+    CSentence& outwords )
+{
+    // g_enableTalkBackDebuggingOutput = 1;
+
+    m_pfnPrint = pfnPrint;
+
+    if ( !InitLipSinc() )
+    {
+        return SR_RESULT_ERROR;
+    }
+
+    m_flSampleCount = numsamples;
+
+    // The helper DLL deliberately stays loaded on the failure paths below: the
+    //  object remains initialized, so the next Extract() call and the destructor
+    //  still call into the library. The destructor releases it.
+    if ( !CheckSoundFile( wavfile ) )
+    {
+        return SR_RESULT_ERROR;
+    }
+
+    TALKBACK_ANALYSIS *analysis = NULL;
+
+    if ( !AttemptAnalysis( &analysis, wavfile, inwords ) )
+    {
+        return SR_RESULT_FAILED;
+    }
+
+    if ( strlen( inwords.GetText() ) <= 0 )
+    {
+        inwords.SetTextFromWords();
+    }
+
+    outwords = inwords;
+
+    // Examine data
+    ProcessWords( analysis, inwords, outwords );
+
+    if ( analysis )
+    {
+        talkback->TalkBackFreeAnalysis( &analysis );
+    }
+
+    return SR_RESULT_SUCCESS;
+}
+
+EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorLipSinc, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE );
\ No newline at end of file