diff options
| author | Jørgen P. Tjernø <[email protected]> | 2013-12-02 19:31:46 -0800 |
|---|---|---|
| committer | Jørgen P. Tjernø <[email protected]> | 2013-12-02 19:46:31 -0800 |
| commit | f56bb35301836e56582a575a75864392a0177875 (patch) | |
| tree | de61ddd39de3e7df52759711950b4c288592f0dc /mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp | |
| parent | Mark some more files as text. (diff) | |
| download | source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.tar.xz source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.zip | |
Fix line endings. WHAMMY.
Diffstat (limited to 'mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp')
| -rw-r--r-- | mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp | 2148 |
1 files changed, 1074 insertions, 1074 deletions
diff --git a/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp index 70819f8e..29dabab4 100644 --- a/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp +++ b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp @@ -1,1075 +1,1075 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//=============================================================================//
-#include <stdio.h>
-#include <stdarg.h>
-#include <memory.h>
-#include <windows.h>
-#include <mmsystem.h>
-#include <mmreg.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "phonemeextractor/PhonemeExtractor.h"
-#include "ims_helper/ims_helper.h"
-
-#include "tier0/dbg.h"
-#include "sentence.h"
-#include "PhonemeConverter.h"
-#include "tier1/strtools.h"
-
-#define TEXTLESS_WORDNAME "[Textless]"
-
-static IImsHelper *talkback = NULL;
-
-//-----------------------------------------------------------------------------
-// Purpose: Expose the interface
-//-----------------------------------------------------------------------------
-class CPhonemeExtractorLipSinc : public IPhonemeExtractor
-{
-public:
- virtual PE_APITYPE GetAPIType() const
- {
- return SPEECH_API_LIPSINC;
- }
-
- // Used for menus, etc
- virtual char const *GetName() const
- {
- return "IMS (LipSinc)";
- }
-
- SR_RESULT Extract(
- const char *wavfile,
- int numsamples,
- void (*pfnPrint)( const char *fmt, ... ),
- CSentence& inwords,
- CSentence& outwords );
-
-
- CPhonemeExtractorLipSinc( void );
- ~CPhonemeExtractorLipSinc( void );
-
- enum
- {
- MAX_WORD_LENGTH = 128,
- };
-private:
-
-
- class CAnalyzedWord
- {
- public:
- char buffer[ MAX_WORD_LENGTH ];
- double starttime;
- double endtime;
- };
-
- class CAnalyzedPhoneme
- {
- public:
- char phoneme[ 32 ];
- double starttime;
- double endtime;
- };
-
- bool InitLipSinc( void );
- void ShutdownLipSinc( void );
-
- void DescribeError( TALKBACK_ERR err );
- void Printf( char const *fmt, ... );
-
- bool CheckSoundFile( char const *filename );
- bool GetInitialized( void );
- void SetInitialized( bool init );
-
- void (*m_pfnPrint)( const char *fmt, ... );
-
- char const *ConstructInputSentence( CSentence& inwords );
- bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords );
-
- char const *ApplyTBWordRules( char const *word );
-
- void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords );
- void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords );
-
- int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart );
-
- int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime );
- int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime );
-
- CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index );
- CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index );
-
- int ComputeByteFromTime( float time );
-
- bool m_bInitialized;
-
- float m_flSampleCount;
- float m_flDuration;
-
- float m_flSamplesPerSecond;
-
- int m_nBytesPerSample;
-
- HMODULE m_hHelper;
-};
-
-CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void )
-{
- m_hHelper = (HMODULE)0;
- m_pfnPrint = NULL;
-
- m_bInitialized = false;
-
- m_flSampleCount = 0.0f;
- m_flDuration = 0.0f;
-
- m_flSamplesPerSecond = 0.0f;
-
- m_nBytesPerSample = 0;
-}
-
-CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void )
-{
- if ( GetInitialized() )
- {
- ShutdownLipSinc();
- }
-}
-
-bool CPhonemeExtractorLipSinc::GetInitialized( void )
-{
- return m_bInitialized;
-}
-
-void CPhonemeExtractorLipSinc::SetInitialized( bool init )
-{
- m_bInitialized = init;
-}
-
-int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time )
-{
- if ( !m_flDuration )
- return 0;
-
- float frac = time / m_flDuration;
-
- float sampleNumber = frac * m_flSampleCount;
-
- int bytenumber = sampleNumber * m_nBytesPerSample;
-
- return bytenumber;
-}
-
-void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err )
-{
- Assert( m_pfnPrint );
-
- // Get the error description.
- char errorDesc[256] = "";
- if ( err != TALKBACK_NOERR )
- {
- talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc );
- }
-
- // Report or log the error...
- (*m_pfnPrint)( "LIPSINC ERROR: %s\n", errorDesc );
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *fmt -
-// .. -
-//-----------------------------------------------------------------------------
-void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... )
-{
- Assert( m_pfnPrint );
-
- char string[ 4096 ];
-
- va_list argptr;
- va_start( argptr, fmt );
- vsprintf( string, fmt, argptr );
- va_end( argptr );
-
- (*m_pfnPrint)( "%s", string );
-}
-
-bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename )
-{
- TALKBACK_SOUND_FILE_METRICS fm;
- memset( &fm, 0, sizeof( fm ) );
- fm.m_size = sizeof( fm );
-
- TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return false;
- }
-
- if ( fm.m_canBeAnalyzed )
- {
- Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n",
- filename,
- fm.m_duration,
- fm.m_sampleRate,
- fm.m_bitsPerSample,
- fm.m_channelCount );
- }
-
- m_flDuration = fm.m_duration;
- if ( m_flDuration > 0 )
- {
- m_flSamplesPerSecond = m_flSampleCount / m_flDuration;
- }
- else
- {
- m_flSamplesPerSecond = 0.0f;
- }
-
- m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 );
-
- m_flSampleCount /= m_nBytesPerSample;
-
- m_nBytesPerSample /= fm.m_channelCount;
-
- return fm.m_canBeAnalyzed ? true : false;
-}
-
-typedef IImsHelper *(*pfnImsHelper)(void);
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Output : Returns true on success, false on failure.
-//-----------------------------------------------------------------------------
-bool CPhonemeExtractorLipSinc::InitLipSinc( void )
-{
- if ( GetInitialized() )
- {
- return true;
- }
-
- m_hHelper = LoadLibrary( "ims_helper.dll" );
- if ( !m_hHelper )
- {
- return false;
- }
-
- pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" );
- if ( !factory )
- {
- FreeLibrary( m_hHelper );
- return false;
- }
-
- talkback = reinterpret_cast< IImsHelper * >( (*factory)() );
- if ( !talkback )
- {
- FreeLibrary( m_hHelper );
- return false;
- }
-
- char szExeName[ MAX_PATH ];
- szExeName[0] = 0;
- GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) );
-
- char szBaseDir[ MAX_PATH ];
- Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) );
-
- Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) );
- Q_StripTrailingSlash( szBaseDir );
- Q_strlower( szBaseDir );
-
- char coreDataDir[ 512 ];
- Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\",
- szBaseDir );
- Q_FixSlashes( coreDataDir );
-
- char szCheck[ 512 ];
- Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
- struct __stat64 buf;
-
- if ( _stat64( szCheck, &buf ) != 0 )
- {
- Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\",
- szBaseDir );
- Q_FixSlashes( coreDataDir );
- Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
-
- if ( _stat64( szCheck, &buf ) != 0 )
- {
- Error( "Unable to find talkback data files in %s.", coreDataDir );
- }
- }
-
- TALKBACK_ERR err;
-
- err = talkback->TalkBackStartupLibrary( coreDataDir );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- FreeLibrary( m_hHelper );
- return false;
- }
-
- long verMajor = 0;
- long verMinor = 0;
- long verRevision = 0;
-
- err = talkback->TalkBackGetVersion(
- &verMajor,
- &verMinor,
- &verRevision);
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- FreeLibrary( m_hHelper );
- return false;
- }
-
- Printf( "Lipsinc TalkBack Version %i.%i.%i\n", verMajor, verMinor, verRevision );
-
- m_bInitialized = true;
-
- return true;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-//-----------------------------------------------------------------------------
-void CPhonemeExtractorLipSinc::ShutdownLipSinc( void )
-{
- // HACK HACK: This seems to crash on exit sometimes
- __try
- {
- talkback->TalkBackShutdownLibrary();
-
- FreeLibrary( m_hHelper );
- }
- __except(EXCEPTION_EXECUTE_HANDLER )
- {
- OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" );
- }
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : inwords -
-// Output : char const
-//-----------------------------------------------------------------------------
-char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords )
-{
- static char sentence[ 16384 ];
-
- sentence[ 0 ] = 0;
-
- int last = inwords.m_Words.Size() - 1;
-
- for ( int i = 0 ; i <= last; i++ )
- {
- CWordTag *w = inwords.m_Words[ i ];
-
- strcat( sentence, w->GetWord() );
- if ( i != last )
- {
- strcat( sentence, " " );
- }
- }
-
- if ( inwords.m_Words.Count() == 1 &&
- !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
- {
- sentence[ 0 ] = 0;
- }
-
- return sentence;
-}
-
-bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords )
-{
- *ppAnalysis = NULL;
-
- TALKBACK_ANALYSIS_SETTINGS settings;
- memset( &settings, 0, sizeof( settings ) );
-
- // Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the
- // structure.
- settings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS );
-
-
- // Default value: 30 (frames per second).
- settings.fFrameRate = 100;
- // Set this to 1 to optimize for flipbook output, 0 to do analysis normally.
- //
- // Default value: 0 (normal analysis).
- settings.fOptimizeForFlipbook = 0;
- // Set this to -1 to seed the random number generator with the current time.
- // Any other number will be used directly for the random number seed, which
- // is useful if you want repeatable speech gestures. This value does not
- // influence lip-synching at all.
- //
- // Default value: -1 (use current time).
- settings.fRandomSeed = -1;
- // Path to the configuration (.INI) file with phoneme-to-speech-target
- // mapping. Set this to NULL to use the default mapping.
- //
- // Default value: NULL (use default mapping).
- settings.fConfigFile = NULL;
-
- char const *text = ConstructInputSentence( inwords );
-
- Printf( "Analyzing: \"%s\"\n", text[ 0 ] ? text : TEXTLESS_WORDNAME );
-
- TALKBACK_ERR err = talkback->TalkBackGetAnalysis(
- ppAnalysis,
- wavfile,
- text,
- &settings );
-
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return false;
- }
-
- Printf( "Analysis successful...\n" );
-
- return true;
-}
-
-typedef struct
-{
- TALKBACK_PHONEME phoneme;
- char const *string;
-} TBPHONEMES_t;
-
-static TBPHONEMES_t g_TBPhonemeList[]=
-{
- { TALKBACK_PHONEME_IY, "iy" },
- { TALKBACK_PHONEME_IH, "ih" },
- { TALKBACK_PHONEME_EH, "eh" },
- { TALKBACK_PHONEME_EY, "ey" },
- { TALKBACK_PHONEME_AE, "ae" },
- { TALKBACK_PHONEME_AA, "aa" },
- { TALKBACK_PHONEME_AW, "aw" },
- { TALKBACK_PHONEME_AY, "ay" },
- { TALKBACK_PHONEME_AH, "ah" },
- { TALKBACK_PHONEME_AO, "ao" },
- { TALKBACK_PHONEME_OY, "oy" },
- { TALKBACK_PHONEME_OW, "ow" },
- { TALKBACK_PHONEME_UH, "uh" },
- { TALKBACK_PHONEME_UW, "uw" },
- { TALKBACK_PHONEME_ER, "er" },
- { TALKBACK_PHONEME_AX, "ax" },
- { TALKBACK_PHONEME_S, "s" },
- { TALKBACK_PHONEME_SH, "sh" },
- { TALKBACK_PHONEME_Z, "z" },
- { TALKBACK_PHONEME_ZH, "zh" },
- { TALKBACK_PHONEME_F, "f" },
- { TALKBACK_PHONEME_TH, "th" },
- { TALKBACK_PHONEME_V, "v" },
- { TALKBACK_PHONEME_DH, "dh" },
- { TALKBACK_PHONEME_M, "m" },
- { TALKBACK_PHONEME_N, "n" },
- { TALKBACK_PHONEME_NG, "ng" },
- { TALKBACK_PHONEME_L, "l" },
- { TALKBACK_PHONEME_R, "r" },
- { TALKBACK_PHONEME_W, "w" },
- { TALKBACK_PHONEME_Y, "y" },
- { TALKBACK_PHONEME_HH, "hh" },
- { TALKBACK_PHONEME_B, "b" },
- { TALKBACK_PHONEME_D, "d" },
- { TALKBACK_PHONEME_JH, "jh" },
- { TALKBACK_PHONEME_G, "g" },
- { TALKBACK_PHONEME_P, "p" },
- { TALKBACK_PHONEME_T, "t" },
- { TALKBACK_PHONEME_K, "k" },
- { TALKBACK_PHONEME_CH, "ch" },
- { TALKBACK_PHONEME_SIL, "<sil>" },
- { -1, NULL }
-};
-
-char const *TBPhonemeToString( TALKBACK_PHONEME phoneme )
-{
- if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST )
- {
- return "Bogus";
- }
-
- TBPHONEMES_t *item = &g_TBPhonemeList[ phoneme ];
- return item->string;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *analysis -
-// time -
-// start -
-// Output : int
-//-----------------------------------------------------------------------------
-int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start )
-{
- long count;
-
- TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return -1;
- }
-
- if ( count <= 0L )
- return -1;
-
- // Bogus
- if ( count >= 100000L )
- return -1;
-
- for ( int i = 0; i < (int)count; i++ )
- {
- TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID;
- err = talkback->TalkBackGetPhonemeEnum( analysis, i, &tbPhoneme );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- continue;
- }
-
- double t;
-
- if ( start )
- {
- err = talkback->TalkBackGetPhonemeStartTime( analysis, i, &t );
- }
- else
- {
- err = talkback->TalkBackGetPhonemeEndTime( analysis, i, &t );
- }
-
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- continue;
- }
-
- if ( t == time )
- {
- return i;
- }
- }
-
- return -1;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *analysis -
-// starttime -
-// Output : int
-//-----------------------------------------------------------------------------
-int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime )
-{
- return GetPhonemeIndexAtWord( analysis, starttime, true );
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *analysis -
-// endtime -
-// Output : int
-//-----------------------------------------------------------------------------
-int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime )
-{
- return GetPhonemeIndexAtWord( analysis, endtime, false );
-}
-
-CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index )
-{
- static CAnalyzedPhoneme p;
-
- memset( &p, 0, sizeof( p ) );
-
- TALKBACK_PHONEME tb;
-
- TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
-
- strcpy( p.phoneme, TBPhonemeToString( tb ) );
-
- err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &p.starttime );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
- err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &p.endtime );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
-
- return &p;
-}
-
-CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index )
-{
- static CAnalyzedWord w;
-
- memset( &w, 0, sizeof( w ) );
-
- long chars = sizeof( w.buffer );
-
- TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, chars, w.buffer );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
-
- err = talkback->TalkBackGetWordStartTime( analysis, index, &w.starttime );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
- err = talkback->TalkBackGetWordEndTime( analysis, index, &w.endtime );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
-
- return &w;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *w1 -
-// *w2 -
-// Output : Returns true on success, false on failure.
-//-----------------------------------------------------------------------------
-bool FuzzyWordMatch( char const *w1, char const *w2 )
-{
- int len1 = strlen( w1 );
- int len2 = strlen( w2 );
-
- int minlen = min( len1, len2 );
-
- // Found a match
- if ( !strnicmp( w1, w2, minlen ) )
- return true;
-
- int letterdiff = abs( len1 - len2 );
- // More than three letters different, don't bother
- if ( letterdiff > 5 )
- return false;
-
- // Compute a "delta"
- char *p1 = (char *)w1;
- char *p2 = (char *)w2;
-
- CUtlVector <char> word1;
- CUtlVector <char> word2;
-
- while ( *p1 )
- {
- if ( V_isalpha( *p1 ) )
- {
- word1.AddToTail( *p1 );
- }
- p1++;
- }
-
- while ( *p2 )
- {
- if ( V_isalpha( *p2 ) )
- {
- word2.AddToTail( *p2 );
- }
- p2++;
- }
-
- int i;
- for ( i = 0; i < word1.Size(); i++ )
- {
- char c = word1[ i ];
-
- // See if c is in word 2, if so subtract it out
- int idx = word2.Find( c );
-
- if ( idx != word2.InvalidIndex() )
- {
- word2.Remove( idx );
- }
- }
-
- if ( word2.Size() <= letterdiff )
- return true;
-
- word2.RemoveAll();
-
- while ( *p2 )
- {
- if ( V_isalpha( *p2 ) )
- {
- word2.AddToTail( *p2 );
- }
- p2++;
- }
-
- for ( i = 0; i < word2.Size(); i++ )
- {
- char c = word2[ i ];
-
- // See if c is in word 2, if so subtract it out
- int idx = word1.Find( c );
-
- if ( idx != word1.InvalidIndex() )
- {
- word1.Remove( idx );
- }
- }
-
- if ( word1.Size() <= letterdiff )
- return true;
-
- return false;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: For foreign language stuff, if inwords is empty, process anyway...
-// Input : *analysis -
-// outwords -
-//-----------------------------------------------------------------------------
-void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords )
-{
- long count;
-
- TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return;
- }
-
- CWordTag *newWord = new CWordTag;
-
- newWord->SetWord( TEXTLESS_WORDNAME );
-
- float starttime = 0.0f;
- float endtime = 1.0f;
-
-
- for ( int i = 0; i < count; ++i )
- {
- // Get phoneme and timing info
- CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i );
- if ( !ph )
- continue;
-
- CPhonemeTag *ptag = new CPhonemeTag;
-
- if ( i == 0 || ( ph->starttime < starttime ) )
- {
- starttime = ph->starttime;
- }
-
- if ( i == 0 || ( ph->endtime > endtime ) )
- {
- endtime = ph->endtime;
- }
-
- ptag->SetStartTime( ph->starttime );
- ptag->SetEndTime( ph->endtime );
-
- ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
- ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
-
- ptag->SetTag( ph->phoneme );
- ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
-
- newWord->m_Phonemes.AddToTail( ptag );
- }
-
- newWord->m_flStartTime = starttime;
- newWord->m_flEndTime = endtime;
-
- newWord->m_uiStartByte = ComputeByteFromTime( starttime );
- newWord->m_uiEndByte = ComputeByteFromTime( endtime );
-
- outwords.Reset();
- outwords.AddWordTag( newWord );
- outwords.SetTextFromWords();
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *analysis -
-// inwords -
-// outwords -
-//-----------------------------------------------------------------------------
-void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords )
-{
- long count;
-
- TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return;
- }
-
- if ( count <= 0L )
- {
- if ( inwords.m_Words.Count() == 0 ||
- !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
- {
- ProcessWordsTextless( analysis, outwords );
- }
- return;
- }
-
- // Bogus
- if ( count >= 100000L )
- return;
-
- int inwordpos = 0;
- int awordpos = 0;
-
- outwords.Reset();
-
- char previous[ 256 ];
- previous[ 0 ] = 0;
-
- while ( inwordpos < inwords.m_Words.Size() )
- {
- CWordTag *in = inwords.m_Words[ inwordpos ];
-
- if ( awordpos >= count )
- {
- // Just copy the rest over without phonemes
- CWordTag *copy = new CWordTag( *in );
-
- outwords.AddWordTag( copy );
-
- inwordpos++;
- continue;
- }
-
- // Should never fail
- CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos );
- if ( !w )
- {
- return;
- }
-
- if ( !stricmp( w->buffer, "<SIL>" ) )
- {
- awordpos++;
- continue;
- }
-
- char const *check = ApplyTBWordRules( in->GetWord() );
- if ( !FuzzyWordMatch( check, w->buffer ) )
- {
- bool advance_input = true;
- if ( previous[ 0 ] )
- {
- if ( FuzzyWordMatch( previous, w->buffer ) )
- {
- advance_input = false;
- }
- }
-
- if ( advance_input )
- {
- inwordpos++;
- }
- awordpos++;
- continue;
- }
- strcpy( previous, check );
-
- CWordTag *newWord = new CWordTag;
-
- newWord->SetWord( in->GetWord() );
-
- newWord->m_flStartTime = w->starttime;
- newWord->m_flEndTime = w->endtime;
-
- newWord->m_uiStartByte = ComputeByteFromTime( w->starttime );
- newWord->m_uiEndByte = ComputeByteFromTime( w->endtime );
-
- int phonemestart, phonemeend;
-
- phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime );
- phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime );
-
- if ( phonemestart >= 0 && phonemeend >= 0 )
- {
- for ( ; phonemestart <= phonemeend; phonemestart++ )
- {
- // Get phoneme and timing info
- CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart );
- if ( !ph )
- continue;
-
- CPhonemeTag *ptag = new CPhonemeTag;
- ptag->SetStartTime( ph->starttime );
- ptag->SetEndTime( ph->endtime );
-
- ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
- ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
-
- ptag->SetTag( ph->phoneme );
- ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
-
- newWord->m_Phonemes.AddToTail( ptag );
- }
- }
-
- outwords.AddWordTag( newWord );
- inwordpos++;
- awordpos++;
- }
-}
-
-char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word )
-{
- static char outword[ 256 ];
-
- char const *in = word;
- char *out = outword;
-
- while ( *in && ( ( out - outword ) <= 255 ) )
- {
- if ( *in == '\t' ||
- *in == ' ' ||
- *in == '\n' ||
- *in == '-' ||
- *in == '.' ||
- *in == ',' ||
- *in == ';' ||
- *in == '?' ||
- *in == '"' ||
- *in == ':' ||
- *in == '(' ||
- *in == ')' )
- {
- in++;
- *out++ = ' ';
- continue;
- }
-
- if ( !V_isprint( *in ) )
- {
- in++;
- continue;
- }
-
- if ( *in >= 128 )
- {
- in++;
- continue;
- }
-
- // Skip numbers
- if ( *in >= '0' && *in <= '9' )
- {
- in++;
- continue;
- }
-
- // Convert all letters to upper case
- if ( *in >= 'a' && *in <= 'z' )
- {
- *out++ = ( *in++ ) - 'a' + 'A';
- continue;
- }
-
- if ( *in >= 'A' && *in <= 'Z' )
- {
- *out++ = *in++;
- continue;
- }
-
- if ( *in == '\'' )
- {
- *out++ = *in++;
- continue;
- }
-
- in++;
- }
-
- *out = 0;
-
- return outword;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: Given a wavfile and a list of inwords, determines the word/phonene
-// sample counts for the sentce
-// Output : SR_RESULT
-//-----------------------------------------------------------------------------
-SR_RESULT CPhonemeExtractorLipSinc::Extract(
- const char *wavfile,
- int numsamples,
- void (*pfnPrint)( const char *fmt, ... ),
- CSentence& inwords,
- CSentence& outwords )
-{
- // g_enableTalkBackDebuggingOutput = 1;
-
- m_pfnPrint = pfnPrint;
-
- if ( !InitLipSinc() )
- {
- return SR_RESULT_ERROR;
- }
-
- m_flSampleCount = numsamples;
-
- if ( !CheckSoundFile( wavfile ) )
- {
- FreeLibrary( m_hHelper );
- return SR_RESULT_ERROR;
- }
-
- TALKBACK_ANALYSIS *analysis = NULL;
-
- if ( !AttemptAnalysis( &analysis, wavfile, inwords ) )
- {
- FreeLibrary( m_hHelper );
- return SR_RESULT_FAILED;
- }
-
- if ( strlen( inwords.GetText() ) <= 0 )
- {
- inwords.SetTextFromWords();
- }
-
- outwords = inwords;
-
- // Examine data
- ProcessWords( analysis, inwords, outwords );
-
- if ( analysis )
- {
- talkback->TalkBackFreeAnalysis( &analysis );
- }
-
- return SR_RESULT_SUCCESS;
-}
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +//=============================================================================// +#include <stdio.h> +#include <stdarg.h> +#include <memory.h> +#include <windows.h> +#include <mmsystem.h> +#include <mmreg.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "phonemeextractor/PhonemeExtractor.h" +#include "ims_helper/ims_helper.h" + +#include "tier0/dbg.h" +#include "sentence.h" +#include "PhonemeConverter.h" +#include "tier1/strtools.h" + +#define TEXTLESS_WORDNAME "[Textless]" + +static IImsHelper *talkback = NULL; + +//----------------------------------------------------------------------------- +// Purpose: Expose the interface +//----------------------------------------------------------------------------- +class CPhonemeExtractorLipSinc : public IPhonemeExtractor +{ +public: + virtual PE_APITYPE GetAPIType() const + { + return SPEECH_API_LIPSINC; + } + + // Used for menus, etc + virtual char const *GetName() const + { + return "IMS (LipSinc)"; + } + + SR_RESULT Extract( + const char *wavfile, + int numsamples, + void (*pfnPrint)( const char *fmt, ... ), + CSentence& inwords, + CSentence& outwords ); + + + CPhonemeExtractorLipSinc( void ); + ~CPhonemeExtractorLipSinc( void ); + + enum + { + MAX_WORD_LENGTH = 128, + }; +private: + + + class CAnalyzedWord + { + public: + char buffer[ MAX_WORD_LENGTH ]; + double starttime; + double endtime; + }; + + class CAnalyzedPhoneme + { + public: + char phoneme[ 32 ]; + double starttime; + double endtime; + }; + + bool InitLipSinc( void ); + void ShutdownLipSinc( void ); + + void DescribeError( TALKBACK_ERR err ); + void Printf( char const *fmt, ... ); + + bool CheckSoundFile( char const *filename ); + bool GetInitialized( void ); + void SetInitialized( bool init ); + + void (*m_pfnPrint)( const char *fmt, ... ); + + char const *ConstructInputSentence( CSentence& inwords ); + bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ); + + char const *ApplyTBWordRules( char const *word ); + + void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ); + void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ); + + int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart ); + + int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ); + int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ); + + CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ); + CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ); + + int ComputeByteFromTime( float time ); + + bool m_bInitialized; + + float m_flSampleCount; + float m_flDuration; + + float m_flSamplesPerSecond; + + int m_nBytesPerSample; + + HMODULE m_hHelper; +}; + +CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void ) +{ + m_hHelper = (HMODULE)0; + m_pfnPrint = NULL; + + m_bInitialized = false; + + m_flSampleCount = 0.0f; + m_flDuration = 0.0f; + + m_flSamplesPerSecond = 0.0f; + + m_nBytesPerSample = 0; +} + +CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void ) +{ + if ( GetInitialized() ) + { + ShutdownLipSinc(); + } +} + +bool CPhonemeExtractorLipSinc::GetInitialized( void ) +{ + return m_bInitialized; +} + +void CPhonemeExtractorLipSinc::SetInitialized( bool init ) +{ + m_bInitialized = init; +} + +int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time ) +{ + if ( !m_flDuration ) + return 0; + + float frac = time / m_flDuration; + + float sampleNumber = frac * m_flSampleCount; + + int bytenumber = sampleNumber * m_nBytesPerSample; + + return bytenumber; +} + +void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err ) +{ + Assert( m_pfnPrint ); + + // Get the error description. + char errorDesc[256] = ""; + if ( err != TALKBACK_NOERR ) + { + talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc ); + } + + // Report or log the error... + (*m_pfnPrint)( "LIPSINC ERROR: %s\n", errorDesc ); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *fmt - +// .. - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... ) +{ + Assert( m_pfnPrint ); + + char string[ 4096 ]; + + va_list argptr; + va_start( argptr, fmt ); + vsprintf( string, fmt, argptr ); + va_end( argptr ); + + (*m_pfnPrint)( "%s", string ); +} + +bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename ) +{ + TALKBACK_SOUND_FILE_METRICS fm; + memset( &fm, 0, sizeof( fm ) ); + fm.m_size = sizeof( fm ); + + TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return false; + } + + if ( fm.m_canBeAnalyzed ) + { + Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n", + filename, + fm.m_duration, + fm.m_sampleRate, + fm.m_bitsPerSample, + fm.m_channelCount ); + } + + m_flDuration = fm.m_duration; + if ( m_flDuration > 0 ) + { + m_flSamplesPerSecond = m_flSampleCount / m_flDuration; + } + else + { + m_flSamplesPerSecond = 0.0f; + } + + m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 ); + + m_flSampleCount /= m_nBytesPerSample; + + m_nBytesPerSample /= fm.m_channelCount; + + return fm.m_canBeAnalyzed ? true : false; +} + +typedef IImsHelper *(*pfnImsHelper)(void); + +//----------------------------------------------------------------------------- +// Purpose: +// Output : Returns true on success, false on failure. +//----------------------------------------------------------------------------- +bool CPhonemeExtractorLipSinc::InitLipSinc( void ) +{ + if ( GetInitialized() ) + { + return true; + } + + m_hHelper = LoadLibrary( "ims_helper.dll" ); + if ( !m_hHelper ) + { + return false; + } + + pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" ); + if ( !factory ) + { + FreeLibrary( m_hHelper ); + return false; + } + + talkback = reinterpret_cast< IImsHelper * >( (*factory)() ); + if ( !talkback ) + { + FreeLibrary( m_hHelper ); + return false; + } + + char szExeName[ MAX_PATH ]; + szExeName[0] = 0; + GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) ); + + char szBaseDir[ MAX_PATH ]; + Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) ); + + Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) ); + Q_StripTrailingSlash( szBaseDir ); + Q_strlower( szBaseDir ); + + char coreDataDir[ 512 ]; + Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\", + szBaseDir ); + Q_FixSlashes( coreDataDir ); + + char szCheck[ 512 ]; + Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); + struct __stat64 buf; + + if ( _stat64( szCheck, &buf ) != 0 ) + { + Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\", + szBaseDir ); + Q_FixSlashes( coreDataDir ); + Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); + + if ( _stat64( szCheck, &buf ) != 0 ) + { + Error( "Unable to find talkback data files in %s.", coreDataDir ); + } + } + + TALKBACK_ERR err; + + err = talkback->TalkBackStartupLibrary( coreDataDir ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + FreeLibrary( m_hHelper ); + return false; + } + + long verMajor = 0; + long verMinor = 0; + long verRevision = 0; + + err = talkback->TalkBackGetVersion( + &verMajor, + &verMinor, + &verRevision); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + FreeLibrary( m_hHelper ); + return false; + } + + Printf( "Lipsinc TalkBack Version %i.%i.%i\n", verMajor, verMinor, verRevision ); + + m_bInitialized = true; + + return true; +} + +//----------------------------------------------------------------------------- +// Purpose: +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ShutdownLipSinc( void ) +{ + // HACK HACK: This seems to crash on exit sometimes + __try + { + talkback->TalkBackShutdownLibrary(); + + FreeLibrary( m_hHelper ); + } + __except(EXCEPTION_EXECUTE_HANDLER ) + { + OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" ); + } +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : inwords - +// Output : char const +//----------------------------------------------------------------------------- +char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords ) +{ + static char sentence[ 16384 ]; + + sentence[ 0 ] = 0; + + int last = inwords.m_Words.Size() - 1; + + for ( int i = 0 ; i <= last; i++ ) + { + CWordTag *w = inwords.m_Words[ i ]; + + strcat( sentence, w->GetWord() ); + if ( i != last ) + { + strcat( sentence, " " ); + } + } + + if ( inwords.m_Words.Count() == 1 && + !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) + { + sentence[ 0 ] = 0; + } + + return sentence; +} + +bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ) +{ + *ppAnalysis = NULL; + + TALKBACK_ANALYSIS_SETTINGS settings; + memset( &settings, 0, sizeof( settings ) ); + + // Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the + // structure. + settings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS ); + + + // Default value: 30 (frames per second). + settings.fFrameRate = 100; + // Set this to 1 to optimize for flipbook output, 0 to do analysis normally. + // + // Default value: 0 (normal analysis). + settings.fOptimizeForFlipbook = 0; + // Set this to -1 to seed the random number generator with the current time. + // Any other number will be used directly for the random number seed, which + // is useful if you want repeatable speech gestures. This value does not + // influence lip-synching at all. + // + // Default value: -1 (use current time). + settings.fRandomSeed = -1; + // Path to the configuration (.INI) file with phoneme-to-speech-target + // mapping. Set this to NULL to use the default mapping. + // + // Default value: NULL (use default mapping). + settings.fConfigFile = NULL; + + char const *text = ConstructInputSentence( inwords ); + + Printf( "Analyzing: \"%s\"\n", text[ 0 ] ? text : TEXTLESS_WORDNAME ); + + TALKBACK_ERR err = talkback->TalkBackGetAnalysis( + ppAnalysis, + wavfile, + text, + &settings ); + + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return false; + } + + Printf( "Analysis successful...\n" ); + + return true; +} + +typedef struct +{ + TALKBACK_PHONEME phoneme; + char const *string; +} TBPHONEMES_t; + +static TBPHONEMES_t g_TBPhonemeList[]= +{ + { TALKBACK_PHONEME_IY, "iy" }, + { TALKBACK_PHONEME_IH, "ih" }, + { TALKBACK_PHONEME_EH, "eh" }, + { TALKBACK_PHONEME_EY, "ey" }, + { TALKBACK_PHONEME_AE, "ae" }, + { TALKBACK_PHONEME_AA, "aa" }, + { TALKBACK_PHONEME_AW, "aw" }, + { TALKBACK_PHONEME_AY, "ay" }, + { TALKBACK_PHONEME_AH, "ah" }, + { TALKBACK_PHONEME_AO, "ao" }, + { TALKBACK_PHONEME_OY, "oy" }, + { TALKBACK_PHONEME_OW, "ow" }, + { TALKBACK_PHONEME_UH, "uh" }, + { TALKBACK_PHONEME_UW, "uw" }, + { TALKBACK_PHONEME_ER, "er" }, + { TALKBACK_PHONEME_AX, "ax" }, + { TALKBACK_PHONEME_S, "s" }, + { TALKBACK_PHONEME_SH, "sh" }, + { TALKBACK_PHONEME_Z, "z" }, + { TALKBACK_PHONEME_ZH, "zh" }, + { TALKBACK_PHONEME_F, "f" }, + { TALKBACK_PHONEME_TH, "th" }, + { TALKBACK_PHONEME_V, "v" }, + { TALKBACK_PHONEME_DH, "dh" }, + { TALKBACK_PHONEME_M, "m" }, + { TALKBACK_PHONEME_N, "n" }, + { TALKBACK_PHONEME_NG, "ng" }, + { TALKBACK_PHONEME_L, "l" }, + { TALKBACK_PHONEME_R, "r" }, + { TALKBACK_PHONEME_W, "w" }, + { TALKBACK_PHONEME_Y, "y" }, + { TALKBACK_PHONEME_HH, "hh" }, + { TALKBACK_PHONEME_B, "b" }, + { TALKBACK_PHONEME_D, "d" }, + { TALKBACK_PHONEME_JH, "jh" }, + { TALKBACK_PHONEME_G, "g" }, + { TALKBACK_PHONEME_P, "p" }, + { TALKBACK_PHONEME_T, "t" }, + { TALKBACK_PHONEME_K, "k" }, + { TALKBACK_PHONEME_CH, "ch" }, + { TALKBACK_PHONEME_SIL, "<sil>" }, + { -1, NULL } +}; + +char const *TBPhonemeToString( TALKBACK_PHONEME phoneme ) +{ + if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST ) + { + return "Bogus"; + } + + TBPHONEMES_t *item = &g_TBPhonemeList[ phoneme ]; + return item->string; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// time - +// start - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return -1; + } + + if ( count <= 0L ) + return -1; + + // Bogus + if ( count >= 100000L ) + return -1; + + for ( int i = 0; i < (int)count; i++ ) + { + TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID; + err = talkback->TalkBackGetPhonemeEnum( analysis, i, &tbPhoneme ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + continue; + } + + double t; + + if ( start ) + { + err = talkback->TalkBackGetPhonemeStartTime( analysis, i, &t ); + } + else + { + err = talkback->TalkBackGetPhonemeEndTime( analysis, i, &t ); + } + + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + continue; + } + + if ( t == time ) + { + return i; + } + } + + return -1; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// starttime - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ) +{ + return GetPhonemeIndexAtWord( analysis, starttime, true ); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// endtime - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ) +{ + return GetPhonemeIndexAtWord( analysis, endtime, false ); +} + +CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ) +{ + static CAnalyzedPhoneme p; + + memset( &p, 0, sizeof( p ) ); + + TALKBACK_PHONEME tb; + + TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + strcpy( p.phoneme, TBPhonemeToString( tb ) ); + + err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &p.starttime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &p.endtime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + return &p; +} + +CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ) +{ + static CAnalyzedWord w; + + memset( &w, 0, sizeof( w ) ); + + long chars = sizeof( w.buffer ); + + TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, chars, w.buffer ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + err = talkback->TalkBackGetWordStartTime( analysis, index, &w.starttime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + err = talkback->TalkBackGetWordEndTime( analysis, index, &w.endtime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + return &w; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *w1 - +// *w2 - +// Output : Returns true on success, false on failure. +//----------------------------------------------------------------------------- +bool FuzzyWordMatch( char const *w1, char const *w2 ) +{ + int len1 = strlen( w1 ); + int len2 = strlen( w2 ); + + int minlen = min( len1, len2 ); + + // Found a match + if ( !strnicmp( w1, w2, minlen ) ) + return true; + + int letterdiff = abs( len1 - len2 ); + // More than three letters different, don't bother + if ( letterdiff > 5 ) + return false; + + // Compute a "delta" + char *p1 = (char *)w1; + char *p2 = (char *)w2; + + CUtlVector <char> word1; + CUtlVector <char> word2; + + while ( *p1 ) + { + if ( V_isalpha( *p1 ) ) + { + word1.AddToTail( *p1 ); + } + p1++; + } + + while ( *p2 ) + { + if ( V_isalpha( *p2 ) ) + { + word2.AddToTail( *p2 ); + } + p2++; + } + + int i; + for ( i = 0; i < word1.Size(); i++ ) + { + char c = word1[ i ]; + + // See if c is in word 2, if so subtract it out + int idx = word2.Find( c ); + + if ( idx != word2.InvalidIndex() ) + { + word2.Remove( idx ); + } + } + + if ( word2.Size() <= letterdiff ) + return true; + + word2.RemoveAll(); + + while ( *p2 ) + { + if ( V_isalpha( *p2 ) ) + { + word2.AddToTail( *p2 ); + } + p2++; + } + + for ( i = 0; i < word2.Size(); i++ ) + { + char c = word2[ i ]; + + // See if c is in word 2, if so subtract it out + int idx = word1.Find( c ); + + if ( idx != word1.InvalidIndex() ) + { + word1.Remove( idx ); + } + } + + if ( word1.Size() <= letterdiff ) + return true; + + return false; +} + +//----------------------------------------------------------------------------- +// Purpose: For foreign language stuff, if inwords is empty, process anyway... +// Input : *analysis - +// outwords - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return; + } + + CWordTag *newWord = new CWordTag; + + newWord->SetWord( TEXTLESS_WORDNAME ); + + float starttime = 0.0f; + float endtime = 1.0f; + + + for ( int i = 0; i < count; ++i ) + { + // Get phoneme and timing info + CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i ); + if ( !ph ) + continue; + + CPhonemeTag *ptag = new CPhonemeTag; + + if ( i == 0 || ( ph->starttime < starttime ) ) + { + starttime = ph->starttime; + } + + if ( i == 0 || ( ph->endtime > endtime ) ) + { + endtime = ph->endtime; + } + + ptag->SetStartTime( ph->starttime ); + ptag->SetEndTime( ph->endtime ); + + ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); + ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); + + ptag->SetTag( ph->phoneme ); + ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); + + newWord->m_Phonemes.AddToTail( ptag ); + } + + newWord->m_flStartTime = starttime; + newWord->m_flEndTime = endtime; + + newWord->m_uiStartByte = ComputeByteFromTime( starttime ); + newWord->m_uiEndByte = ComputeByteFromTime( endtime ); + + outwords.Reset(); + outwords.AddWordTag( newWord ); + outwords.SetTextFromWords(); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// inwords - +// outwords - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return; + } + + if ( count <= 0L ) + { + if ( inwords.m_Words.Count() == 0 || + !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) + { + ProcessWordsTextless( analysis, outwords ); + } + return; + } + + // Bogus + if ( count >= 100000L ) + return; + + int inwordpos = 0; + int awordpos = 0; + + outwords.Reset(); + + char previous[ 256 ]; + previous[ 0 ] = 0; + + while ( inwordpos < inwords.m_Words.Size() ) + { + CWordTag *in = inwords.m_Words[ inwordpos ]; + + if ( awordpos >= count ) + { + // Just copy the rest over without phonemes + CWordTag *copy = new CWordTag( *in ); + + outwords.AddWordTag( copy ); + + inwordpos++; + continue; + } + + // Should never fail + CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos ); + if ( !w ) + { + return; + } + + if ( !stricmp( w->buffer, "<SIL>" ) ) + { + awordpos++; + continue; + } + + char const *check = ApplyTBWordRules( in->GetWord() ); + if ( !FuzzyWordMatch( check, w->buffer ) ) + { + bool advance_input = true; + if ( previous[ 0 ] ) + { + if ( FuzzyWordMatch( previous, w->buffer ) ) + { + advance_input = false; + } + } + + if ( advance_input ) + { + inwordpos++; + } + awordpos++; + continue; + } + strcpy( previous, check ); + + CWordTag *newWord = new CWordTag; + + newWord->SetWord( in->GetWord() ); + + newWord->m_flStartTime = w->starttime; + newWord->m_flEndTime = w->endtime; + + newWord->m_uiStartByte = ComputeByteFromTime( w->starttime ); + newWord->m_uiEndByte = ComputeByteFromTime( w->endtime ); + + int phonemestart, phonemeend; + + phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime ); + phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime ); + + if ( phonemestart >= 0 && phonemeend >= 0 ) + { + for ( ; phonemestart <= phonemeend; phonemestart++ ) + { + // Get phoneme and timing info + CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart ); + if ( !ph ) + continue; + + CPhonemeTag *ptag = new CPhonemeTag; + ptag->SetStartTime( ph->starttime ); + ptag->SetEndTime( ph->endtime ); + + ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); + ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); + + ptag->SetTag( ph->phoneme ); + ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); + + newWord->m_Phonemes.AddToTail( ptag ); + } + } + + outwords.AddWordTag( newWord ); + inwordpos++; + awordpos++; + } +} + +char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word ) +{ + static char outword[ 256 ]; + + char const *in = word; + char *out = outword; + + while ( *in && ( ( out - outword ) <= 255 ) ) + { + if ( *in == '\t' || + *in == ' ' || + *in == '\n' || + *in == '-' || + *in == '.' || + *in == ',' || + *in == ';' || + *in == '?' || + *in == '"' || + *in == ':' || + *in == '(' || + *in == ')' ) + { + in++; + *out++ = ' '; + continue; + } + + if ( !V_isprint( *in ) ) + { + in++; + continue; + } + + if ( *in >= 128 ) + { + in++; + continue; + } + + // Skip numbers + if ( *in >= '0' && *in <= '9' ) + { + in++; + continue; + } + + // Convert all letters to upper case + if ( *in >= 'a' && *in <= 'z' ) + { + *out++ = ( *in++ ) - 'a' + 'A'; + continue; + } + + if ( *in >= 'A' && *in <= 'Z' ) + { + *out++ = *in++; + continue; + } + + if ( *in == '\'' ) + { + *out++ = *in++; + continue; + } + + in++; + } + + *out = 0; + + return outword; +} + +//----------------------------------------------------------------------------- +// Purpose: Given a wavfile and a list of inwords, determines the word/phonene +// sample counts for the sentce +// Output : SR_RESULT +//----------------------------------------------------------------------------- +SR_RESULT CPhonemeExtractorLipSinc::Extract( + const char *wavfile, + int numsamples, + void (*pfnPrint)( const char *fmt, ... ), + CSentence& inwords, + CSentence& outwords ) +{ + // g_enableTalkBackDebuggingOutput = 1; + + m_pfnPrint = pfnPrint; + + if ( !InitLipSinc() ) + { + return SR_RESULT_ERROR; + } + + m_flSampleCount = numsamples; + + if ( !CheckSoundFile( wavfile ) ) + { + FreeLibrary( m_hHelper ); + return SR_RESULT_ERROR; + } + + TALKBACK_ANALYSIS *analysis = NULL; + + if ( !AttemptAnalysis( &analysis, wavfile, inwords ) ) + { + FreeLibrary( m_hHelper ); + return SR_RESULT_FAILED; + } + + if ( strlen( inwords.GetText() ) <= 0 ) + { + inwords.SetTextFromWords(); + } + + outwords = inwords; + + // Examine data + ProcessWords( analysis, inwords, outwords ); + + if ( analysis ) + { + talkback->TalkBackFreeAnalysis( &analysis ); + } + + return SR_RESULT_SUCCESS; +} + EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorLipSinc, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE );
\ No newline at end of file |