diff options
| author | Narendra Umate <[email protected]> | 2013-12-02 23:36:05 -0800 |
|---|---|---|
| committer | Narendra Umate <[email protected]> | 2013-12-02 23:36:05 -0800 |
| commit | 8737f191f3b59f001a77bf6c08091109211c1c9f (patch) | |
| tree | dbbf05c004d9b026f2c1f23f06600fe0add82c36 /mp/src/utils/phonemeextractor | |
| parent | Update .gitignore. (diff) | |
| parent | Make .xcconfigs text files too. (diff) | |
| download | source-sdk-2013-8737f191f3b59f001a77bf6c08091109211c1c9f.tar.xz source-sdk-2013-8737f191f3b59f001a77bf6c08091109211c1c9f.zip | |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'mp/src/utils/phonemeextractor')
| -rw-r--r-- | mp/src/utils/phonemeextractor/extractor_utils.cpp | 54 | ||||
| -rw-r--r-- | mp/src/utils/phonemeextractor/phonemeextractor.cpp | 2848 | ||||
| -rw-r--r-- | mp/src/utils/phonemeextractor/phonemeextractor.vpc | 166 | ||||
| -rw-r--r-- | mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp | 2148 | ||||
| -rw-r--r-- | mp/src/utils/phonemeextractor/phonemeextractor_ims.vpc | 196 | ||||
| -rw-r--r-- | mp/src/utils/phonemeextractor/talkback.h | 1464 |
6 files changed, 3438 insertions, 3438 deletions
diff --git a/mp/src/utils/phonemeextractor/extractor_utils.cpp b/mp/src/utils/phonemeextractor/extractor_utils.cpp index ba927f04..8b8ff98b 100644 --- a/mp/src/utils/phonemeextractor/extractor_utils.cpp +++ b/mp/src/utils/phonemeextractor/extractor_utils.cpp @@ -1,28 +1,28 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//=============================================================================//
-#include <windows.h>
-#include <stdio.h>
-
-//-----------------------------------------------------------------------------
-// Purpose: converts an english string to unicode
-//-----------------------------------------------------------------------------
-int ConvertANSIToUnicode(const char *ansi, wchar_t *unicode, int unicodeBufferSize)
-{
- return ::MultiByteToWideChar(CP_ACP, 0, ansi, -1, unicode, unicodeBufferSize);
-}
-
-char *va( const char *fmt, ... )
-{
- va_list args;
- static char output[4][1024];
- static int outbuffer = 0;
-
- outbuffer++;
- va_start( args, fmt );
- vprintf( fmt, args );
- vsprintf( output[ outbuffer & 3 ], fmt, args );
- return output[ outbuffer & 3 ];
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//=============================================================================// +#include <windows.h> +#include <stdio.h> + +//----------------------------------------------------------------------------- +// Purpose: converts an english string to unicode +//----------------------------------------------------------------------------- +int ConvertANSIToUnicode(const char *ansi, wchar_t *unicode, int unicodeBufferSize) +{ + return ::MultiByteToWideChar(CP_ACP, 0, ansi, -1, unicode, unicodeBufferSize); +} + +char *va( const char *fmt, ... ) +{ + va_list args; + static char output[4][1024]; + static int outbuffer = 0; + + outbuffer++; + va_start( args, fmt ); + vprintf( fmt, args ); + vsprintf( output[ outbuffer & 3 ], fmt, args ); + return output[ outbuffer & 3 ]; }
\ No newline at end of file diff --git a/mp/src/utils/phonemeextractor/phonemeextractor.cpp b/mp/src/utils/phonemeextractor/phonemeextractor.cpp index 8dfc8439..271f1850 100644 --- a/mp/src/utils/phonemeextractor/phonemeextractor.cpp +++ b/mp/src/utils/phonemeextractor/phonemeextractor.cpp @@ -1,1425 +1,1425 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-// extracephonemes.cpp : Defines the entry point for the console application.
-//
-#define PROTECTED_THINGS_DISABLE
-
-#include "tier0/wchartypes.h"
-#include <stdio.h>
-#include <windows.h>
-#include <tchar.h>
-#include "sphelper.h"
-#include "spddkhlp.h"
-// ATL Header Files
-#include <atlbase.h>
-// Face poser and util includes
-#include "utlvector.h"
-#include "phonemeextractor/PhonemeExtractor.h"
-#include "PhonemeConverter.h"
-#include "sentence.h"
-#include "tier0/dbg.h"
-#include "tier0/icommandline.h"
-#include "filesystem.h"
-
-// Extract phoneme grammar id
-#define EP_GRAM_ID 101
-// First rule of dynamic sentence rule set
-#define DYN_SENTENCERULE 102
-// # of milliseconds to allow for processing before timeout
-#define SR_WAVTIMEOUT 4000
-// Weight tag for rule to rule word/rule transitions
-#define CONFIDENCE_WEIGHT 0.0f
-
-//#define LOGGING 1
-#define LOGFILE "c:\\fp.log"
-
-void LogReset( void )
-{
-#if LOGGING
- FILE *fp = fopen( LOGFILE, "w" );
- if ( fp )
- fclose( fp );
-#endif
-}
-
-char *va( const char *fmt, ... );
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *words -
-//-----------------------------------------------------------------------------
-void LogWords( CSentence& sentence )
-{
- Log( "Wordcount == %i\n", sentence.m_Words.Size() );
-
- for ( int i = 0; i < sentence.m_Words.Size(); i++ )
- {
- const CWordTag *w = sentence.m_Words[ i ];
- Log( "Word %s %u to %u\n", w->GetWord(), w->m_uiStartByte, w->m_uiEndByte );
- }
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *phonemes -
-//-----------------------------------------------------------------------------
-void LogPhonemes( CSentence& sentence )
-{
- return;
-
- Log( "Phonemecount == %i\n", sentence.CountPhonemes() );
-
- for ( int i = 0; i < sentence.m_Words.Size(); i++ )
- {
- const CWordTag *w = sentence.m_Words[ i ];
-
- for ( int j = 0; j < w->m_Phonemes.Size(); j++ )
- {
- const CPhonemeTag *p = w->m_Phonemes[ j ];
- Log( "Phoneme %s %u to %u\n", p->GetTag(), p->m_uiStartByte, p->m_uiEndByte );
- }
- }
-}
-
-#define NANO_CONVERT 10000000.0f;
-
-//-----------------------------------------------------------------------------
-// Purpose: Walk list of words and phonemes and create phoneme tags in CSentence object
-// FIXME: Right now, phonemes are assumed to evenly space out across a word.
-// Input : *converter -
-// result -
-// sentence -
-//-----------------------------------------------------------------------------
-void EnumeratePhonemes( ISpPhoneConverter *converter, const ISpRecoResult* result, CSentence& sentence )
-{
- USES_CONVERSION;
-
- // Grab access to element container
- ISpPhrase *phrase = ( ISpPhrase * )result;
- if ( !phrase )
- return;
-
- SPPHRASE *pElements;
- if ( !SUCCEEDED( phrase->GetPhrase( &pElements ) ) )
- return;
-
- // Only use it if it's better/same size as what we already had on-hand
- if ( pElements->Rule.ulCountOfElements > 0 )
- //(unsigned int)( sentence.m_Words.Size() - sentence.GetWordBase() ) )
- {
- sentence.ResetToBase();
-
- // Walk list of words
- for ( ULONG i = 0; i < pElements->Rule.ulCountOfElements; i++ )
- {
- unsigned int wordstart, wordend;
-
- // Get start/end sample index
- wordstart = pElements->pElements[i].ulAudioStreamOffset + (unsigned int)pElements->ullAudioStreamPosition;
- wordend = wordstart + pElements->pElements[i].ulAudioSizeBytes;
-
- // Create word tag
- CWordTag *w = new CWordTag( W2T( pElements->pElements[i].pszDisplayText ) );
- Assert( w );
- w->m_uiStartByte = wordstart;
- w->m_uiEndByte = wordend;
-
- sentence.AddWordTag( w );
-
- // Count # of phonemes in this word
- SPPHONEID pstr[ 2 ];
- pstr[ 1 ] = 0;
- WCHAR wszPhoneme[ SP_MAX_PRON_LENGTH ];
-
- const SPPHONEID *current;
- SPPHONEID phoneme;
- current = pElements->pElements[i].pszPronunciation;
- float total_weight = 0.0f;
- while ( 1 )
- {
- phoneme = *current++;
- if ( !phoneme )
- break;
-
- pstr[ 0 ] = phoneme;
- wszPhoneme[ 0 ] = L'\0';
-
- converter->IdToPhone( pstr, wszPhoneme );
-
- total_weight += WeightForPhoneme( W2A( wszPhoneme ) );
- }
-
- current = pElements->pElements[i].pszPronunciation;
-
- // Decide # of bytes/phoneme weight
- float psize = 0;
- if ( total_weight )
- {
- psize = ( wordend - wordstart ) / total_weight;
- }
-
- int number = 0;
-
- // Re-walk the phoneme list and create true phoneme tags
- float startWeight = 0.0f;
- while ( 1 )
- {
- phoneme = *current++;
- if ( !phoneme )
- break;
-
- pstr[ 0 ] = phoneme;
- wszPhoneme[ 0 ] = L'\0';
-
- converter->IdToPhone( pstr, wszPhoneme );
-
- CPhonemeTag *p = new CPhonemeTag( W2A( wszPhoneme ) );
- Assert( p );
-
- float weight = WeightForPhoneme( W2A( wszPhoneme ) );
-
- p->m_uiStartByte = wordstart + (int)( startWeight * psize );
- p->m_uiEndByte = p->m_uiStartByte + (int)( psize * weight );
-
- startWeight += weight;
-
- // Convert to IPA phoneme code
- p->SetPhonemeCode( TextToPhoneme( p->GetTag() ) );
-
- sentence.AddPhonemeTag( w, p );
-
- number++;
- }
- }
- }
-
- // Free memory
- ::CoTaskMemFree(pElements);
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: Create rules for each word in the reference sentence
-//-----------------------------------------------------------------------------
-typedef struct
-{
- int ruleId;
- SPSTATEHANDLE hRule;
- CSpDynamicString word;
- char plaintext[ 256 ];
-} WORDRULETYPE;
-
-//-----------------------------------------------------------------------------
-// Purpose: Creates start for word of sentence
-// Input : cpRecoGrammar -
-// *root -
-// *rules -
-// word -
-//-----------------------------------------------------------------------------
-void AddWordRule( ISpRecoGrammar* cpRecoGrammar, SPSTATEHANDLE *root, CUtlVector< WORDRULETYPE > *rules, CSpDynamicString& word )
-{
- USES_CONVERSION;
- HRESULT hr;
- WORDRULETYPE *newrule;
-
- int idx = (*rules).AddToTail();
-
- newrule = &(*rules)[ idx ];
-
- newrule->ruleId = DYN_SENTENCERULE + idx + 1;
- newrule->word = word;
-
- strcpy( newrule->plaintext, W2T( word ) );
-
- // Create empty rule
- hr = cpRecoGrammar->CreateNewState( *root, &newrule->hRule );
- Assert( !FAILED( hr ) );
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : cpRecoGrammar -
-// *from -
-// *to -
-//-----------------------------------------------------------------------------
-void AddWordTransitionRule( ISpRecoGrammar* cpRecoGrammar, WORDRULETYPE *from, WORDRULETYPE *to )
-{
- USES_CONVERSION;
-
- HRESULT hr;
- Assert( from );
-
- if ( from && !to )
- {
- OutputDebugString( va( "Transition from %s to TERM\r\n", from->plaintext ) );
- }
- else
- {
- OutputDebugString( va( "Transition from %s to %s\r\n", from->plaintext, to->plaintext ) );
- }
-
- hr = cpRecoGrammar->AddWordTransition( from->hRule, to ? to->hRule : NULL, (WCHAR *)from->word, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL );
- Assert( !FAILED( hr ) );
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : cpRecoGrammar -
-// *from -
-// *to -
-//-----------------------------------------------------------------------------
-void AddOptionalTransitionRule( ISpRecoGrammar* cpRecoGrammar, WORDRULETYPE *from, WORDRULETYPE *to )
-{
- USES_CONVERSION;
-
- HRESULT hr;
- Assert( from );
-
- if ( from && !to )
- {
- OutputDebugString( va( "Opt transition from %s to TERM\r\n", from->plaintext ) );
- }
- else
- {
- OutputDebugString( va( "Opt transition from %s to %s\r\n", from->plaintext, to->plaintext ) );
- }
-
- hr = cpRecoGrammar->AddWordTransition( from->hRule, to ? to->hRule : NULL, NULL, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL );
- Assert( !FAILED( hr ) );
-}
-
-#define MAX_WORD_SKIP 1
-//-----------------------------------------------------------------------------
-// Purpose: Links together all word rule states into a sentence rule CFG
-// Input : singleword -
-// cpRecoGrammar -
-// *root -
-// *rules -
-//-----------------------------------------------------------------------------
-bool BuildRules( ISpRecoGrammar* cpRecoGrammar, SPSTATEHANDLE *root, CUtlVector< WORDRULETYPE > *rules )
-{
- HRESULT hr;
- WORDRULETYPE *rule, *next;
-
- int numrules = (*rules).Size();
-
- rule = &(*rules)[ 0 ];
-
- // Add transition
- hr = cpRecoGrammar->AddWordTransition( *root, rule->hRule, NULL, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL );
- Assert( !FAILED( hr ) );
-
- for ( int i = 0; i < numrules; i++ )
- {
- rule = &(*rules)[ i ];
- if ( i < numrules - 1 )
- {
- next = &(*rules)[ i + 1 ];
- }
- else
- {
- next = NULL;
- }
-
- AddWordTransitionRule( cpRecoGrammar, rule, next );
- }
-
- if ( numrules > 1 )
- {
- for ( int skip = 1; skip <= min( MAX_WORD_SKIP, numrules ); skip++ )
- {
- OutputDebugString( va( "Opt transition from Root to %s\r\n", (*rules)[ 0 ].plaintext ) );
-
- hr = cpRecoGrammar->AddWordTransition( *root, (*rules)[ 0 ].hRule, NULL, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL );
-
- // Now build rules where you can skip 1 to N intervening words
- for ( int i = 1; i < numrules; i++ )
- {
- // Start at the beginning?
- rule = &(*rules)[ i ];
- if ( i < numrules - skip )
- {
- next = &(*rules)[ i + skip ];
- }
- else
- {
- continue;
- }
-
- // Add transition
- AddOptionalTransitionRule( cpRecoGrammar, rule, next );
- }
-
- // Go from final rule to end point
- AddOptionalTransitionRule( cpRecoGrammar, rule, NULL );
- }
- }
-
- // Store it
- hr = cpRecoGrammar->Commit(NULL);
- if ( FAILED( hr ) )
- return false;
-
- return true;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: Debugging, prints alternate list if one is created
-// Input : cpResult -
-// (*pfnPrint -
-//-----------------------------------------------------------------------------
-void PrintAlternates( ISpRecoResult* cpResult, void (*pfnPrint)( const char *fmt, ... ) )
-{
- ISpPhraseAlt *rgPhraseAlt[ 32 ];
- memset( rgPhraseAlt, 0, sizeof( rgPhraseAlt ) );
-
- ULONG ulCount;
-
- ISpPhrase *phrase = ( ISpPhrase * )cpResult;
- if ( phrase )
- {
- SPPHRASE *pElements;
- if ( SUCCEEDED( phrase->GetPhrase( &pElements ) ) )
- {
- if ( pElements->Rule.ulCountOfElements > 0 )
- {
- HRESULT hr = cpResult->GetAlternates(
- pElements->Rule.ulFirstElement,
- pElements->Rule.ulCountOfElements,
- 32,
- rgPhraseAlt,
- &ulCount);
-
- Assert( !FAILED( hr ) );
-
- for ( ULONG r = 0 ; r < ulCount; r++ )
- {
- CSpDynamicString dstrText;
- hr = rgPhraseAlt[ r ]->GetText( (ULONG)SP_GETWHOLEPHRASE, (ULONG)SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
- Assert( !FAILED( hr ) );
-
- pfnPrint( "[ ALT ]" );
- pfnPrint( dstrText.CopyToChar() );
- pfnPrint( "\r\n" );
- }
- }
- }
-
- }
-
- for ( int i = 0; i < 32; i++ )
- {
- if ( rgPhraseAlt[ i ] )
- {
- rgPhraseAlt[ i ]->Release();
- rgPhraseAlt[ i ] = NULL;
- }
- }
-}
-
-void PrintWordsAndPhonemes( CSentence& sentence, void (*pfnPrint)( const char *fmt, ... ) )
-{
- char sz[ 256 ];
- int i;
-
- pfnPrint( "WORDS\r\n\r\n" );
-
- for ( i = 0 ; i < sentence.m_Words.Size(); i++ )
- {
- CWordTag *word = sentence.m_Words[ i ];
- if ( !word )
- continue;
-
- sprintf( sz, "<%u - %u> %s\r\n",
- word->m_uiStartByte, word->m_uiEndByte, word->GetWord() );
-
- pfnPrint( sz );
-
- for ( int j = 0 ; j < word->m_Phonemes.Size(); j++ )
- {
- CPhonemeTag *phoneme = word->m_Phonemes[ j ];
- if ( !phoneme )
- continue;
-
- sprintf( sz, " <%u - %u> %s\r\n",
- phoneme->m_uiStartByte, phoneme->m_uiEndByte, phoneme->GetTag() );
-
- pfnPrint( sz );
- }
- }
-
- pfnPrint( "\r\n" );
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: Given a wave file and a string of words "text", creates a CFG from the
-// sentence and stores the resulting words/phonemes in CSentence
-// Input : *wavname -
-// text -
-// sentence -
-// (*pfnPrint -
-// Output : SR_RESULT
-//-----------------------------------------------------------------------------
-SR_RESULT ExtractPhonemes( const char *wavname, CSpDynamicString& text, CSentence& sentence, void (*pfnPrint)( const char *fmt, ...) )
-{
- // Assume failure
- SR_RESULT result = SR_RESULT_ERROR;
-
- if ( text.Length() <= 0 )
- {
- pfnPrint( "Error: no rule / text specified\n" );
- return result;
- }
-
- USES_CONVERSION;
- HRESULT hr;
-
- CUtlVector < WORDRULETYPE > wordRules;
-
- CComPtr<ISpStream> cpInputStream;
- CComPtr<ISpRecognizer> cpRecognizer;
- CComPtr<ISpRecoContext> cpRecoContext;
- CComPtr<ISpRecoGrammar> cpRecoGrammar;
- CComPtr<ISpPhoneConverter> cpPhoneConv;
-
- // Create basic SAPI stream object
- // NOTE: The helper SpBindToFile can be used to perform the following operations
- hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 Stream object not installed?\n" );
- return result;
- }
-
- CSpStreamFormat sInputFormat;
-
- // setup stream object with wav file MY_WAVE_AUDIO_FILENAME
- // for read-only access, since it will only be access by the SR engine
- hr = cpInputStream->BindToFile(
- T2W(wavname),
- SPFM_OPEN_READONLY,
- NULL,
- sInputFormat.WaveFormatExPtr(),
- SPFEI_ALL_EVENTS );
-
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: couldn't open wav file %s\n", wavname );
- return result;
- }
-
- // Create in-process speech recognition engine
- hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 In process recognizer object not installed?\n" );
- return result;
- }
-
- // Create recognition context to receive events
- hr = cpRecognizer->CreateRecoContext(&cpRecoContext);
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 Unable to create recognizer context\n" );
- return result;
- }
-
- // Create a grammar
- hr = cpRecoContext->CreateGrammar( EP_GRAM_ID, &cpRecoGrammar );
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 Unable to create recognizer grammar\n" );
- return result;
- }
-
- LANGID englishID = 0x409; // 1033 decimal
-
- bool userSpecified = false;
- LANGID langID = SpGetUserDefaultUILanguage();
-
- // Allow commandline override
- if ( CommandLine()->FindParm( "-languageid" ) != 0 )
- {
- userSpecified = true;
- langID = CommandLine()->ParmValue( "-languageid", langID );
- }
-
- // Create a phoneme converter ( so we can convert to IPA codes )
- hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv );
- if ( FAILED( hr ) )
- {
- if ( langID != englishID )
- {
- if ( userSpecified )
- {
- pfnPrint( "Warning: SAPI 5.1 Unable to create phoneme converter for command line override -languageid %i\n", langID );
- }
- else
- {
- pfnPrint( "Warning: SAPI 5.1 Unable to create phoneme converter for default UI language %i\n",langID );
- }
-
- // Try english!!!
- langID = englishID;
- hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv );
- }
-
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 Unable to create phoneme converter for English language id %i\n", langID );
- return result;
- }
- else
- {
- pfnPrint( "Note: SAPI 5.1 Falling back to use english -languageid %i\n", langID );
- }
- }
- else if ( userSpecified )
- {
- pfnPrint( "Note: SAPI 5.1 Using user specified -languageid %i\n",langID );
- }
-
- SPSTATEHANDLE hStateRoot;
- // create/re-create Root level rule of grammar
- hr = cpRecoGrammar->GetRule(L"Root", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateRoot);
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 Unable to create root rule\n" );
- return result;
- }
-
- // Inactivate it so we can alter it
- hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE );
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 Unable to deactivate grammar rules\n" );
- return result;
- }
-
- // Create the rule set from the words in text
- {
- CSpDynamicString currentWord;
- WCHAR *pos = ( WCHAR * )text;
- WCHAR str[ 2 ];
- str[1]= 0;
-
- while ( *pos )
- {
- if ( *pos == L' ' /*|| *pos == L'.' || *pos == L'-'*/ )
- {
- // Add word to rule set
- if ( currentWord.Length() > 0 )
- {
- AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord );
- currentWord.Clear();
- }
- pos++;
- continue;
- }
-
- // Skip anything that's inside a [ xxx ] pair.
- if ( *pos == L'[' )
- {
- while ( *pos && *pos != L']' )
- {
- pos++;
- }
-
- if ( *pos )
- {
- pos++;
- }
- continue;
- }
-
- str[ 0 ] = *pos;
-
- currentWord.Append( str );
- pos++;
- }
-
- if ( currentWord.Length() > 0 )
- {
- AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord );
- }
-
- if ( wordRules.Size() <= 0 )
- {
- pfnPrint( "Error: Text %s contained no usable words\n", text );
- return result;
- }
-
- // Build all word to word transitions in the grammar
- if ( !BuildRules( cpRecoGrammar, &hStateRoot, &wordRules ) )
- {
- pfnPrint( "Error: Rule set for %s could not be generated\n", text );
- return result;
- }
- }
-
- // check for recognitions and end of stream event
- const ULONGLONG ullInterest =
- SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM) | SPFEI(SPEI_FALSE_RECOGNITION) |
- SPFEI(SPEI_PHRASE_START ) | SPFEI(SPEI_HYPOTHESIS ) | SPFEI(SPEI_INTERFERENCE) ;
- hr = cpRecoContext->SetInterest( ullInterest, ullInterest );
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 Unable to set interest level\n" );
- return result;
- }
- // use Win32 events for command-line style application
- hr = cpRecoContext->SetNotifyWin32Event();
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 Unable to set win32 notify event\n" );
- return result;
- }
- // connect wav input to recognizer
- // SAPI will negotiate mismatched engine/input audio formats using system audio codecs, so second parameter is not important - use default of TRUE
- hr = cpRecognizer->SetInput(cpInputStream, TRUE);
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 Unable to associate input stream\n" );
- return result;
- }
-
- // Activate the CFG ( rather than using dictation )
- hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_ACTIVE );
- if ( FAILED( hr ) )
- {
- switch ( hr )
- {
- case E_INVALIDARG:
- pfnPrint( "pszName is invalid or bad. Alternatively, pReserved is non-NULL\n" );
- break;
- case SP_STREAM_UNINITIALIZED:
- pfnPrint( "ISpRecognizer::SetInput has not been called with the InProc recognizer\n" );
- break;
- case SPERR_UNINITIALIZED:
- pfnPrint( "The object has not been properly initialized.\n");
- break;
- case SPERR_UNSUPPORTED_FORMAT:
- pfnPrint( "Audio format is bad or is not recognized. Alternatively, the device driver may be busy by another application and cannot be accessed.\n" );
- break;
- case SPERR_NOT_TOPLEVEL_RULE:
- pfnPrint( "The rule pszName exists, but is not a top-level rule.\n" );
- break;
- default:
- pfnPrint( "Unknown error\n" );
- break;
- }
- pfnPrint( "Error: SAPI 5.1 Unable to activate rule set\n" );
- return result;
- }
-
- // while events occur, continue processing
- // timeout should be greater than the audio stream length, or a reasonable amount of time expected to pass before no more recognitions are expected in an audio stream
- BOOL fEndStreamReached = FALSE;
- while (!fEndStreamReached && S_OK == cpRecoContext->WaitForNotifyEvent( SR_WAVTIMEOUT ))
- {
- CSpEvent spEvent;
- // pull all queued events from the reco context's event queue
-
- while (!fEndStreamReached && S_OK == spEvent.GetFrom(cpRecoContext))
- {
- // Check event type
- switch (spEvent.eEventId)
- {
- case SPEI_INTERFERENCE:
- {
- SPINTERFERENCE interference = spEvent.Interference();
-
- switch ( interference )
- {
- case SPINTERFERENCE_NONE:
- pfnPrint( "[ I None ]\r\n" );
- break;
- case SPINTERFERENCE_NOISE:
- pfnPrint( "[ I Noise ]\r\n" );
- break;
- case SPINTERFERENCE_NOSIGNAL:
- pfnPrint( "[ I No Signal ]\r\n" );
- break;
- case SPINTERFERENCE_TOOLOUD:
- pfnPrint( "[ I Too Loud ]\r\n" );
- break;
- case SPINTERFERENCE_TOOQUIET:
- pfnPrint( "[ I Too Quiet ]\r\n" );
- break;
- case SPINTERFERENCE_TOOFAST:
- pfnPrint( "[ I Too Fast ]\r\n" );
- break;
- case SPINTERFERENCE_TOOSLOW:
- pfnPrint( "[ I Too Slow ]\r\n" );
- break;
- default:
- break;
- }
- }
- break;
- case SPEI_PHRASE_START:
- pfnPrint( "Phrase Start\r\n" );
- sentence.MarkNewPhraseBase();
- break;
-
- case SPEI_HYPOTHESIS:
- case SPEI_RECOGNITION:
- case SPEI_FALSE_RECOGNITION:
- {
- CComPtr<ISpRecoResult> cpResult;
- cpResult = spEvent.RecoResult();
-
- CSpDynamicString dstrText;
- if (spEvent.eEventId == SPEI_FALSE_RECOGNITION)
- {
- dstrText = L"(Unrecognized)";
-
- result = SR_RESULT_FAILED;
-
- // It's possible that the failed recog might have more words, so see if that's the case
- EnumeratePhonemes( cpPhoneConv, cpResult, sentence );
- }
- else
- {
- // Hypothesis or recognition success
- cpResult->GetText( (ULONG)SP_GETWHOLEPHRASE, (ULONG)SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
-
- EnumeratePhonemes( cpPhoneConv, cpResult, sentence );
-
- if ( spEvent.eEventId == SPEI_RECOGNITION )
- {
- result = SR_RESULT_SUCCESS;
- }
-
- pfnPrint( va( "%s%s\r\n", spEvent.eEventId == SPEI_HYPOTHESIS ? "[ Hypothesis ] " : "", dstrText.CopyToChar() ) );
- }
-
- cpResult.Release();
- }
- break;
- // end of the wav file was reached by the speech recognition engine
- case SPEI_END_SR_STREAM:
- fEndStreamReached = TRUE;
- break;
- }
-
- // clear any event data/object references
- spEvent.Clear();
- }// END event pulling loop - break on empty event queue OR end stream
- }// END event polling loop - break on event timeout OR end stream
-
- // Deactivate rule
- hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE );
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 Unable to deactivate rule set\n" );
- return result;
- }
-
- // close the input stream, since we're done with it
- // NOTE: smart pointer will call SpStream's destructor, and consequently ::Close, but code may want to check for errors on ::Close operation
- hr = cpInputStream->Close();
- if ( FAILED( hr ) )
- {
- pfnPrint( "Error: SAPI 5.1 Unable to close input stream\n" );
- return result;
- }
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: HACK HACK: We have to delete the RecoContext key or sapi starts to train
-// itself on each iteration which was causing some problems.
-// Input : hKey -
-//-----------------------------------------------------------------------------
-void RecursiveRegDelKey(HKEY hKey)
-{
- char keyname[256]={0};
- DWORD namesize=256;
-
- //base case: no subkeys when RegEnumKeyEx returns error on index 0
- LONG lResult=RegEnumKeyEx(hKey,0,keyname,&namesize,NULL,NULL,NULL,NULL);
- if (lResult!=ERROR_SUCCESS)
- {
- return;
- }
-
- do
- {
- HKEY subkey;
- LONG lResult2;
- LONG lDelResult;
- lResult2=RegOpenKeyEx(hKey,keyname,0,KEY_ALL_ACCESS,&subkey);
-
- if (lResult2==ERROR_SUCCESS)
- {
- RecursiveRegDelKey(subkey);
-
- RegCloseKey(subkey);
- lDelResult=RegDeleteKey(hKey,keyname);
- namesize=256;
- //use 0 in the next function call because when you delete one, the rest shift down!
- lResult=RegEnumKeyEx(hKey,0,keyname,&namesize,NULL,NULL,NULL,NULL);
- }
-
- else
- {
- break;
- }
-
- } while (lResult!=ERROR_NO_MORE_ITEMS);
-}
-
-bool IsUseable( CWordTag *word )
-{
- if ( word->m_uiStartByte || word->m_uiEndByte )
- return true;
-
- return false;
-}
-
-int FindLastUsableWord( CSentence& outwords )
-{
- int numwords = outwords.m_Words.Size();
- if ( numwords < 1 )
- {
- Assert( 0 );
- return -1;
- }
-
- for ( int i = numwords-1; i >= 0; i-- )
- {
- CWordTag *check = outwords.m_Words[ i ];
- if ( IsUseable( check ) )
- {
- return i;
- }
- }
-
- return -1;
-}
-
-
-int FindFirstUsableWord( CSentence& outwords )
-{
- int numwords = outwords.m_Words.Size();
- if ( numwords < 1 )
- {
- Assert( 0 );
- return -1;
- }
-
- for ( int i = 0; i < numwords; i++ )
- {
- CWordTag *check = outwords.m_Words[ i ];
- if ( IsUseable( check ) )
- {
- return i;
- }
- }
-
- return -1;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: Counts words which have either a valid start or end byte
-// Input : *outwords -
-// Output : int
-//-----------------------------------------------------------------------------
-int CountUsableWords( CSentence& outwords )
-{
- int count = 0;
- int numwords = outwords.m_Words.Size();
- // Nothing to do
- if ( numwords <= 0 )
- return count;
-
- for ( int i = 0; i < numwords; i++ )
- {
- CWordTag *word = outwords.m_Words[ i ];
- if ( !IsUseable( word ) )
- continue;
-
- count++;
- }
-
- return count;
-}
-
-
-//-----------------------------------------------------------------------------
-// Purpose: Counts words which have either a valid start or end byte
-// Input : *outwords -
-// Output : int
-//-----------------------------------------------------------------------------
-int CountUnuseableWords( CSentence& outwords )
-{
- int count = 0;
- int numwords = outwords.m_Words.Size();
- // Nothing to do
- if ( numwords <= 0 )
- return count;
-
- for ( int i = 0; i < numwords; i++ )
- {
- CWordTag *word = outwords.m_Words[ i ];
- if ( IsUseable( word ) )
- continue;
-
- count++;
- }
-
- return count;
-}
-
-// Keeps same relative spacing, but rebases list
-void RepartitionPhonemes( CWordTag *word, unsigned int oldStart, unsigned int oldEnd )
-{
- // Repartition phonemes based on old range
- float oldRange = ( float )( oldEnd - oldStart );
- float newRange = ( float )( word->m_uiEndByte - word->m_uiStartByte );
-
- for ( int i = 0; i < word->m_Phonemes.Size(); i++ )
- {
- CPhonemeTag *tag = word->m_Phonemes[ i ];
- Assert( tag );
-
- float frac1 = 0.0f, frac2 = 0.0f;
- float delta1, delta2;
-
- delta1 = ( float ) ( tag->m_uiStartByte - oldStart );
- delta2 = ( float ) ( tag->m_uiEndByte - oldStart );
- if ( oldRange > 0.0f )
- {
- frac1 = delta1 / oldRange;
- frac2 = delta2 / oldRange;
- }
-
- tag->m_uiStartByte = word->m_uiStartByte + ( unsigned int ) ( frac1 * newRange );
- tag->m_uiEndByte = word->m_uiStartByte + ( unsigned int ) ( frac2 * newRange );
- }
-}
-
-void PartitionWords( CSentence& outwords, int start, int end, int sampleStart, int sampleEnd )
-{
- int wordCount = end - start + 1;
- Assert( wordCount >= 1 );
- int stepSize = ( sampleEnd - sampleStart ) / wordCount;
-
- int currentStart = sampleStart;
-
- for ( int i = start; i <= end; i++ )
- {
- CWordTag *word = outwords.m_Words[ i ];
- Assert( word );
-
- unsigned int oldStart = word->m_uiStartByte;
- unsigned int oldEnd = word->m_uiEndByte;
-
- word->m_uiStartByte = currentStart;
- word->m_uiEndByte = currentStart + stepSize;
-
- RepartitionPhonemes( word, oldStart, oldEnd );
-
- currentStart += stepSize;
- }
-}
-
-void MergeWords( CWordTag *w1, CWordTag *w2 )
-{
- unsigned int start, end;
-
- start = min( w1->m_uiStartByte, w2->m_uiStartByte );
- end = max( w1->m_uiEndByte, w2->m_uiEndByte );
-
- unsigned int mid = ( start + end ) / 2;
-
- unsigned int oldw1start, oldw2start, oldw1end, oldw2end;
-
- oldw1start = w1->m_uiStartByte;
- oldw2start = w2->m_uiStartByte;
- oldw1end = w1->m_uiEndByte;
- oldw2end = w2->m_uiEndByte;
-
- w1->m_uiStartByte = start;
- w1->m_uiEndByte = mid;
- w2->m_uiStartByte = mid;
- w2->m_uiEndByte = end;
-
- RepartitionPhonemes( w1, oldw1start, oldw1end );
- RepartitionPhonemes( w2, oldw2start, oldw2end );
-}
-
-void FixupZeroLengthWords( CSentence& outwords )
-{
- while ( 1 )
- {
- int i;
- for ( i = 0 ; i < outwords.m_Words.Size() - 1; i++ )
- {
- CWordTag *current, *next;
-
- current = outwords.m_Words[ i ];
- next = outwords.m_Words[ i + 1 ];
-
- if ( current->m_uiEndByte - current->m_uiStartByte <= 0 )
- {
- MergeWords( current, next );
- break;
- }
-
- if ( next->m_uiEndByte - next->m_uiStartByte <= 0 )
- {
- MergeWords( current, next );
- break;
- }
- }
-
- if ( i >= outwords.m_Words.Size() - 1 )
- {
- break;
- }
- }
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: Assigns byte spans to the words that IsUseable rejects. Each pass
-//  logs the current word list, repairs one run of such words with
-//  PartitionWords and then re-examines the sentence from scratch.
-// Input  : numsamples - upper bound used when partitioning up to the end
-//			outwords - sentence whose words are updated in place
-//-----------------------------------------------------------------------------
-void ComputeMissingByteSpans( int numsamples, CSentence& outwords )
-{
-	const int wordTotal = outwords.m_Words.Size();
-	if ( wordTotal <= 0 )
-	{
-		// No words, nothing to compute
-		return;
-	}
-
-	const int lastIndex = wordTotal - 1;
-
-	// The pass number is only used for the log output
-	for ( int pass = 1; ; pass++ )
-	{
-		Log( "\nCompute %i\n", pass );
-		LogWords( outwords );
-
-		// Every word is usable: clean up zero-length words and finish
-		if ( !CountUnuseableWords( outwords ) )
-		{
-			FixupZeroLengthWords( outwords );
-			return;
-		}
-
-		// No word is usable: spread all of them evenly over the full sample range
-		if ( !CountUsableWords( outwords ) )
-		{
-			PartitionWords( outwords, 0, lastIndex, 0, numsamples );
-			return;
-		}
-
-		// Leading run: words in front of the first usable word
-		const int firstGood = FindFirstUsableWord( outwords );
-		if ( firstGood > 0 )
-		{
-			CWordTag *anchor = outwords.m_Words[ firstGood ];
-			Assert( anchor );
-
-			if ( anchor->m_uiStartByte == 0 )
-			{
-				// No room in front of the usable word, so it shares its own span
-				PartitionWords( outwords, 0, firstGood, 0, anchor->m_uiEndByte );
-			}
-			else
-			{
-				PartitionWords( outwords, 0, firstGood - 1, 0, anchor->m_uiStartByte );
-			}
-
-			continue;
-		}
-
-		// Trailing run: words behind the last usable word
-		const int lastGood = FindLastUsableWord( outwords );
-		if ( lastGood >= 0 && lastGood < lastIndex )
-		{
-			CWordTag *anchor = outwords.m_Words[ lastGood ];
-			Assert( anchor );
-
-			if ( anchor->m_uiEndByte == (unsigned int)numsamples )
-			{
-				// No room behind the usable word, so it shares its own span
-				PartitionWords( outwords, lastGood, lastIndex, anchor->m_uiStartByte, numsamples );
-			}
-			else
-			{
-				PartitionWords( outwords, lastGood + 1, lastIndex, anchor->m_uiEndByte, numsamples );
-			}
-
-			continue;
-		}
-
-		// Both ends are fine, so the remaining runs sit in the middle.
-		// Walk past the leading usable words, remembering where the last one ended.
-		int gapStartByte = 0;
-		int gapFirst = 0;
-		while ( gapFirst < wordTotal && IsUseable( outwords.m_Words[ gapFirst ] ) )
-		{
-			gapStartByte = outwords.m_Words[ gapFirst ]->m_uiEndByte;
-			gapFirst++;
-		}
-
-		if ( gapFirst >= wordTotal )
-		{
-			// Nothing found on this walk, take another pass
-			continue;
-		}
-
-		// Extend the run until the next usable word, whose start closes the gap
-		int gapEndByte = 0;
-		int gapLast = gapFirst;
-		for ( int probe = gapFirst + 1; probe < wordTotal; probe++ )
-		{
-			CWordTag *candidate = outwords.m_Words[ probe ];
-			if ( IsUseable( candidate ) )
-			{
-				gapEndByte = candidate->m_uiStartByte;
-				break;
-			}
-
-			gapLast = probe;
-		}
-
-		// Fix this one run, then start the next pass
-		PartitionWords( outwords, gapFirst, gapLast, gapStartByte, gapEndByte );
-	}
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: Clones a word tag and deletes every phoneme held by the clone
-// Input  : *source - word to copy
-// Output : CWordTag * - newly allocated copy with an empty phoneme list
-//-----------------------------------------------------------------------------
-static CWordTag *CopyWordWithoutPhonemes( const CWordTag *source )
-{
-	CWordTag *copy = new CWordTag( *source );
-
-	while ( copy->m_Phonemes.Size() > 0 )
-	{
-		CPhonemeTag *kill = copy->m_Phonemes[ 0 ];
-		copy->m_Phonemes.Remove( 0 );
-		delete kill;
-	}
-
-	return copy;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: Given a wavfile and a list of inwords, determines the word/phoneme
-//  byte spans for the sentence. Recognized words come from ExtractPhonemes;
-//  words of the input sentence that are missing from that result are
-//  inserted without phonemes and get their spans from ComputeMissingByteSpans.
-//  Before anything else, the HKCU "Software\Microsoft\Speech\RecoProfiles"
-//  registry key is opened and passed to RecursiveRegDelKey.
-//  NOTE(review): presumably this wipes cached recognition profiles - confirm
-//  against RecursiveRegDelKey.
-// Input  : *wavfile - passed through to ExtractPhonemes
-//			numsamples - passed to ComputeMissingByteSpans
-//			*pfnPrint - printf-style callback used for status messages
-//			inwords - reference sentence; its text is rebuilt from its words
-//			 when empty
-//			outwords - reset, then filled with the resulting words
-// Output : SR_RESULT - SR_RESULT_ERROR when the sentence text is empty,
-//			 otherwise whatever ExtractPhonemes returned
-//-----------------------------------------------------------------------------
-static SR_RESULT SAPI_ExtractPhonemes(
-	const char *wavfile,
-	int numsamples,
-	void (*pfnPrint)( const char *fmt, ... ),
-	CSentence& inwords,
-	CSentence& outwords )
-{
-	LogReset();
-
-	USES_CONVERSION;
-
-	CSpDynamicString text;
-	text.Clear();
-
-	HKEY hkwipe;
-	LONG lResult = RegOpenKeyEx( HKEY_CURRENT_USER, "Software\\Microsoft\\Speech\\RecoProfiles", 0, KEY_ALL_ACCESS, &hkwipe );
-	if ( lResult == ERROR_SUCCESS )
-	{
-		RecursiveRegDelKey( hkwipe );
-		RegCloseKey( hkwipe );
-	}
-
-	// No sentence text yet, build it from the word list
-	if ( strlen( inwords.GetText() ) == 0 )
-	{
-		inwords.SetTextFromWords();
-	}
-
-	// Wide-character copy of the sentence text
-	text.Append( T2W( inwords.GetText() ) );
-
-	if ( text.Length() == 0 )
-	{
-		pfnPrint( "Input sentence is empty!\n" );
-		return SR_RESULT_ERROR;
-	}
-
-	CSentence sentence;
-
-	pfnPrint( "Processing...\r\n" );
-
-	// Give it a try
-	SR_RESULT result = ExtractPhonemes( wavfile, text, sentence, pfnPrint );
-
-	pfnPrint( "Finished.\r\n" );
-
-	// Copy results to outputs
-	outwords.Reset();
-	outwords.SetText( inwords.GetText() );
-
-	Log( "Starting\n" );
-	LogWords( inwords );
-
-	if ( SR_RESULT_ERROR == result )
-	{
-		return result;
-	}
-
-	Log( "Hypothesized\n" );
-	LogWords( sentence );
-
-	for ( int i = 0; i < sentence.m_Words.Size(); i++ )
-	{
-		CWordTag *tag = sentence.m_Words[ i ];
-
-		// Skip empty slots and the '...' tag
-		if ( !tag || !stricmp( tag->GetWord(), "..." ) )
-		{
-			continue;
-		}
-
-		outwords.m_Words.AddToTail( new CWordTag( *tag ) );
-	}
-
-	// Now insert unrecognized/skipped words from the original list.
-	// frompos walks the source (inwords), topos walks the destination (outwords).
-	int frompos = 0, topos = 0;
-
-	while ( frompos < inwords.m_Words.Size() )
-	{
-		const CWordTag *fromTag = inwords.m_Words[ frompos ];
-
-		// Reached the end of the destination list: append the remaining
-		// source words one by one
-		if ( topos >= outwords.m_Words.Size() )
-		{
-			outwords.m_Words.AddToTail( CopyWordWithoutPhonemes( fromTag ) );
-			frompos++;
-			topos++;
-			continue;
-		}
-
-		// Destination word
-		const CWordTag *toTag = outwords.m_Words[ topos ];
-
-		// Words match, just skip ahead
-		if ( !stricmp( fromTag->GetWord(), toTag->GetWord() ) )
-		{
-			frompos++;
-			topos++;
-			continue;
-		}
-
-		// The only case we handle is that something in the source wasn't in the destination.
-		// Find the next source word that matches the current destination word.
-		int skipAhead = frompos + 1;
-		bool found = false;
-		while ( skipAhead < inwords.m_Words.Size() )
-		{
-			const CWordTag *sourceWord = inwords.m_Words[ skipAhead ];
-			if ( !stricmp( sourceWord->GetWord(), toTag->GetWord() ) )
-			{
-				found = true;
-				break;
-			}
-
-			skipAhead++;
-		}
-
-		// Destination has a word that is not in the source, move on to the next destination word
-		if ( !found )
-		{
-			topos++;
-			continue;
-		}
-
-		// Insert the skipped source words in front of the matching destination word
-		int skipCount = skipAhead - frompos;
-		while ( --skipCount >= 0 )
-		{
-			outwords.m_Words.InsertBefore( topos, CopyWordWithoutPhonemes( inwords.m_Words[ frompos++ ] ) );
-			topos++;
-		}
-
-		// Step over the matching pair itself
-		frompos++;
-		topos++;
-	}
-
-	Log( "\nDone simple check\n" );
-
-	LogWords( outwords );
-	LogPhonemes( outwords );
-
-	ComputeMissingByteSpans( numsamples, outwords );
-
-	Log( "\nFinal check\n" );
-
-	LogWords( outwords );
-	LogPhonemes( outwords );
-
-	// Return results
-	return result;
-}
-
-
-//-----------------------------------------------------------------------------
-// Purpose: IPhonemeExtractor implementation that forwards extraction requests
-//  to SAPI_ExtractPhonemes
-//-----------------------------------------------------------------------------
-class CPhonemeExtractorSAPI : public IPhonemeExtractor
-{
-public:
-	// Reports the API type of this extractor
-	virtual PE_APITYPE GetAPIType() const { return SPEECH_API_SAPI; }
-
-	// Display name, used for menus, etc
-	virtual char const *GetName() const { return "MS SAPI 5.1"; }
-
-	// Passes every argument through unchanged and returns the result as is
-	SR_RESULT Extract(
-		const char *wavfile,
-		int numsamples,
-		void (*pfnPrint)( const char *fmt, ... ),
-		CSentence& inwords,
-		CSentence& outwords )
-	{
-		SR_RESULT outcome = SAPI_ExtractPhonemes( wavfile, numsamples, pfnPrint, inwords, outwords );
-		return outcome;
-	}
-};
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// +// extracephonemes.cpp : Defines the entry point for the console application. +// +#define PROTECTED_THINGS_DISABLE + +#include "tier0/wchartypes.h" +#include <stdio.h> +#include <windows.h> +#include <tchar.h> +#include "sphelper.h" +#include "spddkhlp.h" +// ATL Header Files +#include <atlbase.h> +// Face poser and util includes +#include "utlvector.h" +#include "phonemeextractor/PhonemeExtractor.h" +#include "PhonemeConverter.h" +#include "sentence.h" +#include "tier0/dbg.h" +#include "tier0/icommandline.h" +#include "filesystem.h" + +// Extract phoneme grammar id +#define EP_GRAM_ID 101 +// First rule of dynamic sentence rule set +#define DYN_SENTENCERULE 102 +// # of milliseconds to allow for processing before timeout +#define SR_WAVTIMEOUT 4000 +// Weight tag for rule to rule word/rule transitions +#define CONFIDENCE_WEIGHT 0.0f + +//#define LOGGING 1 +#define LOGFILE "c:\\fp.log" + +void LogReset( void ) +{ +#if LOGGING + FILE *fp = fopen( LOGFILE, "w" ); + if ( fp ) + fclose( fp ); +#endif +} + +char *va( const char *fmt, ... 
); + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *words - +//----------------------------------------------------------------------------- +void LogWords( CSentence& sentence ) +{ + Log( "Wordcount == %i\n", sentence.m_Words.Size() ); + + for ( int i = 0; i < sentence.m_Words.Size(); i++ ) + { + const CWordTag *w = sentence.m_Words[ i ]; + Log( "Word %s %u to %u\n", w->GetWord(), w->m_uiStartByte, w->m_uiEndByte ); + } +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *phonemes - +//----------------------------------------------------------------------------- +void LogPhonemes( CSentence& sentence ) +{ + return; + + Log( "Phonemecount == %i\n", sentence.CountPhonemes() ); + + for ( int i = 0; i < sentence.m_Words.Size(); i++ ) + { + const CWordTag *w = sentence.m_Words[ i ]; + + for ( int j = 0; j < w->m_Phonemes.Size(); j++ ) + { + const CPhonemeTag *p = w->m_Phonemes[ j ]; + Log( "Phoneme %s %u to %u\n", p->GetTag(), p->m_uiStartByte, p->m_uiEndByte ); + } + } +} + +#define NANO_CONVERT 10000000.0f; + +//----------------------------------------------------------------------------- +// Purpose: Walk list of words and phonemes and create phoneme tags in CSentence object +// FIXME: Right now, phonemes are assumed to evenly space out across a word. 
+// Input : *converter - +// result - +// sentence - +//----------------------------------------------------------------------------- +void EnumeratePhonemes( ISpPhoneConverter *converter, const ISpRecoResult* result, CSentence& sentence ) +{ + USES_CONVERSION; + + // Grab access to element container + ISpPhrase *phrase = ( ISpPhrase * )result; + if ( !phrase ) + return; + + SPPHRASE *pElements; + if ( !SUCCEEDED( phrase->GetPhrase( &pElements ) ) ) + return; + + // Only use it if it's better/same size as what we already had on-hand + if ( pElements->Rule.ulCountOfElements > 0 ) + //(unsigned int)( sentence.m_Words.Size() - sentence.GetWordBase() ) ) + { + sentence.ResetToBase(); + + // Walk list of words + for ( ULONG i = 0; i < pElements->Rule.ulCountOfElements; i++ ) + { + unsigned int wordstart, wordend; + + // Get start/end sample index + wordstart = pElements->pElements[i].ulAudioStreamOffset + (unsigned int)pElements->ullAudioStreamPosition; + wordend = wordstart + pElements->pElements[i].ulAudioSizeBytes; + + // Create word tag + CWordTag *w = new CWordTag( W2T( pElements->pElements[i].pszDisplayText ) ); + Assert( w ); + w->m_uiStartByte = wordstart; + w->m_uiEndByte = wordend; + + sentence.AddWordTag( w ); + + // Count # of phonemes in this word + SPPHONEID pstr[ 2 ]; + pstr[ 1 ] = 0; + WCHAR wszPhoneme[ SP_MAX_PRON_LENGTH ]; + + const SPPHONEID *current; + SPPHONEID phoneme; + current = pElements->pElements[i].pszPronunciation; + float total_weight = 0.0f; + while ( 1 ) + { + phoneme = *current++; + if ( !phoneme ) + break; + + pstr[ 0 ] = phoneme; + wszPhoneme[ 0 ] = L'\0'; + + converter->IdToPhone( pstr, wszPhoneme ); + + total_weight += WeightForPhoneme( W2A( wszPhoneme ) ); + } + + current = pElements->pElements[i].pszPronunciation; + + // Decide # of bytes/phoneme weight + float psize = 0; + if ( total_weight ) + { + psize = ( wordend - wordstart ) / total_weight; + } + + int number = 0; + + // Re-walk the phoneme list and create true phoneme tags + 
float startWeight = 0.0f; + while ( 1 ) + { + phoneme = *current++; + if ( !phoneme ) + break; + + pstr[ 0 ] = phoneme; + wszPhoneme[ 0 ] = L'\0'; + + converter->IdToPhone( pstr, wszPhoneme ); + + CPhonemeTag *p = new CPhonemeTag( W2A( wszPhoneme ) ); + Assert( p ); + + float weight = WeightForPhoneme( W2A( wszPhoneme ) ); + + p->m_uiStartByte = wordstart + (int)( startWeight * psize ); + p->m_uiEndByte = p->m_uiStartByte + (int)( psize * weight ); + + startWeight += weight; + + // Convert to IPA phoneme code + p->SetPhonemeCode( TextToPhoneme( p->GetTag() ) ); + + sentence.AddPhonemeTag( w, p ); + + number++; + } + } + } + + // Free memory + ::CoTaskMemFree(pElements); +} + +//----------------------------------------------------------------------------- +// Purpose: Create rules for each word in the reference sentence +//----------------------------------------------------------------------------- +typedef struct +{ + int ruleId; + SPSTATEHANDLE hRule; + CSpDynamicString word; + char plaintext[ 256 ]; +} WORDRULETYPE; + +//----------------------------------------------------------------------------- +// Purpose: Creates start for word of sentence +// Input : cpRecoGrammar - +// *root - +// *rules - +// word - +//----------------------------------------------------------------------------- +void AddWordRule( ISpRecoGrammar* cpRecoGrammar, SPSTATEHANDLE *root, CUtlVector< WORDRULETYPE > *rules, CSpDynamicString& word ) +{ + USES_CONVERSION; + HRESULT hr; + WORDRULETYPE *newrule; + + int idx = (*rules).AddToTail(); + + newrule = &(*rules)[ idx ]; + + newrule->ruleId = DYN_SENTENCERULE + idx + 1; + newrule->word = word; + + strcpy( newrule->plaintext, W2T( word ) ); + + // Create empty rule + hr = cpRecoGrammar->CreateNewState( *root, &newrule->hRule ); + Assert( !FAILED( hr ) ); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : cpRecoGrammar - +// *from - +// *to - 
+//----------------------------------------------------------------------------- +void AddWordTransitionRule( ISpRecoGrammar* cpRecoGrammar, WORDRULETYPE *from, WORDRULETYPE *to ) +{ + USES_CONVERSION; + + HRESULT hr; + Assert( from ); + + if ( from && !to ) + { + OutputDebugString( va( "Transition from %s to TERM\r\n", from->plaintext ) ); + } + else + { + OutputDebugString( va( "Transition from %s to %s\r\n", from->plaintext, to->plaintext ) ); + } + + hr = cpRecoGrammar->AddWordTransition( from->hRule, to ? to->hRule : NULL, (WCHAR *)from->word, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL ); + Assert( !FAILED( hr ) ); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : cpRecoGrammar - +// *from - +// *to - +//----------------------------------------------------------------------------- +void AddOptionalTransitionRule( ISpRecoGrammar* cpRecoGrammar, WORDRULETYPE *from, WORDRULETYPE *to ) +{ + USES_CONVERSION; + + HRESULT hr; + Assert( from ); + + if ( from && !to ) + { + OutputDebugString( va( "Opt transition from %s to TERM\r\n", from->plaintext ) ); + } + else + { + OutputDebugString( va( "Opt transition from %s to %s\r\n", from->plaintext, to->plaintext ) ); + } + + hr = cpRecoGrammar->AddWordTransition( from->hRule, to ? 
to->hRule : NULL, NULL, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL ); + Assert( !FAILED( hr ) ); +} + +#define MAX_WORD_SKIP 1 +//----------------------------------------------------------------------------- +// Purpose: Links together all word rule states into a sentence rule CFG +// Input : singleword - +// cpRecoGrammar - +// *root - +// *rules - +//----------------------------------------------------------------------------- +bool BuildRules( ISpRecoGrammar* cpRecoGrammar, SPSTATEHANDLE *root, CUtlVector< WORDRULETYPE > *rules ) +{ + HRESULT hr; + WORDRULETYPE *rule, *next; + + int numrules = (*rules).Size(); + + rule = &(*rules)[ 0 ]; + + // Add transition + hr = cpRecoGrammar->AddWordTransition( *root, rule->hRule, NULL, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL ); + Assert( !FAILED( hr ) ); + + for ( int i = 0; i < numrules; i++ ) + { + rule = &(*rules)[ i ]; + if ( i < numrules - 1 ) + { + next = &(*rules)[ i + 1 ]; + } + else + { + next = NULL; + } + + AddWordTransitionRule( cpRecoGrammar, rule, next ); + } + + if ( numrules > 1 ) + { + for ( int skip = 1; skip <= min( MAX_WORD_SKIP, numrules ); skip++ ) + { + OutputDebugString( va( "Opt transition from Root to %s\r\n", (*rules)[ 0 ].plaintext ) ); + + hr = cpRecoGrammar->AddWordTransition( *root, (*rules)[ 0 ].hRule, NULL, NULL, SPWT_LEXICAL, CONFIDENCE_WEIGHT, NULL ); + + // Now build rules where you can skip 1 to N intervening words + for ( int i = 1; i < numrules; i++ ) + { + // Start at the beginning? 
+ rule = &(*rules)[ i ]; + if ( i < numrules - skip ) + { + next = &(*rules)[ i + skip ]; + } + else + { + continue; + } + + // Add transition + AddOptionalTransitionRule( cpRecoGrammar, rule, next ); + } + + // Go from final rule to end point + AddOptionalTransitionRule( cpRecoGrammar, rule, NULL ); + } + } + + // Store it + hr = cpRecoGrammar->Commit(NULL); + if ( FAILED( hr ) ) + return false; + + return true; +} + +//----------------------------------------------------------------------------- +// Purpose: Debugging, prints alternate list if one is created +// Input : cpResult - +// (*pfnPrint - +//----------------------------------------------------------------------------- +void PrintAlternates( ISpRecoResult* cpResult, void (*pfnPrint)( const char *fmt, ... ) ) +{ + ISpPhraseAlt *rgPhraseAlt[ 32 ]; + memset( rgPhraseAlt, 0, sizeof( rgPhraseAlt ) ); + + ULONG ulCount; + + ISpPhrase *phrase = ( ISpPhrase * )cpResult; + if ( phrase ) + { + SPPHRASE *pElements; + if ( SUCCEEDED( phrase->GetPhrase( &pElements ) ) ) + { + if ( pElements->Rule.ulCountOfElements > 0 ) + { + HRESULT hr = cpResult->GetAlternates( + pElements->Rule.ulFirstElement, + pElements->Rule.ulCountOfElements, + 32, + rgPhraseAlt, + &ulCount); + + Assert( !FAILED( hr ) ); + + for ( ULONG r = 0 ; r < ulCount; r++ ) + { + CSpDynamicString dstrText; + hr = rgPhraseAlt[ r ]->GetText( (ULONG)SP_GETWHOLEPHRASE, (ULONG)SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL); + Assert( !FAILED( hr ) ); + + pfnPrint( "[ ALT ]" ); + pfnPrint( dstrText.CopyToChar() ); + pfnPrint( "\r\n" ); + } + } + } + + } + + for ( int i = 0; i < 32; i++ ) + { + if ( rgPhraseAlt[ i ] ) + { + rgPhraseAlt[ i ]->Release(); + rgPhraseAlt[ i ] = NULL; + } + } +} + +void PrintWordsAndPhonemes( CSentence& sentence, void (*pfnPrint)( const char *fmt, ... 
) ) +{ + char sz[ 256 ]; + int i; + + pfnPrint( "WORDS\r\n\r\n" ); + + for ( i = 0 ; i < sentence.m_Words.Size(); i++ ) + { + CWordTag *word = sentence.m_Words[ i ]; + if ( !word ) + continue; + + sprintf( sz, "<%u - %u> %s\r\n", + word->m_uiStartByte, word->m_uiEndByte, word->GetWord() ); + + pfnPrint( sz ); + + for ( int j = 0 ; j < word->m_Phonemes.Size(); j++ ) + { + CPhonemeTag *phoneme = word->m_Phonemes[ j ]; + if ( !phoneme ) + continue; + + sprintf( sz, " <%u - %u> %s\r\n", + phoneme->m_uiStartByte, phoneme->m_uiEndByte, phoneme->GetTag() ); + + pfnPrint( sz ); + } + } + + pfnPrint( "\r\n" ); +} + +//----------------------------------------------------------------------------- +// Purpose: Given a wave file and a string of words "text", creates a CFG from the +// sentence and stores the resulting words/phonemes in CSentence +// Input : *wavname - +// text - +// sentence - +// (*pfnPrint - +// Output : SR_RESULT +//----------------------------------------------------------------------------- +SR_RESULT ExtractPhonemes( const char *wavname, CSpDynamicString& text, CSentence& sentence, void (*pfnPrint)( const char *fmt, ...) 
) +{ + // Assume failure + SR_RESULT result = SR_RESULT_ERROR; + + if ( text.Length() <= 0 ) + { + pfnPrint( "Error: no rule / text specified\n" ); + return result; + } + + USES_CONVERSION; + HRESULT hr; + + CUtlVector < WORDRULETYPE > wordRules; + + CComPtr<ISpStream> cpInputStream; + CComPtr<ISpRecognizer> cpRecognizer; + CComPtr<ISpRecoContext> cpRecoContext; + CComPtr<ISpRecoGrammar> cpRecoGrammar; + CComPtr<ISpPhoneConverter> cpPhoneConv; + + // Create basic SAPI stream object + // NOTE: The helper SpBindToFile can be used to perform the following operations + hr = cpInputStream.CoCreateInstance(CLSID_SpStream); + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 Stream object not installed?\n" ); + return result; + } + + CSpStreamFormat sInputFormat; + + // setup stream object with wav file MY_WAVE_AUDIO_FILENAME + // for read-only access, since it will only be access by the SR engine + hr = cpInputStream->BindToFile( + T2W(wavname), + SPFM_OPEN_READONLY, + NULL, + sInputFormat.WaveFormatExPtr(), + SPFEI_ALL_EVENTS ); + + if ( FAILED( hr ) ) + { + pfnPrint( "Error: couldn't open wav file %s\n", wavname ); + return result; + } + + // Create in-process speech recognition engine + hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer); + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 In process recognizer object not installed?\n" ); + return result; + } + + // Create recognition context to receive events + hr = cpRecognizer->CreateRecoContext(&cpRecoContext); + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 Unable to create recognizer context\n" ); + return result; + } + + // Create a grammar + hr = cpRecoContext->CreateGrammar( EP_GRAM_ID, &cpRecoGrammar ); + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 Unable to create recognizer grammar\n" ); + return result; + } + + LANGID englishID = 0x409; // 1033 decimal + + bool userSpecified = false; + LANGID langID = SpGetUserDefaultUILanguage(); + + // Allow commandline override + if ( 
CommandLine()->FindParm( "-languageid" ) != 0 ) + { + userSpecified = true; + langID = CommandLine()->ParmValue( "-languageid", langID ); + } + + // Create a phoneme converter ( so we can convert to IPA codes ) + hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv ); + if ( FAILED( hr ) ) + { + if ( langID != englishID ) + { + if ( userSpecified ) + { + pfnPrint( "Warning: SAPI 5.1 Unable to create phoneme converter for command line override -languageid %i\n", langID ); + } + else + { + pfnPrint( "Warning: SAPI 5.1 Unable to create phoneme converter for default UI language %i\n",langID ); + } + + // Try english!!! + langID = englishID; + hr = SpCreatePhoneConverter( langID, NULL, NULL, &cpPhoneConv ); + } + + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 Unable to create phoneme converter for English language id %i\n", langID ); + return result; + } + else + { + pfnPrint( "Note: SAPI 5.1 Falling back to use english -languageid %i\n", langID ); + } + } + else if ( userSpecified ) + { + pfnPrint( "Note: SAPI 5.1 Using user specified -languageid %i\n",langID ); + } + + SPSTATEHANDLE hStateRoot; + // create/re-create Root level rule of grammar + hr = cpRecoGrammar->GetRule(L"Root", 0, SPRAF_TopLevel | SPRAF_Active, TRUE, &hStateRoot); + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 Unable to create root rule\n" ); + return result; + } + + // Inactivate it so we can alter it + hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE ); + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 Unable to deactivate grammar rules\n" ); + return result; + } + + // Create the rule set from the words in text + { + CSpDynamicString currentWord; + WCHAR *pos = ( WCHAR * )text; + WCHAR str[ 2 ]; + str[1]= 0; + + while ( *pos ) + { + if ( *pos == L' ' /*|| *pos == L'.' 
|| *pos == L'-'*/ ) + { + // Add word to rule set + if ( currentWord.Length() > 0 ) + { + AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord ); + currentWord.Clear(); + } + pos++; + continue; + } + + // Skip anything that's inside a [ xxx ] pair. + if ( *pos == L'[' ) + { + while ( *pos && *pos != L']' ) + { + pos++; + } + + if ( *pos ) + { + pos++; + } + continue; + } + + str[ 0 ] = *pos; + + currentWord.Append( str ); + pos++; + } + + if ( currentWord.Length() > 0 ) + { + AddWordRule( cpRecoGrammar, &hStateRoot, &wordRules, currentWord ); + } + + if ( wordRules.Size() <= 0 ) + { + pfnPrint( "Error: Text %s contained no usable words\n", text ); + return result; + } + + // Build all word to word transitions in the grammar + if ( !BuildRules( cpRecoGrammar, &hStateRoot, &wordRules ) ) + { + pfnPrint( "Error: Rule set for %s could not be generated\n", text ); + return result; + } + } + + // check for recognitions and end of stream event + const ULONGLONG ullInterest = + SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM) | SPFEI(SPEI_FALSE_RECOGNITION) | + SPFEI(SPEI_PHRASE_START ) | SPFEI(SPEI_HYPOTHESIS ) | SPFEI(SPEI_INTERFERENCE) ; + hr = cpRecoContext->SetInterest( ullInterest, ullInterest ); + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 Unable to set interest level\n" ); + return result; + } + // use Win32 events for command-line style application + hr = cpRecoContext->SetNotifyWin32Event(); + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 Unable to set win32 notify event\n" ); + return result; + } + // connect wav input to recognizer + // SAPI will negotiate mismatched engine/input audio formats using system audio codecs, so second parameter is not important - use default of TRUE + hr = cpRecognizer->SetInput(cpInputStream, TRUE); + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 Unable to associate input stream\n" ); + return result; + } + + // Activate the CFG ( rather than using dictation ) + hr = 
cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_ACTIVE ); + if ( FAILED( hr ) ) + { + switch ( hr ) + { + case E_INVALIDARG: + pfnPrint( "pszName is invalid or bad. Alternatively, pReserved is non-NULL\n" ); + break; + case SP_STREAM_UNINITIALIZED: + pfnPrint( "ISpRecognizer::SetInput has not been called with the InProc recognizer\n" ); + break; + case SPERR_UNINITIALIZED: + pfnPrint( "The object has not been properly initialized.\n"); + break; + case SPERR_UNSUPPORTED_FORMAT: + pfnPrint( "Audio format is bad or is not recognized. Alternatively, the device driver may be busy by another application and cannot be accessed.\n" ); + break; + case SPERR_NOT_TOPLEVEL_RULE: + pfnPrint( "The rule pszName exists, but is not a top-level rule.\n" ); + break; + default: + pfnPrint( "Unknown error\n" ); + break; + } + pfnPrint( "Error: SAPI 5.1 Unable to activate rule set\n" ); + return result; + } + + // while events occur, continue processing + // timeout should be greater than the audio stream length, or a reasonable amount of time expected to pass before no more recognitions are expected in an audio stream + BOOL fEndStreamReached = FALSE; + while (!fEndStreamReached && S_OK == cpRecoContext->WaitForNotifyEvent( SR_WAVTIMEOUT )) + { + CSpEvent spEvent; + // pull all queued events from the reco context's event queue + + while (!fEndStreamReached && S_OK == spEvent.GetFrom(cpRecoContext)) + { + // Check event type + switch (spEvent.eEventId) + { + case SPEI_INTERFERENCE: + { + SPINTERFERENCE interference = spEvent.Interference(); + + switch ( interference ) + { + case SPINTERFERENCE_NONE: + pfnPrint( "[ I None ]\r\n" ); + break; + case SPINTERFERENCE_NOISE: + pfnPrint( "[ I Noise ]\r\n" ); + break; + case SPINTERFERENCE_NOSIGNAL: + pfnPrint( "[ I No Signal ]\r\n" ); + break; + case SPINTERFERENCE_TOOLOUD: + pfnPrint( "[ I Too Loud ]\r\n" ); + break; + case SPINTERFERENCE_TOOQUIET: + pfnPrint( "[ I Too Quiet ]\r\n" ); + break; + case SPINTERFERENCE_TOOFAST: + pfnPrint( "[ I Too 
Fast ]\r\n" ); + break; + case SPINTERFERENCE_TOOSLOW: + pfnPrint( "[ I Too Slow ]\r\n" ); + break; + default: + break; + } + } + break; + case SPEI_PHRASE_START: + pfnPrint( "Phrase Start\r\n" ); + sentence.MarkNewPhraseBase(); + break; + + case SPEI_HYPOTHESIS: + case SPEI_RECOGNITION: + case SPEI_FALSE_RECOGNITION: + { + CComPtr<ISpRecoResult> cpResult; + cpResult = spEvent.RecoResult(); + + CSpDynamicString dstrText; + if (spEvent.eEventId == SPEI_FALSE_RECOGNITION) + { + dstrText = L"(Unrecognized)"; + + result = SR_RESULT_FAILED; + + // It's possible that the failed recog might have more words, so see if that's the case + EnumeratePhonemes( cpPhoneConv, cpResult, sentence ); + } + else + { + // Hypothesis or recognition success + cpResult->GetText( (ULONG)SP_GETWHOLEPHRASE, (ULONG)SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL); + + EnumeratePhonemes( cpPhoneConv, cpResult, sentence ); + + if ( spEvent.eEventId == SPEI_RECOGNITION ) + { + result = SR_RESULT_SUCCESS; + } + + pfnPrint( va( "%s%s\r\n", spEvent.eEventId == SPEI_HYPOTHESIS ? 
"[ Hypothesis ] " : "", dstrText.CopyToChar() ) ); + } + + cpResult.Release(); + } + break; + // end of the wav file was reached by the speech recognition engine + case SPEI_END_SR_STREAM: + fEndStreamReached = TRUE; + break; + } + + // clear any event data/object references + spEvent.Clear(); + }// END event pulling loop - break on empty event queue OR end stream + }// END event polling loop - break on event timeout OR end stream + + // Deactivate rule + hr = cpRecoGrammar->SetRuleState( NULL, NULL, SPRS_INACTIVE ); + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 Unable to deactivate rule set\n" ); + return result; + } + + // close the input stream, since we're done with it + // NOTE: smart pointer will call SpStream's destructor, and consequently ::Close, but code may want to check for errors on ::Close operation + hr = cpInputStream->Close(); + if ( FAILED( hr ) ) + { + pfnPrint( "Error: SAPI 5.1 Unable to close input stream\n" ); + return result; + } + + return result; +} + +//----------------------------------------------------------------------------- +// Purpose: HACK HACK: We have to delete the RecoContext key or sapi starts to train +// itself on each iteration which was causing some problems. +// Input : hKey - +//----------------------------------------------------------------------------- +void RecursiveRegDelKey(HKEY hKey) +{ + char keyname[256]={0}; + DWORD namesize=256; + + //base case: no subkeys when RegEnumKeyEx returns error on index 0 + LONG lResult=RegEnumKeyEx(hKey,0,keyname,&namesize,NULL,NULL,NULL,NULL); + if (lResult!=ERROR_SUCCESS) + { + return; + } + + do + { + HKEY subkey; + LONG lResult2; + LONG lDelResult; + lResult2=RegOpenKeyEx(hKey,keyname,0,KEY_ALL_ACCESS,&subkey); + + if (lResult2==ERROR_SUCCESS) + { + RecursiveRegDelKey(subkey); + + RegCloseKey(subkey); + lDelResult=RegDeleteKey(hKey,keyname); + namesize=256; + //use 0 in the next function call because when you delete one, the rest shift down! 
+ lResult=RegEnumKeyEx(hKey,0,keyname,&namesize,NULL,NULL,NULL,NULL); + } + + else + { + break; + } + + } while (lResult!=ERROR_NO_MORE_ITEMS); +} + +bool IsUseable( CWordTag *word ) +{ + if ( word->m_uiStartByte || word->m_uiEndByte ) + return true; + + return false; +} + +int FindLastUsableWord( CSentence& outwords ) +{ + int numwords = outwords.m_Words.Size(); + if ( numwords < 1 ) + { + Assert( 0 ); + return -1; + } + + for ( int i = numwords-1; i >= 0; i-- ) + { + CWordTag *check = outwords.m_Words[ i ]; + if ( IsUseable( check ) ) + { + return i; + } + } + + return -1; +} + + +int FindFirstUsableWord( CSentence& outwords ) +{ + int numwords = outwords.m_Words.Size(); + if ( numwords < 1 ) + { + Assert( 0 ); + return -1; + } + + for ( int i = 0; i < numwords; i++ ) + { + CWordTag *check = outwords.m_Words[ i ]; + if ( IsUseable( check ) ) + { + return i; + } + } + + return -1; +} + +//----------------------------------------------------------------------------- +// Purpose: Counts words which have either a valid start or end byte +// Input : *outwords - +// Output : int +//----------------------------------------------------------------------------- +int CountUsableWords( CSentence& outwords ) +{ + int count = 0; + int numwords = outwords.m_Words.Size(); + // Nothing to do + if ( numwords <= 0 ) + return count; + + for ( int i = 0; i < numwords; i++ ) + { + CWordTag *word = outwords.m_Words[ i ]; + if ( !IsUseable( word ) ) + continue; + + count++; + } + + return count; +} + + +//----------------------------------------------------------------------------- +// Purpose: Counts words which have either a valid start or end byte +// Input : *outwords - +// Output : int +//----------------------------------------------------------------------------- +int CountUnuseableWords( CSentence& outwords ) +{ + int count = 0; + int numwords = outwords.m_Words.Size(); + // Nothing to do + if ( numwords <= 0 ) + return count; + + for ( int i = 0; i < numwords; i++ ) + { + 
CWordTag *word = outwords.m_Words[ i ]; + if ( IsUseable( word ) ) + continue; + + count++; + } + + return count; +} + +// Keeps same relative spacing, but rebases list +void RepartitionPhonemes( CWordTag *word, unsigned int oldStart, unsigned int oldEnd ) +{ + // Repartition phonemes based on old range + float oldRange = ( float )( oldEnd - oldStart ); + float newRange = ( float )( word->m_uiEndByte - word->m_uiStartByte ); + + for ( int i = 0; i < word->m_Phonemes.Size(); i++ ) + { + CPhonemeTag *tag = word->m_Phonemes[ i ]; + Assert( tag ); + + float frac1 = 0.0f, frac2 = 0.0f; + float delta1, delta2; + + delta1 = ( float ) ( tag->m_uiStartByte - oldStart ); + delta2 = ( float ) ( tag->m_uiEndByte - oldStart ); + if ( oldRange > 0.0f ) + { + frac1 = delta1 / oldRange; + frac2 = delta2 / oldRange; + } + + tag->m_uiStartByte = word->m_uiStartByte + ( unsigned int ) ( frac1 * newRange ); + tag->m_uiEndByte = word->m_uiStartByte + ( unsigned int ) ( frac2 * newRange ); + } +} + +void PartitionWords( CSentence& outwords, int start, int end, int sampleStart, int sampleEnd ) +{ + int wordCount = end - start + 1; + Assert( wordCount >= 1 ); + int stepSize = ( sampleEnd - sampleStart ) / wordCount; + + int currentStart = sampleStart; + + for ( int i = start; i <= end; i++ ) + { + CWordTag *word = outwords.m_Words[ i ]; + Assert( word ); + + unsigned int oldStart = word->m_uiStartByte; + unsigned int oldEnd = word->m_uiEndByte; + + word->m_uiStartByte = currentStart; + word->m_uiEndByte = currentStart + stepSize; + + RepartitionPhonemes( word, oldStart, oldEnd ); + + currentStart += stepSize; + } +} + +void MergeWords( CWordTag *w1, CWordTag *w2 ) +{ + unsigned int start, end; + + start = min( w1->m_uiStartByte, w2->m_uiStartByte ); + end = max( w1->m_uiEndByte, w2->m_uiEndByte ); + + unsigned int mid = ( start + end ) / 2; + + unsigned int oldw1start, oldw2start, oldw1end, oldw2end; + + oldw1start = w1->m_uiStartByte; + oldw2start = w2->m_uiStartByte; + oldw1end = 
w1->m_uiEndByte; + oldw2end = w2->m_uiEndByte; + + w1->m_uiStartByte = start; + w1->m_uiEndByte = mid; + w2->m_uiStartByte = mid; + w2->m_uiEndByte = end; + + RepartitionPhonemes( w1, oldw1start, oldw1end ); + RepartitionPhonemes( w2, oldw2start, oldw2end ); +} + +void FixupZeroLengthWords( CSentence& outwords ) +{ + while ( 1 ) + { + int i; + for ( i = 0 ; i < outwords.m_Words.Size() - 1; i++ ) + { + CWordTag *current, *next; + + current = outwords.m_Words[ i ]; + next = outwords.m_Words[ i + 1 ]; + + if ( current->m_uiEndByte - current->m_uiStartByte <= 0 ) + { + MergeWords( current, next ); + break; + } + + if ( next->m_uiEndByte - next->m_uiStartByte <= 0 ) + { + MergeWords( current, next ); + break; + } + } + + if ( i >= outwords.m_Words.Size() - 1 ) + { + break; + } + } +} + +void ComputeMissingByteSpans( int numsamples, CSentence& outwords ) +{ + int numwords = outwords.m_Words.Size(); + // Nothing to do + if ( numwords <= 0 ) + return; + + int interationcount = 1; + + while( 1 ) + { + Log( "\nCompute %i\n", interationcount++ ); + LogWords( outwords ); + + int wordNumber; + + // Done! 
+ if ( !CountUnuseableWords( outwords ) ) + { + FixupZeroLengthWords( outwords ); + break; + } + + if ( !CountUsableWords( outwords ) ) + { + // Evenly space words across full sample time + PartitionWords( outwords, 0, numwords - 1, 0, numsamples ); + break; + } + + wordNumber = FindFirstUsableWord( outwords ); + // Not the first word + if ( wordNumber > 0 ) + { + // Repartition all of the unusables and the first one starting at zero over the range + CWordTag *firstUsable = outwords.m_Words[ wordNumber ]; + Assert( firstUsable ); + + if ( firstUsable->m_uiStartByte != 0 ) + { + PartitionWords( outwords, 0, wordNumber - 1, 0, firstUsable->m_uiStartByte ); + } + else + { + PartitionWords( outwords, 0, wordNumber, 0, firstUsable->m_uiEndByte ); + } + + // Start over + continue; + } + + wordNumber = FindLastUsableWord( outwords ); + // Not the last word + if ( wordNumber >= 0 && wordNumber < numwords - 1 ) + { + // Repartition all of the unusables and the first one starting at zero over the range + CWordTag *lastUsable = outwords.m_Words[ wordNumber ]; + Assert( lastUsable ); + + if ( lastUsable->m_uiEndByte != (unsigned int)numsamples ) + { + PartitionWords( outwords, wordNumber + 1, numwords-1, lastUsable->m_uiEndByte, numsamples ); + } + else + { + PartitionWords( outwords, wordNumber, numwords-1, lastUsable->m_uiStartByte, numsamples ); + } + + // Start over + continue; + } + + // If we get here it means that the start and end of the list are okay and we just have to + // iterate across the list and fix things in the middle + int startByte = 0; + int endByte = 0; + for ( int i = 0; i < numwords ; i++ ) + { + CWordTag *word = outwords.m_Words[ i ]; + if ( IsUseable( word ) ) + { + startByte = word->m_uiEndByte; + continue; + } + + // Found the start of a chain of 1 or more unusable words + // Find the startbyte of the next usable word and count how many words we check + int wordCount = 1; + for ( int j = i + 1; j < numwords; j++ ) + { + CWordTag *next = 
outwords.m_Words[ j ]; + if ( IsUseable( next ) ) + { + endByte = next->m_uiStartByte; + break; + } + + wordCount++; + } + + // Now partition words across the gap and go to start again + PartitionWords( outwords, i, i + wordCount - 1, startByte, endByte ); + break; + } + } +} + +//----------------------------------------------------------------------------- +// Purpose: Given a wavfile and a list of inwords, determines the word/phonene +// sample counts for the sentce +// Input : *wavfile - +// *inwords - +// *outphonemes{ text.Clear( - +// Output : SR_RESULT +//----------------------------------------------------------------------------- +static SR_RESULT SAPI_ExtractPhonemes( + const char *wavfile, + int numsamples, + void (*pfnPrint)( const char *fmt, ... ), + CSentence& inwords, + CSentence& outwords ) +{ + LogReset(); + + USES_CONVERSION; + + CSpDynamicString text; + text.Clear(); + + HKEY hkwipe; + LONG lResult = RegOpenKeyEx( HKEY_CURRENT_USER, "Software\\Microsoft\\Speech\\RecoProfiles", 0, KEY_ALL_ACCESS, &hkwipe ); + if ( lResult == ERROR_SUCCESS ) + { + RecursiveRegDelKey( hkwipe ); + RegCloseKey( hkwipe ); + } + + if ( strlen( inwords.GetText() ) <= 0 ) + { + inwords.SetTextFromWords(); + } + + // Construct a string from the inwords array + text.Append( T2W( inwords.GetText() ) ); + + // Assume failure + SR_RESULT result = SR_RESULT_ERROR; + + if ( text.Length() > 0 ) + { + CSentence sentence; + + pfnPrint( "Processing...\r\n" ); + + // Give it a try + result = ExtractPhonemes( wavfile, text, sentence, pfnPrint ); + + pfnPrint( "Finished.\r\n" ); + // PrintWordsAndPhonemes( sentence, pfnPrint ); + + // Copy results to outputs + outwords.Reset(); + + outwords.SetText( inwords.GetText() ); + + Log( "Starting\n" ); + LogWords( inwords ); + + if ( SR_RESULT_ERROR != result ) + { + int i; + + Log( "Hypothesized\n" ); + LogWords( sentence ); + + for( i = 0 ; i < sentence.m_Words.Size(); i++ ) + { + CWordTag *tag = sentence.m_Words[ i ]; + if ( tag ) + { + // 
Skip '...' tag + if ( stricmp( tag->GetWord(), "..." ) ) + { + CWordTag *newTag = new CWordTag( *tag ); + + outwords.m_Words.AddToTail( newTag ); + } + } + } + + // Now insert unrecognized/skipped words from original list + // + int frompos = 0, topos = 0; + + while( 1 ) + { + // End of source list + if ( frompos >= inwords.m_Words.Size() ) + break; + + const CWordTag *fromTag = inwords.m_Words[ frompos ]; + + // Reached end of destination list, just copy words over from from source list until + // we run out of source words + if ( topos >= outwords.m_Words.Size() ) + { + // Just copy words over + CWordTag *newWord = new CWordTag( *fromTag ); + + // Remove phonemes + while ( newWord->m_Phonemes.Size() > 0 ) + { + CPhonemeTag *kill = newWord->m_Phonemes[ 0 ]; + newWord->m_Phonemes.Remove( 0 ); + delete kill; + } + + outwords.m_Words.AddToTail( newWord ); + frompos++; + topos++; + continue; + } + + // Destination word + const CWordTag *toTag = outwords.m_Words[ topos ]; + + // Words match, just skip ahead + if ( !stricmp( fromTag->GetWord(), toTag->GetWord() ) ) + { + frompos++; + topos++; + continue; + } + + // The only case we handle is that something in the source wasn't in the destination + + // Find the next source word that appears in the destination + int skipAhead = frompos + 1; + bool found = false; + while ( skipAhead < inwords.m_Words.Size() ) + { + const CWordTag *sourceWord = inwords.m_Words[ skipAhead ]; + if ( !stricmp( sourceWord->GetWord(), toTag->GetWord() ) ) + { + found = true; + break; + } + + skipAhead++; + } + + // Uh oh destination has words that are not in source, just skip to next destination word? 
+ if ( !found ) + { + topos++; + } + else + { + // Copy words from from source list into destination + // + int skipCount = skipAhead - frompos; + + while ( --skipCount>= 0 ) + { + const CWordTag *sourceWord = inwords.m_Words[ frompos++ ]; + CWordTag *newWord = new CWordTag( *sourceWord ); + + // Remove phonemes + while ( newWord->m_Phonemes.Size() > 0 ) + { + CPhonemeTag *kill = newWord->m_Phonemes[ 0 ]; + newWord->m_Phonemes.Remove( 0 ); + delete kill; + } + + outwords.m_Words.InsertBefore( topos, newWord ); + topos++; + } + + frompos++; + topos++; + } + } + + Log( "\nDone simple check\n" ); + + LogWords( outwords ); + LogPhonemes( outwords ); + + ComputeMissingByteSpans( numsamples, outwords ); + + Log( "\nFinal check\n" ); + + LogWords( outwords ); + LogPhonemes( outwords ); + } + } + else + { + pfnPrint( "Input sentence is empty!\n" ); + } + + // Return results + return result; +} + + +//----------------------------------------------------------------------------- +// Purpose: Expose the interface +//----------------------------------------------------------------------------- +class CPhonemeExtractorSAPI : public IPhonemeExtractor +{ +public: + virtual PE_APITYPE GetAPIType() const + { + return SPEECH_API_SAPI; + } + + // Used for menus, etc + virtual char const *GetName() const + { + return "MS SAPI 5.1"; + } + + SR_RESULT Extract( + const char *wavfile, + int numsamples, + void (*pfnPrint)( const char *fmt, ... ), + CSentence& inwords, + CSentence& outwords ) + { + return SAPI_ExtractPhonemes( wavfile, numsamples, pfnPrint, inwords, outwords ); + } +}; + EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorSAPI, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE );
\ No newline at end of file diff --git a/mp/src/utils/phonemeextractor/phonemeextractor.vpc b/mp/src/utils/phonemeextractor/phonemeextractor.vpc index f6e82e4d..cdcee5c2 100644 --- a/mp/src/utils/phonemeextractor/phonemeextractor.vpc +++ b/mp/src/utils/phonemeextractor/phonemeextractor.vpc @@ -1,83 +1,83 @@ -//-----------------------------------------------------------------------------
-// PHONEMEEXTRACTOR.VPC
-//
-// Project Script
-//-----------------------------------------------------------------------------
-
-$Macro SRCDIR "..\.."
-$Macro OUTBINDIR "$SRCDIR\..\game\bin\phonemeextractors"
-
-$Include "$SRCDIR\vpc_scripts\source_dll_base.vpc"
-
-$Configuration
-{
- $Compiler
- {
- $AdditionalIncludeDirectories "$BASE;../common,../hlfaceposer,../sapi51/include"
- $PreprocessorDefinitions "$BASE;PHONEMEEXTRACTOR_EXPORTS"
-
- // The project has some trouble with the deprecated string function warnings, so turn those off.
- $AdditionalOptions "$BASE /wd4995"
- }
-
- $Linker
- {
- $AdditionalDependencies "$BASE odbc32.lib odbccp32.lib"
- }
-}
-
-$Project "Phonemeextractor"
-{
- $Folder "Source Files"
- {
- $File "extractor_utils.cpp"
- $File "$SRCDIR\public\phonemeconverter.cpp"
- $File "$SRCDIR\public\sentence.cpp"
- $File "phonemeextractor.cpp"
- }
-
- $Folder "Header Files"
- {
- $File "talkback.h"
- }
-
- $Folder "SAPI Header Files"
- { // These are dynamic because sapi might not be present in SDK branches, but we still want VPC to succeed.
- $DynamicFile "..\sapi51\Include\sapi.h"
- $DynamicFile "..\sapi51\Include\sapiddk.h"
- $DynamicFile "..\sapi51\Include\Spddkhlp.h"
- $DynamicFile "..\sapi51\Include\spdebug.h"
- $DynamicFile "..\sapi51\Include\sperror.h"
- $DynamicFile "..\sapi51\Include\sphelper.h"
- }
-
- $Folder "Public Header Files"
- {
- $File "$SRCDIR\public\mathlib\amd3dx.h"
- $File "$SRCDIR\public\tier0\basetypes.h"
- $File "$SRCDIR\public\tier0\commonmacros.h"
- $File "$SRCDIR\public\tier0\dbg.h"
- $File "$SRCDIR\public\tier0\fasttimer.h"
- $File "$SRCDIR\public\appframework\IAppSystem.h"
- $File "$SRCDIR\public\mathlib\mathlib.h"
- $File "$SRCDIR\public\phonemeconverter.h"
- $File "$SRCDIR\public\phonemeextractor\phonemeextractor.h"
- $File "$SRCDIR\public\tier0\platform.h"
- $File "$SRCDIR\public\tier0\protected_things.h"
- $File "$SRCDIR\public\sentence.h"
- $File "$SRCDIR\public\string_t.h"
- $File "$SRCDIR\public\tier1\strtools.h"
- $File "$SRCDIR\public\tier1\utllinkedlist.h"
- $File "$SRCDIR\public\tier1\utlmemory.h"
- $File "$SRCDIR\public\tier1\utlvector.h"
- $File "$SRCDIR\public\mathlib\vector.h"
- $File "$SRCDIR\public\mathlib\vector2d.h"
- $File "$SRCDIR\public\vstdlib\vstdlib.h"
- }
-
- $Folder "Link Libraries"
- {
- $Lib mathlib
- $DynamicFile "..\sapi51\lib\i386\sapi.lib"
- }
-}
+//----------------------------------------------------------------------------- +// PHONEMEEXTRACTOR.VPC +// +// Project Script +//----------------------------------------------------------------------------- + +$Macro SRCDIR "..\.." +$Macro OUTBINDIR "$SRCDIR\..\game\bin\phonemeextractors" + +$Include "$SRCDIR\vpc_scripts\source_dll_base.vpc" + +$Configuration +{ + $Compiler + { + $AdditionalIncludeDirectories "$BASE;../common,../hlfaceposer,../sapi51/include" + $PreprocessorDefinitions "$BASE;PHONEMEEXTRACTOR_EXPORTS" + + // The project has some trouble with the deprecated string function warnings, so turn those off. + $AdditionalOptions "$BASE /wd4995" + } + + $Linker + { + $AdditionalDependencies "$BASE odbc32.lib odbccp32.lib" + } +} + +$Project "Phonemeextractor" +{ + $Folder "Source Files" + { + $File "extractor_utils.cpp" + $File "$SRCDIR\public\phonemeconverter.cpp" + $File "$SRCDIR\public\sentence.cpp" + $File "phonemeextractor.cpp" + } + + $Folder "Header Files" + { + $File "talkback.h" + } + + $Folder "SAPI Header Files" + { // These are dynamic because sapi might not be present in SDK branches, but we still want VPC to succeed. 
+ $DynamicFile "..\sapi51\Include\sapi.h" + $DynamicFile "..\sapi51\Include\sapiddk.h" + $DynamicFile "..\sapi51\Include\Spddkhlp.h" + $DynamicFile "..\sapi51\Include\spdebug.h" + $DynamicFile "..\sapi51\Include\sperror.h" + $DynamicFile "..\sapi51\Include\sphelper.h" + } + + $Folder "Public Header Files" + { + $File "$SRCDIR\public\mathlib\amd3dx.h" + $File "$SRCDIR\public\tier0\basetypes.h" + $File "$SRCDIR\public\tier0\commonmacros.h" + $File "$SRCDIR\public\tier0\dbg.h" + $File "$SRCDIR\public\tier0\fasttimer.h" + $File "$SRCDIR\public\appframework\IAppSystem.h" + $File "$SRCDIR\public\mathlib\mathlib.h" + $File "$SRCDIR\public\phonemeconverter.h" + $File "$SRCDIR\public\phonemeextractor\phonemeextractor.h" + $File "$SRCDIR\public\tier0\platform.h" + $File "$SRCDIR\public\tier0\protected_things.h" + $File "$SRCDIR\public\sentence.h" + $File "$SRCDIR\public\string_t.h" + $File "$SRCDIR\public\tier1\strtools.h" + $File "$SRCDIR\public\tier1\utllinkedlist.h" + $File "$SRCDIR\public\tier1\utlmemory.h" + $File "$SRCDIR\public\tier1\utlvector.h" + $File "$SRCDIR\public\mathlib\vector.h" + $File "$SRCDIR\public\mathlib\vector2d.h" + $File "$SRCDIR\public\vstdlib\vstdlib.h" + } + + $Folder "Link Libraries" + { + $Lib mathlib + $DynamicFile "..\sapi51\lib\i386\sapi.lib" + } +} diff --git a/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp index 70819f8e..29dabab4 100644 --- a/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp +++ b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp @@ -1,1075 +1,1075 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//=============================================================================//
-#include <stdio.h>
-#include <stdarg.h>
-#include <memory.h>
-#include <windows.h>
-#include <mmsystem.h>
-#include <mmreg.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "phonemeextractor/PhonemeExtractor.h"
-#include "ims_helper/ims_helper.h"
-
-#include "tier0/dbg.h"
-#include "sentence.h"
-#include "PhonemeConverter.h"
-#include "tier1/strtools.h"
-
-#define TEXTLESS_WORDNAME "[Textless]"
-
-static IImsHelper *talkback = NULL;
-
-//-----------------------------------------------------------------------------
-// Purpose: Expose the interface
-//-----------------------------------------------------------------------------
-class CPhonemeExtractorLipSinc : public IPhonemeExtractor
-{
-public:
- virtual PE_APITYPE GetAPIType() const
- {
- return SPEECH_API_LIPSINC;
- }
-
- // Used for menus, etc
- virtual char const *GetName() const
- {
- return "IMS (LipSinc)";
- }
-
- SR_RESULT Extract(
- const char *wavfile,
- int numsamples,
- void (*pfnPrint)( const char *fmt, ... ),
- CSentence& inwords,
- CSentence& outwords );
-
-
- CPhonemeExtractorLipSinc( void );
- ~CPhonemeExtractorLipSinc( void );
-
- enum
- {
- MAX_WORD_LENGTH = 128,
- };
-private:
-
-
- class CAnalyzedWord
- {
- public:
- char buffer[ MAX_WORD_LENGTH ];
- double starttime;
- double endtime;
- };
-
- class CAnalyzedPhoneme
- {
- public:
- char phoneme[ 32 ];
- double starttime;
- double endtime;
- };
-
- bool InitLipSinc( void );
- void ShutdownLipSinc( void );
-
- void DescribeError( TALKBACK_ERR err );
- void Printf( char const *fmt, ... );
-
- bool CheckSoundFile( char const *filename );
- bool GetInitialized( void );
- void SetInitialized( bool init );
-
- void (*m_pfnPrint)( const char *fmt, ... );
-
- char const *ConstructInputSentence( CSentence& inwords );
- bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords );
-
- char const *ApplyTBWordRules( char const *word );
-
- void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords );
- void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords );
-
- int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart );
-
- int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime );
- int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime );
-
- CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index );
- CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index );
-
- int ComputeByteFromTime( float time );
-
- bool m_bInitialized;
-
- float m_flSampleCount;
- float m_flDuration;
-
- float m_flSamplesPerSecond;
-
- int m_nBytesPerSample;
-
- HMODULE m_hHelper;
-};
-
-CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void )
-{
- m_hHelper = (HMODULE)0;
- m_pfnPrint = NULL;
-
- m_bInitialized = false;
-
- m_flSampleCount = 0.0f;
- m_flDuration = 0.0f;
-
- m_flSamplesPerSecond = 0.0f;
-
- m_nBytesPerSample = 0;
-}
-
-CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void )
-{
- if ( GetInitialized() )
- {
- ShutdownLipSinc();
- }
-}
-
-bool CPhonemeExtractorLipSinc::GetInitialized( void )
-{
- return m_bInitialized;
-}
-
-void CPhonemeExtractorLipSinc::SetInitialized( bool init )
-{
- m_bInitialized = init;
-}
-
-int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time )
-{
- if ( !m_flDuration )
- return 0;
-
- float frac = time / m_flDuration;
-
- float sampleNumber = frac * m_flSampleCount;
-
- int bytenumber = sampleNumber * m_nBytesPerSample;
-
- return bytenumber;
-}
-
-void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err )
-{
- Assert( m_pfnPrint );
-
- // Get the error description.
- char errorDesc[256] = "";
- if ( err != TALKBACK_NOERR )
- {
- talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc );
- }
-
- // Report or log the error...
- (*m_pfnPrint)( "LIPSINC ERROR: %s\n", errorDesc );
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *fmt -
-// .. -
-//-----------------------------------------------------------------------------
-void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... )
-{
- Assert( m_pfnPrint );
-
- char string[ 4096 ];
-
- va_list argptr;
- va_start( argptr, fmt );
- vsprintf( string, fmt, argptr );
- va_end( argptr );
-
- (*m_pfnPrint)( "%s", string );
-}
-
-bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename )
-{
- TALKBACK_SOUND_FILE_METRICS fm;
- memset( &fm, 0, sizeof( fm ) );
- fm.m_size = sizeof( fm );
-
- TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return false;
- }
-
- if ( fm.m_canBeAnalyzed )
- {
- Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n",
- filename,
- fm.m_duration,
- fm.m_sampleRate,
- fm.m_bitsPerSample,
- fm.m_channelCount );
- }
-
- m_flDuration = fm.m_duration;
- if ( m_flDuration > 0 )
- {
- m_flSamplesPerSecond = m_flSampleCount / m_flDuration;
- }
- else
- {
- m_flSamplesPerSecond = 0.0f;
- }
-
- m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 );
-
- m_flSampleCount /= m_nBytesPerSample;
-
- m_nBytesPerSample /= fm.m_channelCount;
-
- return fm.m_canBeAnalyzed ? true : false;
-}
-
-typedef IImsHelper *(*pfnImsHelper)(void);
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Output : Returns true on success, false on failure.
-//-----------------------------------------------------------------------------
-bool CPhonemeExtractorLipSinc::InitLipSinc( void )
-{
- if ( GetInitialized() )
- {
- return true;
- }
-
- m_hHelper = LoadLibrary( "ims_helper.dll" );
- if ( !m_hHelper )
- {
- return false;
- }
-
- pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" );
- if ( !factory )
- {
- FreeLibrary( m_hHelper );
- return false;
- }
-
- talkback = reinterpret_cast< IImsHelper * >( (*factory)() );
- if ( !talkback )
- {
- FreeLibrary( m_hHelper );
- return false;
- }
-
- char szExeName[ MAX_PATH ];
- szExeName[0] = 0;
- GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) );
-
- char szBaseDir[ MAX_PATH ];
- Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) );
-
- Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) );
- Q_StripTrailingSlash( szBaseDir );
- Q_strlower( szBaseDir );
-
- char coreDataDir[ 512 ];
- Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\",
- szBaseDir );
- Q_FixSlashes( coreDataDir );
-
- char szCheck[ 512 ];
- Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
- struct __stat64 buf;
-
- if ( _stat64( szCheck, &buf ) != 0 )
- {
- Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\",
- szBaseDir );
- Q_FixSlashes( coreDataDir );
- Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
-
- if ( _stat64( szCheck, &buf ) != 0 )
- {
- Error( "Unable to find talkback data files in %s.", coreDataDir );
- }
- }
-
- TALKBACK_ERR err;
-
- err = talkback->TalkBackStartupLibrary( coreDataDir );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- FreeLibrary( m_hHelper );
- return false;
- }
-
- long verMajor = 0;
- long verMinor = 0;
- long verRevision = 0;
-
- err = talkback->TalkBackGetVersion(
- &verMajor,
- &verMinor,
- &verRevision);
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- FreeLibrary( m_hHelper );
- return false;
- }
-
- Printf( "Lipsinc TalkBack Version %i.%i.%i\n", verMajor, verMinor, verRevision );
-
- m_bInitialized = true;
-
- return true;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-//-----------------------------------------------------------------------------
-void CPhonemeExtractorLipSinc::ShutdownLipSinc( void )
-{
- // HACK HACK: This seems to crash on exit sometimes
- __try
- {
- talkback->TalkBackShutdownLibrary();
-
- FreeLibrary( m_hHelper );
- }
- __except(EXCEPTION_EXECUTE_HANDLER )
- {
- OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" );
- }
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : inwords -
-// Output : char const
-//-----------------------------------------------------------------------------
-char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords )
-{
- static char sentence[ 16384 ];
-
- sentence[ 0 ] = 0;
-
- int last = inwords.m_Words.Size() - 1;
-
- for ( int i = 0 ; i <= last; i++ )
- {
- CWordTag *w = inwords.m_Words[ i ];
-
- strcat( sentence, w->GetWord() );
- if ( i != last )
- {
- strcat( sentence, " " );
- }
- }
-
- if ( inwords.m_Words.Count() == 1 &&
- !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
- {
- sentence[ 0 ] = 0;
- }
-
- return sentence;
-}
-
-bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords )
-{
- *ppAnalysis = NULL;
-
- TALKBACK_ANALYSIS_SETTINGS settings;
- memset( &settings, 0, sizeof( settings ) );
-
- // Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the
- // structure.
- settings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS );
-
-
- // Default value: 30 (frames per second).
- settings.fFrameRate = 100;
- // Set this to 1 to optimize for flipbook output, 0 to do analysis normally.
- //
- // Default value: 0 (normal analysis).
- settings.fOptimizeForFlipbook = 0;
- // Set this to -1 to seed the random number generator with the current time.
- // Any other number will be used directly for the random number seed, which
- // is useful if you want repeatable speech gestures. This value does not
- // influence lip-synching at all.
- //
- // Default value: -1 (use current time).
- settings.fRandomSeed = -1;
- // Path to the configuration (.INI) file with phoneme-to-speech-target
- // mapping. Set this to NULL to use the default mapping.
- //
- // Default value: NULL (use default mapping).
- settings.fConfigFile = NULL;
-
- char const *text = ConstructInputSentence( inwords );
-
- Printf( "Analyzing: \"%s\"\n", text[ 0 ] ? text : TEXTLESS_WORDNAME );
-
- TALKBACK_ERR err = talkback->TalkBackGetAnalysis(
- ppAnalysis,
- wavfile,
- text,
- &settings );
-
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return false;
- }
-
- Printf( "Analysis successful...\n" );
-
- return true;
-}
-
-typedef struct
-{
- TALKBACK_PHONEME phoneme;
- char const *string;
-} TBPHONEMES_t;
-
-static TBPHONEMES_t g_TBPhonemeList[]=
-{
- { TALKBACK_PHONEME_IY, "iy" },
- { TALKBACK_PHONEME_IH, "ih" },
- { TALKBACK_PHONEME_EH, "eh" },
- { TALKBACK_PHONEME_EY, "ey" },
- { TALKBACK_PHONEME_AE, "ae" },
- { TALKBACK_PHONEME_AA, "aa" },
- { TALKBACK_PHONEME_AW, "aw" },
- { TALKBACK_PHONEME_AY, "ay" },
- { TALKBACK_PHONEME_AH, "ah" },
- { TALKBACK_PHONEME_AO, "ao" },
- { TALKBACK_PHONEME_OY, "oy" },
- { TALKBACK_PHONEME_OW, "ow" },
- { TALKBACK_PHONEME_UH, "uh" },
- { TALKBACK_PHONEME_UW, "uw" },
- { TALKBACK_PHONEME_ER, "er" },
- { TALKBACK_PHONEME_AX, "ax" },
- { TALKBACK_PHONEME_S, "s" },
- { TALKBACK_PHONEME_SH, "sh" },
- { TALKBACK_PHONEME_Z, "z" },
- { TALKBACK_PHONEME_ZH, "zh" },
- { TALKBACK_PHONEME_F, "f" },
- { TALKBACK_PHONEME_TH, "th" },
- { TALKBACK_PHONEME_V, "v" },
- { TALKBACK_PHONEME_DH, "dh" },
- { TALKBACK_PHONEME_M, "m" },
- { TALKBACK_PHONEME_N, "n" },
- { TALKBACK_PHONEME_NG, "ng" },
- { TALKBACK_PHONEME_L, "l" },
- { TALKBACK_PHONEME_R, "r" },
- { TALKBACK_PHONEME_W, "w" },
- { TALKBACK_PHONEME_Y, "y" },
- { TALKBACK_PHONEME_HH, "hh" },
- { TALKBACK_PHONEME_B, "b" },
- { TALKBACK_PHONEME_D, "d" },
- { TALKBACK_PHONEME_JH, "jh" },
- { TALKBACK_PHONEME_G, "g" },
- { TALKBACK_PHONEME_P, "p" },
- { TALKBACK_PHONEME_T, "t" },
- { TALKBACK_PHONEME_K, "k" },
- { TALKBACK_PHONEME_CH, "ch" },
- { TALKBACK_PHONEME_SIL, "<sil>" },
- { -1, NULL }
-};
-
-char const *TBPhonemeToString( TALKBACK_PHONEME phoneme )
-{
- if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST )
- {
- return "Bogus";
- }
-
- TBPHONEMES_t *item = &g_TBPhonemeList[ phoneme ];
- return item->string;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *analysis -
-// time -
-// start -
-// Output : int
-//-----------------------------------------------------------------------------
-int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start )
-{
- long count;
-
- TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return -1;
- }
-
- if ( count <= 0L )
- return -1;
-
- // Bogus
- if ( count >= 100000L )
- return -1;
-
- for ( int i = 0; i < (int)count; i++ )
- {
- TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID;
- err = talkback->TalkBackGetPhonemeEnum( analysis, i, &tbPhoneme );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- continue;
- }
-
- double t;
-
- if ( start )
- {
- err = talkback->TalkBackGetPhonemeStartTime( analysis, i, &t );
- }
- else
- {
- err = talkback->TalkBackGetPhonemeEndTime( analysis, i, &t );
- }
-
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- continue;
- }
-
- if ( t == time )
- {
- return i;
- }
- }
-
- return -1;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *analysis -
-// starttime -
-// Output : int
-//-----------------------------------------------------------------------------
-int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime )
-{
- return GetPhonemeIndexAtWord( analysis, starttime, true );
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *analysis -
-// endtime -
-// Output : int
-//-----------------------------------------------------------------------------
-int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime )
-{
- return GetPhonemeIndexAtWord( analysis, endtime, false );
-}
-
-CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index )
-{
- static CAnalyzedPhoneme p;
-
- memset( &p, 0, sizeof( p ) );
-
- TALKBACK_PHONEME tb;
-
- TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
-
- strcpy( p.phoneme, TBPhonemeToString( tb ) );
-
- err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &p.starttime );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
- err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &p.endtime );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
-
- return &p;
-}
-
-CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index )
-{
- static CAnalyzedWord w;
-
- memset( &w, 0, sizeof( w ) );
-
- long chars = sizeof( w.buffer );
-
- TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, chars, w.buffer );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
-
- err = talkback->TalkBackGetWordStartTime( analysis, index, &w.starttime );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
- err = talkback->TalkBackGetWordEndTime( analysis, index, &w.endtime );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return NULL;
- }
-
- return &w;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *w1 -
-// *w2 -
-// Output : Returns true on success, false on failure.
-//-----------------------------------------------------------------------------
-bool FuzzyWordMatch( char const *w1, char const *w2 )
-{
- int len1 = strlen( w1 );
- int len2 = strlen( w2 );
-
- int minlen = min( len1, len2 );
-
- // Found a match
- if ( !strnicmp( w1, w2, minlen ) )
- return true;
-
- int letterdiff = abs( len1 - len2 );
- // More than three letters different, don't bother
- if ( letterdiff > 5 )
- return false;
-
- // Compute a "delta"
- char *p1 = (char *)w1;
- char *p2 = (char *)w2;
-
- CUtlVector <char> word1;
- CUtlVector <char> word2;
-
- while ( *p1 )
- {
- if ( V_isalpha( *p1 ) )
- {
- word1.AddToTail( *p1 );
- }
- p1++;
- }
-
- while ( *p2 )
- {
- if ( V_isalpha( *p2 ) )
- {
- word2.AddToTail( *p2 );
- }
- p2++;
- }
-
- int i;
- for ( i = 0; i < word1.Size(); i++ )
- {
- char c = word1[ i ];
-
- // See if c is in word 2, if so subtract it out
- int idx = word2.Find( c );
-
- if ( idx != word2.InvalidIndex() )
- {
- word2.Remove( idx );
- }
- }
-
- if ( word2.Size() <= letterdiff )
- return true;
-
- word2.RemoveAll();
-
- while ( *p2 )
- {
- if ( V_isalpha( *p2 ) )
- {
- word2.AddToTail( *p2 );
- }
- p2++;
- }
-
- for ( i = 0; i < word2.Size(); i++ )
- {
- char c = word2[ i ];
-
- // See if c is in word 2, if so subtract it out
- int idx = word1.Find( c );
-
- if ( idx != word1.InvalidIndex() )
- {
- word1.Remove( idx );
- }
- }
-
- if ( word1.Size() <= letterdiff )
- return true;
-
- return false;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: For foreign language stuff, if inwords is empty, process anyway...
-// Input : *analysis -
-// outwords -
-//-----------------------------------------------------------------------------
-void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords )
-{
- long count;
-
- TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return;
- }
-
- CWordTag *newWord = new CWordTag;
-
- newWord->SetWord( TEXTLESS_WORDNAME );
-
- float starttime = 0.0f;
- float endtime = 1.0f;
-
-
- for ( int i = 0; i < count; ++i )
- {
- // Get phoneme and timing info
- CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i );
- if ( !ph )
- continue;
-
- CPhonemeTag *ptag = new CPhonemeTag;
-
- if ( i == 0 || ( ph->starttime < starttime ) )
- {
- starttime = ph->starttime;
- }
-
- if ( i == 0 || ( ph->endtime > endtime ) )
- {
- endtime = ph->endtime;
- }
-
- ptag->SetStartTime( ph->starttime );
- ptag->SetEndTime( ph->endtime );
-
- ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
- ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
-
- ptag->SetTag( ph->phoneme );
- ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
-
- newWord->m_Phonemes.AddToTail( ptag );
- }
-
- newWord->m_flStartTime = starttime;
- newWord->m_flEndTime = endtime;
-
- newWord->m_uiStartByte = ComputeByteFromTime( starttime );
- newWord->m_uiEndByte = ComputeByteFromTime( endtime );
-
- outwords.Reset();
- outwords.AddWordTag( newWord );
- outwords.SetTextFromWords();
-}
-
-//-----------------------------------------------------------------------------
-// Purpose:
-// Input : *analysis -
-// inwords -
-// outwords -
-//-----------------------------------------------------------------------------
-void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords )
-{
- long count;
-
- TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count );
- if ( err != TALKBACK_NOERR )
- {
- DescribeError( err );
- return;
- }
-
- if ( count <= 0L )
- {
- if ( inwords.m_Words.Count() == 0 ||
- !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
- {
- ProcessWordsTextless( analysis, outwords );
- }
- return;
- }
-
- // Bogus
- if ( count >= 100000L )
- return;
-
- int inwordpos = 0;
- int awordpos = 0;
-
- outwords.Reset();
-
- char previous[ 256 ];
- previous[ 0 ] = 0;
-
- while ( inwordpos < inwords.m_Words.Size() )
- {
- CWordTag *in = inwords.m_Words[ inwordpos ];
-
- if ( awordpos >= count )
- {
- // Just copy the rest over without phonemes
- CWordTag *copy = new CWordTag( *in );
-
- outwords.AddWordTag( copy );
-
- inwordpos++;
- continue;
- }
-
- // Should never fail
- CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos );
- if ( !w )
- {
- return;
- }
-
- if ( !stricmp( w->buffer, "<SIL>" ) )
- {
- awordpos++;
- continue;
- }
-
- char const *check = ApplyTBWordRules( in->GetWord() );
- if ( !FuzzyWordMatch( check, w->buffer ) )
- {
- bool advance_input = true;
- if ( previous[ 0 ] )
- {
- if ( FuzzyWordMatch( previous, w->buffer ) )
- {
- advance_input = false;
- }
- }
-
- if ( advance_input )
- {
- inwordpos++;
- }
- awordpos++;
- continue;
- }
- strcpy( previous, check );
-
- CWordTag *newWord = new CWordTag;
-
- newWord->SetWord( in->GetWord() );
-
- newWord->m_flStartTime = w->starttime;
- newWord->m_flEndTime = w->endtime;
-
- newWord->m_uiStartByte = ComputeByteFromTime( w->starttime );
- newWord->m_uiEndByte = ComputeByteFromTime( w->endtime );
-
- int phonemestart, phonemeend;
-
- phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime );
- phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime );
-
- if ( phonemestart >= 0 && phonemeend >= 0 )
- {
- for ( ; phonemestart <= phonemeend; phonemestart++ )
- {
- // Get phoneme and timing info
- CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart );
- if ( !ph )
- continue;
-
- CPhonemeTag *ptag = new CPhonemeTag;
- ptag->SetStartTime( ph->starttime );
- ptag->SetEndTime( ph->endtime );
-
- ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
- ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
-
- ptag->SetTag( ph->phoneme );
- ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
-
- newWord->m_Phonemes.AddToTail( ptag );
- }
- }
-
- outwords.AddWordTag( newWord );
- inwordpos++;
- awordpos++;
- }
-}
-
-char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word )
-{
- static char outword[ 256 ];
-
- char const *in = word;
- char *out = outword;
-
- while ( *in && ( ( out - outword ) <= 255 ) )
- {
- if ( *in == '\t' ||
- *in == ' ' ||
- *in == '\n' ||
- *in == '-' ||
- *in == '.' ||
- *in == ',' ||
- *in == ';' ||
- *in == '?' ||
- *in == '"' ||
- *in == ':' ||
- *in == '(' ||
- *in == ')' )
- {
- in++;
- *out++ = ' ';
- continue;
- }
-
- if ( !V_isprint( *in ) )
- {
- in++;
- continue;
- }
-
- if ( *in >= 128 )
- {
- in++;
- continue;
- }
-
- // Skip numbers
- if ( *in >= '0' && *in <= '9' )
- {
- in++;
- continue;
- }
-
- // Convert all letters to upper case
- if ( *in >= 'a' && *in <= 'z' )
- {
- *out++ = ( *in++ ) - 'a' + 'A';
- continue;
- }
-
- if ( *in >= 'A' && *in <= 'Z' )
- {
- *out++ = *in++;
- continue;
- }
-
- if ( *in == '\'' )
- {
- *out++ = *in++;
- continue;
- }
-
- in++;
- }
-
- *out = 0;
-
- return outword;
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: Given a wavfile and a list of inwords, determines the word/phonene
-// sample counts for the sentce
-// Output : SR_RESULT
-//-----------------------------------------------------------------------------
-SR_RESULT CPhonemeExtractorLipSinc::Extract(
- const char *wavfile,
- int numsamples,
- void (*pfnPrint)( const char *fmt, ... ),
- CSentence& inwords,
- CSentence& outwords )
-{
- // g_enableTalkBackDebuggingOutput = 1;
-
- m_pfnPrint = pfnPrint;
-
- if ( !InitLipSinc() )
- {
- return SR_RESULT_ERROR;
- }
-
- m_flSampleCount = numsamples;
-
- if ( !CheckSoundFile( wavfile ) )
- {
- FreeLibrary( m_hHelper );
- return SR_RESULT_ERROR;
- }
-
- TALKBACK_ANALYSIS *analysis = NULL;
-
- if ( !AttemptAnalysis( &analysis, wavfile, inwords ) )
- {
- FreeLibrary( m_hHelper );
- return SR_RESULT_FAILED;
- }
-
- if ( strlen( inwords.GetText() ) <= 0 )
- {
- inwords.SetTextFromWords();
- }
-
- outwords = inwords;
-
- // Examine data
- ProcessWords( analysis, inwords, outwords );
-
- if ( analysis )
- {
- talkback->TalkBackFreeAnalysis( &analysis );
- }
-
- return SR_RESULT_SUCCESS;
-}
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +//=============================================================================// +#include <stdio.h> +#include <stdarg.h> +#include <memory.h> +#include <windows.h> +#include <mmsystem.h> +#include <mmreg.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "phonemeextractor/PhonemeExtractor.h" +#include "ims_helper/ims_helper.h" + +#include "tier0/dbg.h" +#include "sentence.h" +#include "PhonemeConverter.h" +#include "tier1/strtools.h" + +#define TEXTLESS_WORDNAME "[Textless]" + +static IImsHelper *talkback = NULL; + +//----------------------------------------------------------------------------- +// Purpose: Expose the interface +//----------------------------------------------------------------------------- +class CPhonemeExtractorLipSinc : public IPhonemeExtractor +{ +public: + virtual PE_APITYPE GetAPIType() const + { + return SPEECH_API_LIPSINC; + } + + // Used for menus, etc + virtual char const *GetName() const + { + return "IMS (LipSinc)"; + } + + SR_RESULT Extract( + const char *wavfile, + int numsamples, + void (*pfnPrint)( const char *fmt, ... ), + CSentence& inwords, + CSentence& outwords ); + + + CPhonemeExtractorLipSinc( void ); + ~CPhonemeExtractorLipSinc( void ); + + enum + { + MAX_WORD_LENGTH = 128, + }; +private: + + + class CAnalyzedWord + { + public: + char buffer[ MAX_WORD_LENGTH ]; + double starttime; + double endtime; + }; + + class CAnalyzedPhoneme + { + public: + char phoneme[ 32 ]; + double starttime; + double endtime; + }; + + bool InitLipSinc( void ); + void ShutdownLipSinc( void ); + + void DescribeError( TALKBACK_ERR err ); + void Printf( char const *fmt, ... ); + + bool CheckSoundFile( char const *filename ); + bool GetInitialized( void ); + void SetInitialized( bool init ); + + void (*m_pfnPrint)( const char *fmt, ... 
); + + char const *ConstructInputSentence( CSentence& inwords ); + bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ); + + char const *ApplyTBWordRules( char const *word ); + + void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ); + void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ); + + int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart ); + + int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ); + int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ); + + CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ); + CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ); + + int ComputeByteFromTime( float time ); + + bool m_bInitialized; + + float m_flSampleCount; + float m_flDuration; + + float m_flSamplesPerSecond; + + int m_nBytesPerSample; + + HMODULE m_hHelper; +}; + +CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void ) +{ + m_hHelper = (HMODULE)0; + m_pfnPrint = NULL; + + m_bInitialized = false; + + m_flSampleCount = 0.0f; + m_flDuration = 0.0f; + + m_flSamplesPerSecond = 0.0f; + + m_nBytesPerSample = 0; +} + +CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void ) +{ + if ( GetInitialized() ) + { + ShutdownLipSinc(); + } +} + +bool CPhonemeExtractorLipSinc::GetInitialized( void ) +{ + return m_bInitialized; +} + +void CPhonemeExtractorLipSinc::SetInitialized( bool init ) +{ + m_bInitialized = init; +} + +int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time ) +{ + if ( !m_flDuration ) + return 0; + + float frac = time / m_flDuration; + + float sampleNumber = frac * m_flSampleCount; + + int bytenumber = sampleNumber * m_nBytesPerSample; + + return bytenumber; +} + +void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err ) +{ + Assert( m_pfnPrint ); + + // Get the error description. 
+ char errorDesc[256] = ""; + if ( err != TALKBACK_NOERR ) + { + talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc ); + } + + // Report or log the error... + (*m_pfnPrint)( "LIPSINC ERROR: %s\n", errorDesc ); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *fmt - +// .. - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... ) +{ + Assert( m_pfnPrint ); + + char string[ 4096 ]; + + va_list argptr; + va_start( argptr, fmt ); + vsprintf( string, fmt, argptr ); + va_end( argptr ); + + (*m_pfnPrint)( "%s", string ); +} + +bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename ) +{ + TALKBACK_SOUND_FILE_METRICS fm; + memset( &fm, 0, sizeof( fm ) ); + fm.m_size = sizeof( fm ); + + TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return false; + } + + if ( fm.m_canBeAnalyzed ) + { + Printf( "%s: %.2f s, rate %i, bits %i, channels %i\n", + filename, + fm.m_duration, + fm.m_sampleRate, + fm.m_bitsPerSample, + fm.m_channelCount ); + } + + m_flDuration = fm.m_duration; + if ( m_flDuration > 0 ) + { + m_flSamplesPerSecond = m_flSampleCount / m_flDuration; + } + else + { + m_flSamplesPerSecond = 0.0f; + } + + m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 ); + + m_flSampleCount /= m_nBytesPerSample; + + m_nBytesPerSample /= fm.m_channelCount; + + return fm.m_canBeAnalyzed ? true : false; +} + +typedef IImsHelper *(*pfnImsHelper)(void); + +//----------------------------------------------------------------------------- +// Purpose: +// Output : Returns true on success, false on failure. 
+//----------------------------------------------------------------------------- +bool CPhonemeExtractorLipSinc::InitLipSinc( void ) +{ + if ( GetInitialized() ) + { + return true; + } + + m_hHelper = LoadLibrary( "ims_helper.dll" ); + if ( !m_hHelper ) + { + return false; + } + + pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" ); + if ( !factory ) + { + FreeLibrary( m_hHelper ); + return false; + } + + talkback = reinterpret_cast< IImsHelper * >( (*factory)() ); + if ( !talkback ) + { + FreeLibrary( m_hHelper ); + return false; + } + + char szExeName[ MAX_PATH ]; + szExeName[0] = 0; + GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) ); + + char szBaseDir[ MAX_PATH ]; + Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) ); + + Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) ); + Q_StripTrailingSlash( szBaseDir ); + Q_strlower( szBaseDir ); + + char coreDataDir[ 512 ]; + Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\", + szBaseDir ); + Q_FixSlashes( coreDataDir ); + + char szCheck[ 512 ]; + Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); + struct __stat64 buf; + + if ( _stat64( szCheck, &buf ) != 0 ) + { + Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\", + szBaseDir ); + Q_FixSlashes( coreDataDir ); + Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir ); + + if ( _stat64( szCheck, &buf ) != 0 ) + { + Error( "Unable to find talkback data files in %s.", coreDataDir ); + } + } + + TALKBACK_ERR err; + + err = talkback->TalkBackStartupLibrary( coreDataDir ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + FreeLibrary( m_hHelper ); + return false; + } + + long verMajor = 0; + long verMinor = 0; + long verRevision = 0; + + err = talkback->TalkBackGetVersion( + &verMajor, + &verMinor, + &verRevision); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + FreeLibrary( m_hHelper ); + return false; + } + + Printf( "Lipsinc 
TalkBack Version %i.%i.%i\n", verMajor, verMinor, verRevision ); + + m_bInitialized = true; + + return true; +} + +//----------------------------------------------------------------------------- +// Purpose: +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ShutdownLipSinc( void ) +{ + // HACK HACK: This seems to crash on exit sometimes + __try + { + talkback->TalkBackShutdownLibrary(); + + FreeLibrary( m_hHelper ); + } + __except(EXCEPTION_EXECUTE_HANDLER ) + { + OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" ); + } +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : inwords - +// Output : char const +//----------------------------------------------------------------------------- +char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords ) +{ + static char sentence[ 16384 ]; + + sentence[ 0 ] = 0; + + int last = inwords.m_Words.Size() - 1; + + for ( int i = 0 ; i <= last; i++ ) + { + CWordTag *w = inwords.m_Words[ i ]; + + strcat( sentence, w->GetWord() ); + if ( i != last ) + { + strcat( sentence, " " ); + } + } + + if ( inwords.m_Words.Count() == 1 && + !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) + { + sentence[ 0 ] = 0; + } + + return sentence; +} + +bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords ) +{ + *ppAnalysis = NULL; + + TALKBACK_ANALYSIS_SETTINGS settings; + memset( &settings, 0, sizeof( settings ) ); + + // Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the + // structure. + settings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS ); + + + // Default value: 30 (frames per second). + settings.fFrameRate = 100; + // Set this to 1 to optimize for flipbook output, 0 to do analysis normally. + // + // Default value: 0 (normal analysis). 
+ settings.fOptimizeForFlipbook = 0; + // Set this to -1 to seed the random number generator with the current time. + // Any other number will be used directly for the random number seed, which + // is useful if you want repeatable speech gestures. This value does not + // influence lip-synching at all. + // + // Default value: -1 (use current time). + settings.fRandomSeed = -1; + // Path to the configuration (.INI) file with phoneme-to-speech-target + // mapping. Set this to NULL to use the default mapping. + // + // Default value: NULL (use default mapping). + settings.fConfigFile = NULL; + + char const *text = ConstructInputSentence( inwords ); + + Printf( "Analyzing: \"%s\"\n", text[ 0 ] ? text : TEXTLESS_WORDNAME ); + + TALKBACK_ERR err = talkback->TalkBackGetAnalysis( + ppAnalysis, + wavfile, + text, + &settings ); + + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return false; + } + + Printf( "Analysis successful...\n" ); + + return true; +} + +typedef struct +{ + TALKBACK_PHONEME phoneme; + char const *string; +} TBPHONEMES_t; + +static TBPHONEMES_t g_TBPhonemeList[]= +{ + { TALKBACK_PHONEME_IY, "iy" }, + { TALKBACK_PHONEME_IH, "ih" }, + { TALKBACK_PHONEME_EH, "eh" }, + { TALKBACK_PHONEME_EY, "ey" }, + { TALKBACK_PHONEME_AE, "ae" }, + { TALKBACK_PHONEME_AA, "aa" }, + { TALKBACK_PHONEME_AW, "aw" }, + { TALKBACK_PHONEME_AY, "ay" }, + { TALKBACK_PHONEME_AH, "ah" }, + { TALKBACK_PHONEME_AO, "ao" }, + { TALKBACK_PHONEME_OY, "oy" }, + { TALKBACK_PHONEME_OW, "ow" }, + { TALKBACK_PHONEME_UH, "uh" }, + { TALKBACK_PHONEME_UW, "uw" }, + { TALKBACK_PHONEME_ER, "er" }, + { TALKBACK_PHONEME_AX, "ax" }, + { TALKBACK_PHONEME_S, "s" }, + { TALKBACK_PHONEME_SH, "sh" }, + { TALKBACK_PHONEME_Z, "z" }, + { TALKBACK_PHONEME_ZH, "zh" }, + { TALKBACK_PHONEME_F, "f" }, + { TALKBACK_PHONEME_TH, "th" }, + { TALKBACK_PHONEME_V, "v" }, + { TALKBACK_PHONEME_DH, "dh" }, + { TALKBACK_PHONEME_M, "m" }, + { TALKBACK_PHONEME_N, "n" }, + { TALKBACK_PHONEME_NG, "ng" }, + { 
TALKBACK_PHONEME_L, "l" }, + { TALKBACK_PHONEME_R, "r" }, + { TALKBACK_PHONEME_W, "w" }, + { TALKBACK_PHONEME_Y, "y" }, + { TALKBACK_PHONEME_HH, "hh" }, + { TALKBACK_PHONEME_B, "b" }, + { TALKBACK_PHONEME_D, "d" }, + { TALKBACK_PHONEME_JH, "jh" }, + { TALKBACK_PHONEME_G, "g" }, + { TALKBACK_PHONEME_P, "p" }, + { TALKBACK_PHONEME_T, "t" }, + { TALKBACK_PHONEME_K, "k" }, + { TALKBACK_PHONEME_CH, "ch" }, + { TALKBACK_PHONEME_SIL, "<sil>" }, + { -1, NULL } +}; + +char const *TBPhonemeToString( TALKBACK_PHONEME phoneme ) +{ + if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST ) + { + return "Bogus"; + } + + TBPHONEMES_t *item = &g_TBPhonemeList[ phoneme ]; + return item->string; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// time - +// start - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return -1; + } + + if ( count <= 0L ) + return -1; + + // Bogus + if ( count >= 100000L ) + return -1; + + for ( int i = 0; i < (int)count; i++ ) + { + TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID; + err = talkback->TalkBackGetPhonemeEnum( analysis, i, &tbPhoneme ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + continue; + } + + double t; + + if ( start ) + { + err = talkback->TalkBackGetPhonemeStartTime( analysis, i, &t ); + } + else + { + err = talkback->TalkBackGetPhonemeEndTime( analysis, i, &t ); + } + + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + continue; + } + + if ( t == time ) + { + return i; + } + } + + return -1; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : 
*analysis - +// starttime - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime ) +{ + return GetPhonemeIndexAtWord( analysis, starttime, true ); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// endtime - +// Output : int +//----------------------------------------------------------------------------- +int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime ) +{ + return GetPhonemeIndexAtWord( analysis, endtime, false ); +} + +CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index ) +{ + static CAnalyzedPhoneme p; + + memset( &p, 0, sizeof( p ) ); + + TALKBACK_PHONEME tb; + + TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + strcpy( p.phoneme, TBPhonemeToString( tb ) ); + + err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &p.starttime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &p.endtime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + return &p; +} + +CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index ) +{ + static CAnalyzedWord w; + + memset( &w, 0, sizeof( w ) ); + + long chars = sizeof( w.buffer ); + + TALKBACK_ERR err = talkback->TalkBackGetWord( analysis, index, chars, w.buffer ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + err = talkback->TalkBackGetWordStartTime( analysis, index, &w.starttime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + 
return NULL; + } + err = talkback->TalkBackGetWordEndTime( analysis, index, &w.endtime ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return NULL; + } + + return &w; +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *w1 - +// *w2 - +// Output : Returns true on success, false on failure. +//----------------------------------------------------------------------------- +bool FuzzyWordMatch( char const *w1, char const *w2 ) +{ + int len1 = strlen( w1 ); + int len2 = strlen( w2 ); + + int minlen = min( len1, len2 ); + + // Found a match + if ( !strnicmp( w1, w2, minlen ) ) + return true; + + int letterdiff = abs( len1 - len2 ); + // More than three letters different, don't bother + if ( letterdiff > 5 ) + return false; + + // Compute a "delta" + char *p1 = (char *)w1; + char *p2 = (char *)w2; + + CUtlVector <char> word1; + CUtlVector <char> word2; + + while ( *p1 ) + { + if ( V_isalpha( *p1 ) ) + { + word1.AddToTail( *p1 ); + } + p1++; + } + + while ( *p2 ) + { + if ( V_isalpha( *p2 ) ) + { + word2.AddToTail( *p2 ); + } + p2++; + } + + int i; + for ( i = 0; i < word1.Size(); i++ ) + { + char c = word1[ i ]; + + // See if c is in word 2, if so subtract it out + int idx = word2.Find( c ); + + if ( idx != word2.InvalidIndex() ) + { + word2.Remove( idx ); + } + } + + if ( word2.Size() <= letterdiff ) + return true; + + word2.RemoveAll(); + + while ( *p2 ) + { + if ( V_isalpha( *p2 ) ) + { + word2.AddToTail( *p2 ); + } + p2++; + } + + for ( i = 0; i < word2.Size(); i++ ) + { + char c = word2[ i ]; + + // See if c is in word 2, if so subtract it out + int idx = word1.Find( c ); + + if ( idx != word1.InvalidIndex() ) + { + word1.Remove( idx ); + } + } + + if ( word1.Size() <= letterdiff ) + return true; + + return false; +} + +//----------------------------------------------------------------------------- +// Purpose: For foreign language stuff, if inwords is empty, process anyway... 
+// Input : *analysis - +// outwords - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + DescribeError( err ); + return; + } + + CWordTag *newWord = new CWordTag; + + newWord->SetWord( TEXTLESS_WORDNAME ); + + float starttime = 0.0f; + float endtime = 1.0f; + + + for ( int i = 0; i < count; ++i ) + { + // Get phoneme and timing info + CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i ); + if ( !ph ) + continue; + + CPhonemeTag *ptag = new CPhonemeTag; + + if ( i == 0 || ( ph->starttime < starttime ) ) + { + starttime = ph->starttime; + } + + if ( i == 0 || ( ph->endtime > endtime ) ) + { + endtime = ph->endtime; + } + + ptag->SetStartTime( ph->starttime ); + ptag->SetEndTime( ph->endtime ); + + ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); + ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); + + ptag->SetTag( ph->phoneme ); + ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); + + newWord->m_Phonemes.AddToTail( ptag ); + } + + newWord->m_flStartTime = starttime; + newWord->m_flEndTime = endtime; + + newWord->m_uiStartByte = ComputeByteFromTime( starttime ); + newWord->m_uiEndByte = ComputeByteFromTime( endtime ); + + outwords.Reset(); + outwords.AddWordTag( newWord ); + outwords.SetTextFromWords(); +} + +//----------------------------------------------------------------------------- +// Purpose: +// Input : *analysis - +// inwords - +// outwords - +//----------------------------------------------------------------------------- +void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords ) +{ + long count; + + TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count ); + if ( err != TALKBACK_NOERR ) + { + 
DescribeError( err ); + return; + } + + if ( count <= 0L ) + { + if ( inwords.m_Words.Count() == 0 || + !Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) ) + { + ProcessWordsTextless( analysis, outwords ); + } + return; + } + + // Bogus + if ( count >= 100000L ) + return; + + int inwordpos = 0; + int awordpos = 0; + + outwords.Reset(); + + char previous[ 256 ]; + previous[ 0 ] = 0; + + while ( inwordpos < inwords.m_Words.Size() ) + { + CWordTag *in = inwords.m_Words[ inwordpos ]; + + if ( awordpos >= count ) + { + // Just copy the rest over without phonemes + CWordTag *copy = new CWordTag( *in ); + + outwords.AddWordTag( copy ); + + inwordpos++; + continue; + } + + // Should never fail + CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos ); + if ( !w ) + { + return; + } + + if ( !stricmp( w->buffer, "<SIL>" ) ) + { + awordpos++; + continue; + } + + char const *check = ApplyTBWordRules( in->GetWord() ); + if ( !FuzzyWordMatch( check, w->buffer ) ) + { + bool advance_input = true; + if ( previous[ 0 ] ) + { + if ( FuzzyWordMatch( previous, w->buffer ) ) + { + advance_input = false; + } + } + + if ( advance_input ) + { + inwordpos++; + } + awordpos++; + continue; + } + strcpy( previous, check ); + + CWordTag *newWord = new CWordTag; + + newWord->SetWord( in->GetWord() ); + + newWord->m_flStartTime = w->starttime; + newWord->m_flEndTime = w->endtime; + + newWord->m_uiStartByte = ComputeByteFromTime( w->starttime ); + newWord->m_uiEndByte = ComputeByteFromTime( w->endtime ); + + int phonemestart, phonemeend; + + phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime ); + phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime ); + + if ( phonemestart >= 0 && phonemeend >= 0 ) + { + for ( ; phonemestart <= phonemeend; phonemestart++ ) + { + // Get phoneme and timing info + CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart ); + if ( !ph ) + continue; + + CPhonemeTag *ptag = new CPhonemeTag; + 
ptag->SetStartTime( ph->starttime ); + ptag->SetEndTime( ph->endtime ); + + ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime ); + ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime ); + + ptag->SetTag( ph->phoneme ); + ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) ); + + newWord->m_Phonemes.AddToTail( ptag ); + } + } + + outwords.AddWordTag( newWord ); + inwordpos++; + awordpos++; + } +} + +char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word ) +{ + static char outword[ 256 ]; + + char const *in = word; + char *out = outword; + + while ( *in && ( ( out - outword ) <= 255 ) ) + { + if ( *in == '\t' || + *in == ' ' || + *in == '\n' || + *in == '-' || + *in == '.' || + *in == ',' || + *in == ';' || + *in == '?' || + *in == '"' || + *in == ':' || + *in == '(' || + *in == ')' ) + { + in++; + *out++ = ' '; + continue; + } + + if ( !V_isprint( *in ) ) + { + in++; + continue; + } + + if ( *in >= 128 ) + { + in++; + continue; + } + + // Skip numbers + if ( *in >= '0' && *in <= '9' ) + { + in++; + continue; + } + + // Convert all letters to upper case + if ( *in >= 'a' && *in <= 'z' ) + { + *out++ = ( *in++ ) - 'a' + 'A'; + continue; + } + + if ( *in >= 'A' && *in <= 'Z' ) + { + *out++ = *in++; + continue; + } + + if ( *in == '\'' ) + { + *out++ = *in++; + continue; + } + + in++; + } + + *out = 0; + + return outword; +} + +//----------------------------------------------------------------------------- +// Purpose: Given a wavfile and a list of inwords, determines the word/phonene +// sample counts for the sentce +// Output : SR_RESULT +//----------------------------------------------------------------------------- +SR_RESULT CPhonemeExtractorLipSinc::Extract( + const char *wavfile, + int numsamples, + void (*pfnPrint)( const char *fmt, ... 
), + CSentence& inwords, + CSentence& outwords ) +{ + // g_enableTalkBackDebuggingOutput = 1; + + m_pfnPrint = pfnPrint; + + if ( !InitLipSinc() ) + { + return SR_RESULT_ERROR; + } + + m_flSampleCount = numsamples; + + if ( !CheckSoundFile( wavfile ) ) + { + FreeLibrary( m_hHelper ); + return SR_RESULT_ERROR; + } + + TALKBACK_ANALYSIS *analysis = NULL; + + if ( !AttemptAnalysis( &analysis, wavfile, inwords ) ) + { + FreeLibrary( m_hHelper ); + return SR_RESULT_FAILED; + } + + if ( strlen( inwords.GetText() ) <= 0 ) + { + inwords.SetTextFromWords(); + } + + outwords = inwords; + + // Examine data + ProcessWords( analysis, inwords, outwords ); + + if ( analysis ) + { + talkback->TalkBackFreeAnalysis( &analysis ); + } + + return SR_RESULT_SUCCESS; +} + EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorLipSinc, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE );
\ No newline at end of file diff --git a/mp/src/utils/phonemeextractor/phonemeextractor_ims.vpc b/mp/src/utils/phonemeextractor/phonemeextractor_ims.vpc index e3df0327..72d7a82b 100644 --- a/mp/src/utils/phonemeextractor/phonemeextractor_ims.vpc +++ b/mp/src/utils/phonemeextractor/phonemeextractor_ims.vpc @@ -1,98 +1,98 @@ -//-----------------------------------------------------------------------------
-// PHONEMEEXTRACTOR_IMS.VPC
-//
-// Project Script
-//-----------------------------------------------------------------------------
-
-$Macro SRCDIR "..\.."
-$Macro OUTBINDIR "$SRCDIR\..\game\bin\phonemeextractors"
-
-$Include "$SRCDIR\vpc_scripts\source_dll_base.vpc"
-
-$Configuration
-{
- $Compiler
- {
- $AdditionalIncludeDirectories "$BASE;../common,../hlfaceposer,../sapi51/include"
- $PreprocessorDefinitions "$BASE;PHONEMEEXTRACTOR_EXPORTS"
- }
-
- $Linker
- {
- $AdditionalDependencies "$BASE odbc32.lib odbccp32.lib"
- }
-}
-
-$Configuration "Debug"
-{
- $General
- {
- $OutputDirectory ".\Debug_ims" [$WIN32]
- $IntermediateDirectory ".\Debug_ims" [$WIN32]
- }
-}
-
-$Configuration "Release"
-{
- $General
- {
- $OutputDirectory ".\Release_ims" [$WIN32]
- $IntermediateDirectory ".\Release_ims" [$WIN32]
- }
-}
-
-$Project "Phonemeextractor_ims"
-{
- $Folder "Source Files"
- {
- $File "extractor_utils.cpp"
- $File "$SRCDIR\public\phonemeconverter.cpp"
- $File "$SRCDIR\public\sentence.cpp"
- $File "phonemeextractor_ims.cpp"
- }
-
- $Folder "Header Files"
- {
- $File "talkback.h"
- }
-
- $Folder "SAPI Header Files"
- {
- $File "..\sapi51\Include\sapi.h"
- $File "..\sapi51\Include\sapiddk.h"
- $File "..\sapi51\Include\Spddkhlp.h"
- $File "..\sapi51\Include\spdebug.h"
- $File "..\sapi51\Include\sperror.h"
- $File "..\sapi51\Include\sphelper.h"
- }
-
- $Folder "Public Header Files"
- {
- $File "$SRCDIR\public\mathlib\amd3dx.h"
- $File "$SRCDIR\public\tier0\basetypes.h"
- $File "$SRCDIR\public\tier0\commonmacros.h"
- $File "$SRCDIR\public\tier0\dbg.h"
- $File "$SRCDIR\public\tier0\fasttimer.h"
- $File "$SRCDIR\public\appframework\IAppSystem.h"
- $File "$SRCDIR\public\mathlib\mathlib.h"
- $File "$SRCDIR\public\phonemeconverter.h"
- $File "$SRCDIR\public\phonemeextractor\phonemeextractor.h"
- $File "$SRCDIR\public\tier0\platform.h"
- $File "$SRCDIR\public\tier0\protected_things.h"
- $File "$SRCDIR\public\sentence.h"
- $File "$SRCDIR\public\string_t.h"
- $File "$SRCDIR\public\tier1\strtools.h"
- $File "$SRCDIR\public\tier1\utllinkedlist.h"
- $File "$SRCDIR\public\tier1\utlmemory.h"
- $File "$SRCDIR\public\tier1\utlvector.h"
- $File "$SRCDIR\public\mathlib\vector.h"
- $File "$SRCDIR\public\mathlib\vector2d.h"
- $File "$SRCDIR\public\vstdlib\vstdlib.h"
- }
-
- $Folder "Link Libraries"
- {
- $Lib mathlib
- $File "..\sapi51\lib\i386\sapi.lib"
- }
-}
+//----------------------------------------------------------------------------- +// PHONEMEEXTRACTOR_IMS.VPC +// +// Project Script +//----------------------------------------------------------------------------- + +$Macro SRCDIR "..\.." +$Macro OUTBINDIR "$SRCDIR\..\game\bin\phonemeextractors" + +$Include "$SRCDIR\vpc_scripts\source_dll_base.vpc" + +$Configuration +{ + $Compiler + { + $AdditionalIncludeDirectories "$BASE;../common,../hlfaceposer,../sapi51/include" + $PreprocessorDefinitions "$BASE;PHONEMEEXTRACTOR_EXPORTS" + } + + $Linker + { + $AdditionalDependencies "$BASE odbc32.lib odbccp32.lib" + } +} + +$Configuration "Debug" +{ + $General + { + $OutputDirectory ".\Debug_ims" [$WIN32] + $IntermediateDirectory ".\Debug_ims" [$WIN32] + } +} + +$Configuration "Release" +{ + $General + { + $OutputDirectory ".\Release_ims" [$WIN32] + $IntermediateDirectory ".\Release_ims" [$WIN32] + } +} + +$Project "Phonemeextractor_ims" +{ + $Folder "Source Files" + { + $File "extractor_utils.cpp" + $File "$SRCDIR\public\phonemeconverter.cpp" + $File "$SRCDIR\public\sentence.cpp" + $File "phonemeextractor_ims.cpp" + } + + $Folder "Header Files" + { + $File "talkback.h" + } + + $Folder "SAPI Header Files" + { + $File "..\sapi51\Include\sapi.h" + $File "..\sapi51\Include\sapiddk.h" + $File "..\sapi51\Include\Spddkhlp.h" + $File "..\sapi51\Include\spdebug.h" + $File "..\sapi51\Include\sperror.h" + $File "..\sapi51\Include\sphelper.h" + } + + $Folder "Public Header Files" + { + $File "$SRCDIR\public\mathlib\amd3dx.h" + $File "$SRCDIR\public\tier0\basetypes.h" + $File "$SRCDIR\public\tier0\commonmacros.h" + $File "$SRCDIR\public\tier0\dbg.h" + $File "$SRCDIR\public\tier0\fasttimer.h" + $File "$SRCDIR\public\appframework\IAppSystem.h" + $File "$SRCDIR\public\mathlib\mathlib.h" + $File "$SRCDIR\public\phonemeconverter.h" + $File "$SRCDIR\public\phonemeextractor\phonemeextractor.h" + $File "$SRCDIR\public\tier0\platform.h" + $File "$SRCDIR\public\tier0\protected_things.h" + $File 
"$SRCDIR\public\sentence.h" + $File "$SRCDIR\public\string_t.h" + $File "$SRCDIR\public\tier1\strtools.h" + $File "$SRCDIR\public\tier1\utllinkedlist.h" + $File "$SRCDIR\public\tier1\utlmemory.h" + $File "$SRCDIR\public\tier1\utlvector.h" + $File "$SRCDIR\public\mathlib\vector.h" + $File "$SRCDIR\public\mathlib\vector2d.h" + $File "$SRCDIR\public\vstdlib\vstdlib.h" + } + + $Folder "Link Libraries" + { + $Lib mathlib + $File "..\sapi51\lib\i386\sapi.lib" + } +} diff --git a/mp/src/utils/phonemeextractor/talkback.h b/mp/src/utils/phonemeextractor/talkback.h index 3a1b179a..bb6ee808 100644 --- a/mp/src/utils/phonemeextractor/talkback.h +++ b/mp/src/utils/phonemeextractor/talkback.h @@ -1,732 +1,732 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-// =============================================================================
-// Interface to the LIPSinc TalkBack 1.1 library (TalkBack_*.lib).
-//
-// Copyright © 1998-2002 LIPSinc. All rights reserved.
-
-#if !defined(TalkBack_h)
-#define TalkBack_h
-
-#include <stddef.h> // size_t.
-
-// Enforce a C API.
-#if defined(__cplusplus)
-extern "C"
-{
-#endif
-
-// -----------------------------------------------------------------------------
-// Use the preprocessor to make the new API compatible with the old one.
-
-#define TalkbackStartupLibrary TalkBackStartupLibrary
-#define TalkbackShutdownLibrary TalkBackShutdownLibrary
-#define TalkbackGetVersion TalkBackGetVersion
-#define TalkbackGetVersionString TalkBackGetVersionString
-#define TalkbackCheckSoundFile TalkBackCheckSoundFile
-#define TalkbackCheckSpokenText TalkBackCheckSpokenText
-#define TalkbackGetErrorString TalkBackGetErrorString
-#define TalkbackGetAnalysis TalkBackGetAnalysis
-#define TalkbackFreeAnalysis TalkBackFreeAnalysis
-#define TalkbackGetFirstFrameNum TalkBackGetFirstFrameNum
-#define TalkbackGetLastFrameNum TalkBackGetLastFrameNum
-#define TalkbackGetFrameStartTime TalkBackGetFrameStartTime
-#define TalkbackGetFrameEndTime TalkBackGetFrameEndTime
-#define TalkbackGetNumPhonemes TalkBackGetNumPhonemes
-#define TalkbackGetPhonemeEnum TalkBackGetPhonemeEnum
-#define TalkbackGetPhonemeStartTime TalkBackGetPhonemeStartTime
-#define TalkbackGetPhonemeEndTime TalkBackGetPhonemeEndTime
-#define TalkbackInsertPhoneme TalkBackInsertPhoneme
-#define TalkbackDeletePhoneme TalkBackDeletePhoneme
-#define TalkbackChangePhonemeStart TalkBackChangePhonemeStart
-#define TalkbackChangePhonemeEnd TalkBackChangePhonemeEnd
-#define TalkbackChangePhonemeEnum TalkBackChangePhonemeEnum
-#define TalkbackGetNumWords TalkBackGetNumWords
-#define TalkbackGetWord TalkBackGetWord
-#define TalkbackGetWordStartTime TalkBackGetWordStartTime
-#define TalkbackGetWordEndTime TalkBackGetWordEndTime
-#define TalkbackGetNumSpeechTargetTracks TalkBackGetNumSpeechTargetTracks
-#define TalkbackGetNumSpeechTargetKeys TalkBackGetNumSpeechTargetKeys
-#define TalkbackGetSpeechTargetKeyInfo TalkBackGetSpeechTargetKeyInfo
-#define TalkbackGetSpeechTargetValueAtFrame TalkBackGetSpeechTargetValueAtFrame
-#define TalkbackGetDominantSpeechTargetAtFrame TalkBackGetDominantSpeechTargetAtFrame
-#define TalkbackGetSpeechTargetValueAtTime TalkBackGetSpeechTargetValueAtTime
-#define TalkbackGetSpeechTargetDerivativesAtTime TalkBackGetSpeechTargetDerivativesAtTime
-#define TalkbackGetNumGestureTracks TalkBackGetNumGestureTracks
-#define TalkbackGetNumGestureKeys TalkBackGetNumGestureKeys
-#define TalkbackGetGestureKeyInfo TalkBackGetGestureKeyInfo
-#define TalkbackGetGestureValueAtFrame TalkBackGetGestureValueAtFrame
-#define TalkbackGetGestureValueAtTime TalkBackGetGestureValueAtTime
-#define TalkbackGetGestureDerivativesAtTime TalkBackGetGestureDerivativesAtTime
-
-// -----------------------------------------------------------------------------
-// For debug builds, set this to a non-zero value to get verbose debugging
-// output from TalkBack.
-
-extern int g_enableTalkBackDebuggingOutput;
-
-// -----------------------------------------------------------------------------
-// Miscellaneous constants.
-
-// For calling TalkBackGetAnalysis() with all defaults.
-#define TALKBACK_DEFAULT_SETTINGS NULL
-
-// For setting the iSoundText parameter in TalkBackGetAnalysis() to "no text."
-#define TALKBACK_NO_TEXT NULL
-
-// Handy constants for TALKBACK_ANALYSIS_SETTINGS fields:
-
- // For setting fSize.
-#define TALKBACK_SETTINGS_SIZE sizeof(TALKBACK_ANALYSIS_SETTINGS)
- // For setting fFrameRate to the
- // default.
-#define TALKBACK_DEFAULT_FRAME_RATE 30
- // For setting fOptimizeForFlipbook
- // to *not* optimize for flipbook.
-#define TALKBACK_OPTIMIZE_FOR_FLIPBOOK_OFF 0
- // For setting fOptimizeForFlipbook
- // to optimize for flipbook.
-#define TALKBACK_OPTIMIZE_FOR_FLIPBOOK_ON 1
- // For setting fRandomSeed to use the
- // current time to seed the random
- // number generator and thereby get
- // non-deterministic speech gestures.
-#define TALKBACK_RANDOM_SEED -1
- // For setting fConfigFile to "no
- // config file."
-#define TALKBACK_NO_CONFIG_FILE NULL
-
-// -----------------------------------------------------------------------------
-// Data types.
-
-// TALKBACK_NOERR if successful, TalkBack error code if not.
-typedef long TALKBACK_ERR;
-
-// Opaque analysis results.
-typedef void TALKBACK_ANALYSIS;
-
-// Speech target.
-typedef long TALKBACK_SPEECH_TARGET;
-
-// Speech gesture.
-typedef long TALKBACK_GESTURE;
-
-// Phoneme.
-typedef long TALKBACK_PHONEME;
-
-// -----------------------------------------------------------------------------
-// Data structures.
-
-#pragma pack(push, 1)
-
-// Optional analysis settings passed to TalkBackGetAnalysis().
-typedef struct
-{
- // Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the
- // structure.
- long fSize;
- // Frame rate for analysis. This only matters if you will be using *AtFrame
- // functions.
- //
- // Default value: 30 (frames per second).
- long fFrameRate;
- // Set this to 1 to optimize for flipbook output, 0 to do analysis normally.
- //
- // Default value: 0 (normal analysis).
- long fOptimizeForFlipbook;
- // Set this to -1 to seed the random number generator with the current time.
- // Any other number will be used directly for the random number seed, which
- // is useful if you want repeatable speech gestures. This value does not
- // influence lip-synching at all.
- //
- // Default value: -1 (use current time).
- long fRandomSeed;
- // Path to the configuration (.INI) file with phoneme-to-speech-target
- // mapping. Set this to NULL to use the default mapping.
- //
- // Default value: NULL (use default mapping).
- char const *fConfigFile;
-} TALKBACK_ANALYSIS_SETTINGS;
-
-typedef struct
-{
- // Set this field to sizeof(TALKBACK_SOUND_FILE_METRICS) before using the
- // structure. This will allow the structure to evolve if necessary.
- size_t m_size;
- // Bits per sample.
- long m_bitsPerSample;
- // Sample rate in Hertz.
- long m_sampleRate;
- // Duration of the audio in seconds.
- double m_duration;
- // 1 if the sound file can be analyzed, 0 if not.
- long m_canBeAnalyzed;
- // 1 if the sound file is clipped, 0 if not.
- long m_isClipped;
- // The decibel range of the sound file.
- double m_decibelRange;
- // A quality value for the sound file: the nominal range is 0 to 100. Try
- // to keep it above 45 for good results.
- int m_quality;
-
- // Added for version 2 of the metrics structure:
- // ---------------------------------------------
- // The number of channels in the sound file: 1 for mono, 2 for stereo, etc.
- long m_channelCount;
-} TALKBACK_SOUND_FILE_METRICS;
-
-#pragma pack(pop)
-
-// -----------------------------------------------------------------------------
-// Constants.
-
-// TalkBack error codes. Use TalkBackGetErrorString() to return text
-// descriptions for these codes.
-enum
-{
- // Windows convention: set this bit to indicate an application-defined error
- // code.
- BIT29 = (1 << 29),
- // Success (not an error).
- TALKBACK_NOERR = 0,
- // The first error code: useful for iterating through the error codes.
- TALKBACK_ERROR_FIRST = 4201 | BIT29,
- // Generic error.
- TALKBACK_ERROR = TALKBACK_ERROR_FIRST,
- // TalkBackStartupLibrary() failed [internal error] or was never called.
- TALKBACK_STARTUP_FAILED_ERR,
- // TalkBackShutdownLibrary() failed, either because
- // TalkBackStartupLibrary() was never called or because
- // TalkBackShutdownLibrary() has already been called.
- TALKBACK_SHUTDOWN_FAILED_ERR,
- // The TalkBack data files could not be found [invalid path or missing
- // files].
- TALKBACK_CORE_DATA_NOT_FOUND_ERR,
- // One or more of the parameters are NULL.
- TALKBACK_NULL_PARAMETER_ERR,
- // One or more of the parameters are invalid.
- TALKBACK_INVALID_PARAMETER_ERR,
- // The analysis object pointer is invalid.
- TALKBACK_INVALID_ANALYSIS_ERR,
- // Analysis failed [the sound file cannot be analyzed or an internal error
- // occurred].
- TALKBACK_ANALYSIS_FAILED_ERR,
- // One or more of the indices (track, key, frame, word, phoneme) are
- // invalid (out of range).
- TALKBACK_INVALID_INDEX_ERR,
- // The time parameter is invalid (out of range).
- TALKBACK_INVALID_TIME_ERR,
- // A serious internal error occurred in TalkBack; please alert LIPSinc by
- // sending mail with a description of how the error was triggered to
- // [email protected].
- TALKBACK_INTERNAL_ERR,
- // Could not open the specified sound file.
- TALKBACK_COULD_NOT_LOAD_SOUND_ERR,
- // TalkBackStartupLibrary() has not been called.
- TALKBACK_STARTUP_NOT_CALLED,
- // The configuration file specified in the TALKBACK_ANALYSIS_SETTINGS
- // structure is invalid.
- TALKBACK_CONFIG_PARSE_ERROR,
- // The last error code: useful for iterating through the error codes.
- TALKBACK_ERROR_LAST = TALKBACK_CONFIG_PARSE_ERROR
-};
-
-// Default lip-synching track identifiers.
-//
-// NOTE: these track identifiers apply *only* to the default phoneme-to-track
-// mapping! Consult the TalkBack Reference Guide for more details.
-//
-// NOTE: these values are valid *only* if you use the default mapping and are
-// provided as a convenience. If you use your own mapping, these values
-// are invalid and should not be used.
-
-enum
-{
- TALKBACK_SPEECH_TARGET_INVALID = -1,
- TALKBACK_SPEECH_TARGET_FIRST = 0,
- TALKBACK_SPEECH_TARGET_EAT = TALKBACK_SPEECH_TARGET_FIRST, // 0
- TALKBACK_SPEECH_TARGET_EARTH, // 1
- TALKBACK_SPEECH_TARGET_IF, // 2
- TALKBACK_SPEECH_TARGET_OX, // 3
- TALKBACK_SPEECH_TARGET_OAT, // 4
- TALKBACK_SPEECH_TARGET_WET, // 5
- TALKBACK_SPEECH_TARGET_SIZE, // 6
- TALKBACK_SPEECH_TARGET_CHURCH, // 7
- TALKBACK_SPEECH_TARGET_FAVE, // 8
- TALKBACK_SPEECH_TARGET_THOUGH, // 9
- TALKBACK_SPEECH_TARGET_TOLD, // 10
- TALKBACK_SPEECH_TARGET_BUMP, // 11
- TALKBACK_SPEECH_TARGET_NEW, // 12
- TALKBACK_SPEECH_TARGET_ROAR, // 13
- TALKBACK_SPEECH_TARGET_CAGE, // 14
- TALKBACK_SPEECH_TARGET_LAST = TALKBACK_SPEECH_TARGET_CAGE, // 14
- TALKBACK_NUM_SPEECH_TARGETS // 15 (0..14)
-};
-
-// Speech gesture track identifiers.
-
-enum
-{
- TALKBACK_GESTURE_INVALID = -1,
- TALKBACK_GESTURE_FIRST = 0,
- TALKBACK_GESTURE_EYEBROW_RAISE_LEFT = TALKBACK_GESTURE_FIRST, // 0
- TALKBACK_GESTURE_EYEBROW_RAISE_RIGHT, // 1
- TALKBACK_GESTURE_BLINK_LEFT, // 2
- TALKBACK_GESTURE_BLINK_RIGHT, // 3
- TALKBACK_GESTURE_HEAD_BEND, // 4
- TALKBACK_GESTURE_HEAD_SIDE_SIDE, // 5
- TALKBACK_GESTURE_HEAD_TWIST, // 6
- TALKBACK_GESTURE_EYE_SIDE_SIDE_LEFT, // 7
- TALKBACK_GESTURE_EYE_SIDE_SIDE_RIGHT, // 8
- TALKBACK_GESTURE_EYE_UP_DOWN_LEFT, // 9
- TALKBACK_GESTURE_EYE_UP_DOWN_RIGHT, // 10
- TALKBACK_GESTURE_LAST = TALKBACK_GESTURE_EYE_UP_DOWN_RIGHT, // 10
- TALKBACK_NUM_GESTURES // 11 (0..10)
-};
-
-// Phoneme identifiers.
-
-enum
-{
- TALKBACK_PHONEME_INVALID = -1,
- TALKBACK_PHONEME_FIRST = 0,
- TALKBACK_PHONEME_IY = TALKBACK_PHONEME_FIRST, // 0
- TALKBACK_PHONEME_IH, // 1
- TALKBACK_PHONEME_EH, // 2
- TALKBACK_PHONEME_EY, // 3
- TALKBACK_PHONEME_AE, // 4
- TALKBACK_PHONEME_AA, // 5
- TALKBACK_PHONEME_AW, // 6
- TALKBACK_PHONEME_AY, // 7
- TALKBACK_PHONEME_AH, // 8
- TALKBACK_PHONEME_AO, // 9
- TALKBACK_PHONEME_OY, // 10
- TALKBACK_PHONEME_OW, // 11
- TALKBACK_PHONEME_UH, // 12
- TALKBACK_PHONEME_UW, // 13
- TALKBACK_PHONEME_ER, // 14
- TALKBACK_PHONEME_AX, // 15
- TALKBACK_PHONEME_S, // 16
- TALKBACK_PHONEME_SH, // 17
- TALKBACK_PHONEME_Z, // 18
- TALKBACK_PHONEME_ZH, // 19
- TALKBACK_PHONEME_F, // 20
- TALKBACK_PHONEME_TH, // 21
- TALKBACK_PHONEME_V, // 22
- TALKBACK_PHONEME_DH, // 23
- TALKBACK_PHONEME_M, // 24
- TALKBACK_PHONEME_N, // 25
- TALKBACK_PHONEME_NG, // 26
- TALKBACK_PHONEME_L, // 27
- TALKBACK_PHONEME_R, // 28
- TALKBACK_PHONEME_W, // 29
- TALKBACK_PHONEME_Y, // 30
- TALKBACK_PHONEME_HH, // 31
- TALKBACK_PHONEME_B, // 32
- TALKBACK_PHONEME_D, // 33
- TALKBACK_PHONEME_JH, // 34
- TALKBACK_PHONEME_G, // 35
- TALKBACK_PHONEME_P, // 36
- TALKBACK_PHONEME_T, // 37
- TALKBACK_PHONEME_K, // 38
- TALKBACK_PHONEME_CH, // 39
- TALKBACK_PHONEME_SIL, // 40
- TALKBACK_PHONEME_LAST = TALKBACK_PHONEME_SIL, // 40
- TALKBACK_NUM_PHONEMES // 41 (0..40)
-};
-
-// -----------------------------------------------------------------------------
-// Function declarations.
-
-// ---------------------------
-// Startup/shutdown functions.
-// ---------------------------
-
-// Must be the first function called when using TalkBack.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackStartupLibrary(
- char const *iCoreDataDir); // IN: full path of folder containing TalkBack data files.
-
-// Should be the last function called when using TalkBack.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackShutdownLibrary(); // IN: nothing.
-
-// ------------------
-// Version functions.
-// ------------------
-
-// Gets the TalkBack version number.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetVersion(
- long *oMajor, // OUT: major version number.
- long *oMinor, // OUT: minor version number.
- long *oRevision); // OUT: revision version number.
-
-// Gets the TalkBack version number as a string.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetVersionString(
- long iMaxChars, // IN: size of version string buffer.
- char *oVersion); // OUT: version string buffer.
-
-// ------------------
-// Utility functions.
-// ------------------
-
-// Checks whether a sound file can be analyzed and returns some quality metrics.
-//
-// NOTE: this function is deprecated and has been supplanted by
-// TalkBackGetSoundFileMetrics().
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackCheckSoundFile(
- char const *iSoundFileName, // IN: name of sound file to be checked.
- long *oCanBeAnalyzed, // OUT: 1 if sound can be analyzed, 0 if not.
- long *oIsClipped, // OUT: 1 if sound is clipped, 0 if not.
- double *oDecibelRange); // OUT: used decibel range of sound.
-
-// Returns metrics for the specified sound file.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetSoundFileMetrics(
- char const *iSoundFileName, // IN: name of sound file to be checked.
- TALKBACK_SOUND_FILE_METRICS *ioMetrics); // IN/OUT: address of a structure where the metrics will be stored.
-
-// Checks whether text can be used for text-based analysis, returning the text
-// as it will be analyzed.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackCheckSpokenText(
- char const *iSpokenText, // IN: text to check.
- long iMaxChars, // IN: size of analyzed text buffer.
- char *oAnalyzedText); // OUT: buffer for text as it will be analyzed.
-
-// Convert a TalkBack error code to a description string.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetErrorString(
- TALKBACK_ERR iErrorCode, // IN: TalkBack error code to convert.
- long iMaxChars, // IN: size of the buffer.
- char *oErrorString); // OUT: buffer for the description string.
-
-// Gets the error code and text for the most recent TalkBack error.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetLastError(
- long iMaxChars, // IN: size of the buffer.
- char *oErrorString, // OUT: buffer for the description string.
- TALKBACK_ERR *oErrorCode); // OUT: most recent TalkBack error code.
-
-// -------------------
-// Analysis functions.
-// -------------------
-
-// Gets an opaque TALKBACK_ANALYSIS object. This object is then queried with the
-// TalkBackGet* functions below.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetAnalysis(
- TALKBACK_ANALYSIS **ioAnalysis, // IN/OUT: address of a TALKBACK_ANALYSIS *variable where analysis will be stored.
- char const *iSoundFileName, // IN: name of the sound file to analyze.
- char const *iSoundText, // IN: text spoken in sound file (can be NULL to use textless analysis).
- TALKBACK_ANALYSIS_SETTINGS *iSettings); // IN: pointer to a TALKBACK_ANALYSIS_SETTINGS structure (can be NULL for defaults).
-
-// Frees an opaque TALKBACK_ANALYSIS object. This releases all memory used by
-// the analysis.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackFreeAnalysis(
- TALKBACK_ANALYSIS **ioAnalysis); // IN/OUT: analysis to free.
-
-// #######################################################################
-// NOTE: all functions from this point on require a valid analysis object.
-// #######################################################################
-
-// ------------------------
-// Speech target functions.
-// ------------------------
-
-// Gets the number of speech target tracks.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetNumSpeechTargetTracks(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long *oResult); // OUT: number of speech target tracks.
-
-// Gets the number of keys in the specified speech target track.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetNumSpeechTargetKeys(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iTrackNum, // IN: speech target track.
- long *oResult); // OUT: number of keys in the speech target track.
-
-// Gets key information (time, value, derivative in, and derivative out) for the
-// specified key in the specified speech target track.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetSpeechTargetKeyInfo(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iTrackNum, // IN: speech target track.
- long iKeyNum, // IN: speech target key.
- double *oTime, // OUT: time of key.
- double *oValue, // OUT: value of key.
- double *oDerivativeIn, // OUT: incoming derivative of key.
- double *oDerivativeOut); // OUT: outgoing derivative of key.
-
-// Gets the value of the function curve for the specified speech target track at
-// the specified time.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetSpeechTargetValueAtTime(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iTrackNum, // IN: speech target track.
- double iTime, // IN: time in seconds.
- double *oResult); // OUT: value of the function curve.
-
-// Gets the derivatives of the function curve for the specified speech target
-// track at the specified time.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetSpeechTargetDerivativesAtTime(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iTrackNum, // IN: speech target track.
- double iTime, // IN: time in seconds.
- double *oDerivativeIn, // OUT: value of the incoming derivative of the function curve.
- double *oDerivativeOut); // OUT: value of the outgoing derivative of the function curve.
-
-// -------------------------
-// Speech gesture functions.
-// -------------------------
-
-// Gets the number of speech gesture tracks.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetNumGestureTracks(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long *oResult); // OUT: number of speech gesture tracks
-
-// Gets the number of keys in the specified speech gesture track.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetNumGestureKeys(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iTrackNum, // IN: speech gesture track.
- long *oResult); // OUT: number of keys in the speech gesture track.
-
-// Gets key information (time, value, derivative in, and derivative out) for the
-// specified key in the specified speech gesture track.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetGestureKeyInfo(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iTrackNum, // IN: speech gesture track.
- long iKeyNum, // IN: speech gesture key.
- double *oTime, // OUT: time of key.
- double *oValue, // OUT: value of key.
- double *oDerivativeIn, // OUT: incoming derivative of key.
- double *oDerivativeOut); // OUT: outgoing derivative of key.
-
-// Gets the value of the function curve for the specified speech gesture track
-// at the specified time.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetGestureValueAtTime(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iTrackNum, // IN: speech gesture track.
- double iTime, // IN: time in seconds.
- double *oResult); // OUT: value of the function curve.
-
-// Gets the derivatives of the function curve for the specified speech gesture
-// track at the specified time.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetGestureDerivativesAtTime(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iTrackNum, // IN: speech gesture track.
- double iTime, // IN: time in seconds.
- double *oDerivativeIn, // OUT: value of the incoming derivative of the function curve.
- double *oDerivativeOut); // OUT: value of the outgoing derivative of the function curve.
-
-// ----------------
-// Frame functions.
-// ----------------
-
-// NOTE: these functions use the frame rate specified in the
-// TALKBACK_ANALYSIS_SETTINGS structure passed to TalkBackGetAnalysis() and
-// default to 30 fps (TALKBACK_DEFAULT_FRAME_RATE) if the structure pointer was
-// NULL.
-
-// Gets the first frame number.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetFirstFrameNum(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long *oResult); // OUT: number of the first frame.
-
-// Gets the last frame number.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetLastFrameNum(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long *oResult); // OUT: number of the last frame.
-
-// Gets the start time of the specified frame.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetFrameStartTime(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iFrameNum, // IN: frame.
- double *oResult); // OUT: start time of the frame in seconds.
-
-// Gets the end time of the specified frame.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetFrameEndTime(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iFrameNum, // IN: frame.
- double *oResult); // OUT: end time of the frame in seconds.
-
-// Gets the value of the function curve for a speech target integrated over the
-// specified frame.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetSpeechTargetValueAtFrame(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iTrackNum, // IN: speech target track.
- long iFrameNum, // IN: frame number.
- double *oResult); // OUT: value of the function curve integrated over the frame.
-
-// Gets the dominant speech target at the specified frame.
-//
-// NOTE: this function is meant to be used in flipbook mode only.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetDominantSpeechTargetAtFrame(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iFrameNum, // IN: frame number.
- TALKBACK_SPEECH_TARGET *oSpeechTarget); // OUT: dominant speech target.
-
-// Gets the value of the function curve for a speech gesture integrated over the
-// specified frame.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetGestureValueAtFrame(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iTrackNum, // IN: speech gesture track.
- long iFrameNum, // IN: frame number.
- double *oResult); // OUT: value of the function curve integrated over the frame.
-
-// ------------------
-// Phoneme functions.
-// ------------------
-
-// Gets the number of phonemes.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetNumPhonemes(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long *oResult); // OUT: number of phonemes.
-
-// Gets the enumeration of the specified phoneme.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetPhonemeEnum(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iPhonemeNum, // IN: phoneme.
- TALKBACK_PHONEME *oResult); // OUT: enumeration of the specified phoneme.
-
-// Gets the start time of the specified phoneme.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetPhonemeStartTime(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iPhonemeNum, // IN: phoneme.
- double *oResult); // OUT: start time of the phoneme in seconds.
-
-// Gets the end time of the specified phoneme.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetPhonemeEndTime(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iPhonemeNum, // IN: phoneme.
- double *oResult); // OUT: end time of the phoneme in seconds.
-
-// ---------------
-// Word functions.
-// ---------------
-
-// NOTE: these functions only yield data for text-based analysis.
-
-// Gets the number of words.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetNumWords(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long *oResult); // OUT: number of words.
-
-// Gets the text of the specified word.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetWord(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iWordNum, // IN: word.
- long iMaxChars, // IN: size of word buffer.
- char *oWord); // OUT: word buffer.
-
-// Gets the start time of the specified word.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetWordStartTime(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iWordNum, // IN: word.
- double *oResult); // OUT: start time of the word in seconds.
-
-// Gets the end time of the specified word.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackGetWordEndTime(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iWordNum, // IN: word.
- double *oResult); // OUT: end time of the word in seconds.
-
-// --------------------------
-// Phoneme editing functions.
-// --------------------------
-
-// Use these functions to modify the phoneme list after you get an opaque
-// analysis object from TalkBackGetAnalysis(). After modifying the phoneme list
-// in the opaque analysis object, subsequent TalkBackGet* calls on that opaque
-// analysis object for speech target (lip-synching) data will return values
-// based on the modified phoneme list. However, speech gesture data is not
-// affected by phoneme editing.
-//
-// NOTE: phoneme editing is only provided in order to support Ventriloquist-like
-// applications where tweaking of the phoneme segmentation (and subsequent
-// recalculation of the animation data) is required. Most customers probably
-// won't need this functionality.
-
-// Inserts a phoneme at the specified position in the specified manner.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackInsertPhoneme(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- TALKBACK_PHONEME iPhoneme, // IN: enumeration of phoneme to insert.
- long iInsertPosition, // IN: position (phoneme number) at which to insert.
- int iInsertBefore); // IN: manner of insertion:
- // 0 means put phoneme after insert position;
- // 1 means put phoneme before insert position.
-
-// Deletes the specified phoneme.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackDeletePhoneme(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iPhonemeToDelete); // IN: phoneme to delete.
-
-// Changes the start time of the specified phoneme.
-//
-// NOTE: the start time specified may not be the actual start time for a number
-// of reasons, most notably if the specified start time will make the phoneme
-// too short. This function returns the actual start time so the caller can
-// check the result without having to query the phoneme.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackChangePhonemeStart(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iPhonemeToChange, // IN: phoneme to change.
- double *ioNewTime); // IN/OUT: new start time value in seconds (in); actual start time (out).
-
-// Changes the end time of the specified phoneme.
-//
-// NOTE: the end time specified may not be the actual end time for a number of
-// reasons, most notably if the specified end time will make the phoneme too
-// short. This function returns the actual end time so the caller can check the
-// result without having to query the phoneme.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackChangePhonemeEnd(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iPhonemeToChange, // IN: phoneme to change.
- double *ioNewTime); // IN/OUT: new end time value in seconds (in); actual end time (out).
-
-// Changes the enumeration of the specified phoneme.
-TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not.
-TalkBackChangePhonemeEnum(
- TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis().
- long iPhonemeToChange, // IN: phoneme to change.
- TALKBACK_PHONEME iNewPhoneme); // IN: new phoneme enumeration.
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// +// ============================================================================= +// Interface to the LIPSinc TalkBack 1.1 library (TalkBack_*.lib). +// +// Copyright © 1998-2002 LIPSinc. All rights reserved. + +#if !defined(TalkBack_h) +#define TalkBack_h + +#include <stddef.h> // size_t. + +// Enforce a C API. +#if defined(__cplusplus) +extern "C" +{ +#endif + +// ----------------------------------------------------------------------------- +// Use the preprocessor to make the new API compatible with the old one. + +#define TalkbackStartupLibrary TalkBackStartupLibrary +#define TalkbackShutdownLibrary TalkBackShutdownLibrary +#define TalkbackGetVersion TalkBackGetVersion +#define TalkbackGetVersionString TalkBackGetVersionString +#define TalkbackCheckSoundFile TalkBackCheckSoundFile +#define TalkbackCheckSpokenText TalkBackCheckSpokenText +#define TalkbackGetErrorString TalkBackGetErrorString +#define TalkbackGetAnalysis TalkBackGetAnalysis +#define TalkbackFreeAnalysis TalkBackFreeAnalysis +#define TalkbackGetFirstFrameNum TalkBackGetFirstFrameNum +#define TalkbackGetLastFrameNum TalkBackGetLastFrameNum +#define TalkbackGetFrameStartTime TalkBackGetFrameStartTime +#define TalkbackGetFrameEndTime TalkBackGetFrameEndTime +#define TalkbackGetNumPhonemes TalkBackGetNumPhonemes +#define TalkbackGetPhonemeEnum TalkBackGetPhonemeEnum +#define TalkbackGetPhonemeStartTime TalkBackGetPhonemeStartTime +#define TalkbackGetPhonemeEndTime TalkBackGetPhonemeEndTime +#define TalkbackInsertPhoneme TalkBackInsertPhoneme +#define TalkbackDeletePhoneme TalkBackDeletePhoneme +#define TalkbackChangePhonemeStart TalkBackChangePhonemeStart +#define TalkbackChangePhonemeEnd TalkBackChangePhonemeEnd +#define TalkbackChangePhonemeEnum TalkBackChangePhonemeEnum +#define 
TalkbackGetNumWords TalkBackGetNumWords +#define TalkbackGetWord TalkBackGetWord +#define TalkbackGetWordStartTime TalkBackGetWordStartTime +#define TalkbackGetWordEndTime TalkBackGetWordEndTime +#define TalkbackGetNumSpeechTargetTracks TalkBackGetNumSpeechTargetTracks +#define TalkbackGetNumSpeechTargetKeys TalkBackGetNumSpeechTargetKeys +#define TalkbackGetSpeechTargetKeyInfo TalkBackGetSpeechTargetKeyInfo +#define TalkbackGetSpeechTargetValueAtFrame TalkBackGetSpeechTargetValueAtFrame +#define TalkbackGetDominantSpeechTargetAtFrame TalkBackGetDominantSpeechTargetAtFrame +#define TalkbackGetSpeechTargetValueAtTime TalkBackGetSpeechTargetValueAtTime +#define TalkbackGetSpeechTargetDerivativesAtTime TalkBackGetSpeechTargetDerivativesAtTime +#define TalkbackGetNumGestureTracks TalkBackGetNumGestureTracks +#define TalkbackGetNumGestureKeys TalkBackGetNumGestureKeys +#define TalkbackGetGestureKeyInfo TalkBackGetGestureKeyInfo +#define TalkbackGetGestureValueAtFrame TalkBackGetGestureValueAtFrame +#define TalkbackGetGestureValueAtTime TalkBackGetGestureValueAtTime +#define TalkbackGetGestureDerivativesAtTime TalkBackGetGestureDerivativesAtTime + +// ----------------------------------------------------------------------------- +// For debug builds, set this to a non-zero value to get verbose debugging +// output from TalkBack. + +extern int g_enableTalkBackDebuggingOutput; + +// ----------------------------------------------------------------------------- +// Miscellaneous constants. + +// For calling TalkBackGetAnalysis() with all defaults. +#define TALKBACK_DEFAULT_SETTINGS NULL + +// For setting the iSoundText parameter in TalkBackGetAnalysis() to "no text." +#define TALKBACK_NO_TEXT NULL + +// Handy constants for TALKBACK_ANALYSIS_SETTINGS fields: + + // For setting fSize. +#define TALKBACK_SETTINGS_SIZE sizeof(TALKBACK_ANALYSIS_SETTINGS) + // For setting fFrameRate to the + // default. 
+#define TALKBACK_DEFAULT_FRAME_RATE 30 + // For setting fOptimizeForFlipbook + // to *not* optimize for flipbook. +#define TALKBACK_OPTIMIZE_FOR_FLIPBOOK_OFF 0 + // For setting fOptimizeForFlipbook + // to optimize for flipbook. +#define TALKBACK_OPTIMIZE_FOR_FLIPBOOK_ON 1 + // For setting fRandomSeed to use the + // current time to seed the random + // number generator and thereby get + // non-deterministic speech gestures. +#define TALKBACK_RANDOM_SEED -1 + // For setting fConfigFile to "no + // config file." +#define TALKBACK_NO_CONFIG_FILE NULL + +// ----------------------------------------------------------------------------- +// Data types. + +// TALKBACK_NOERR if successful, TalkBack error code if not. +typedef long TALKBACK_ERR; + +// Opaque analysis results. +typedef void TALKBACK_ANALYSIS; + +// Speech target. +typedef long TALKBACK_SPEECH_TARGET; + +// Speech gesture. +typedef long TALKBACK_GESTURE; + +// Phoneme. +typedef long TALKBACK_PHONEME; + +// ----------------------------------------------------------------------------- +// Data structures. + +#pragma pack(push, 1) + +// Optional analysis settings passed to TalkBackGetAnalysis(). +typedef struct +{ + // Set this field to sizeof(TALKBACK_ANALYSIS_SETTINGS) before using the + // structure. + long fSize; + // Frame rate for analysis. This only matters if you will be using *AtFrame + // functions. + // + // Default value: 30 (frames per second). + long fFrameRate; + // Set this to 1 to optimize for flipbook output, 0 to do analysis normally. + // + // Default value: 0 (normal analysis). + long fOptimizeForFlipbook; + // Set this to -1 to seed the random number generator with the current time. + // Any other number will be used directly for the random number seed, which + // is useful if you want repeatable speech gestures. This value does not + // influence lip-synching at all. + // + // Default value: -1 (use current time). 
+ long fRandomSeed; + // Path to the configuration (.INI) file with phoneme-to-speech-target + // mapping. Set this to NULL to use the default mapping. + // + // Default value: NULL (use default mapping). + char const *fConfigFile; +} TALKBACK_ANALYSIS_SETTINGS; + +typedef struct +{ + // Set this field to sizeof(TALKBACK_SOUND_FILE_METRICS) before using the + // structure. This will allow the structure to evolve if necessary. + size_t m_size; + // Bits per sample. + long m_bitsPerSample; + // Sample rate in Hertz. + long m_sampleRate; + // Duration of the audio in seconds. + double m_duration; + // 1 if the sound file can be analyzed, 0 if not. + long m_canBeAnalyzed; + // 1 if the sound file is clipped, 0 if not. + long m_isClipped; + // The decibel range of the sound file. + double m_decibelRange; + // A quality value for the sound file: the nominal range is 0 to 100. Try + // to keep it above 45 for good results. + int m_quality; + + // Added for version 2 of the metrics structure: + // --------------------------------------------- + // The number of channels in the sound file: 1 for mono, 2 for stereo, etc. + long m_channelCount; +} TALKBACK_SOUND_FILE_METRICS; + +#pragma pack(pop) + +// ----------------------------------------------------------------------------- +// Constants. + +// TalkBack error codes. Use TalkBackGetErrorString() to return text +// descriptions for these codes. +enum +{ + // Windows convention: set this bit to indicate an application-defined error + // code. + BIT29 = (1 << 29), + // Success (not an error). + TALKBACK_NOERR = 0, + // The first error code: useful for iterating through the error codes. + TALKBACK_ERROR_FIRST = 4201 | BIT29, + // Generic error. + TALKBACK_ERROR = TALKBACK_ERROR_FIRST, + // TalkBackStartupLibrary() failed [internal error] or was never called. 
+ TALKBACK_STARTUP_FAILED_ERR, + // TalkBackShutdownLibrary() failed, either because + // TalkBackStartupLibrary() was never called or because + // TalkBackShutdownLibrary() has already been called. + TALKBACK_SHUTDOWN_FAILED_ERR, + // The TalkBack data files could not be found [invalid path or missing + // files]. + TALKBACK_CORE_DATA_NOT_FOUND_ERR, + // One or more of the parameters are NULL. + TALKBACK_NULL_PARAMETER_ERR, + // One or more of the parameters is invalid. + TALKBACK_INVALID_PARAMETER_ERR, + // The analysis object pointer is invalid. + TALKBACK_INVALID_ANALYSIS_ERR, + // Analysis failed [the sound file cannot be analyzed or an internal error + // occurred]. + TALKBACK_ANALYSIS_FAILED_ERR, + // One or more of the indices (track, key, frame, word, phoneme) are + // invalid (out of range). + TALKBACK_INVALID_INDEX_ERR, + // The time parameter is invalid (out of range). + TALKBACK_INVALID_TIME_ERR, + // A serious internal error occurred in TalkBack; please alert LIPSinc by + // sending mail with a description of how the error was triggered to + // [email protected]. + TALKBACK_INTERNAL_ERR, + // Could not open the specified sound file. + TALKBACK_COULD_NOT_LOAD_SOUND_ERR, + // TalkBackStartupLibrary() has not been called. + TALKBACK_STARTUP_NOT_CALLED, + // The configuration file specified in the TALKBACK_ANALYSIS_SETTINGS + // structure is invalid. + TALKBACK_CONFIG_PARSE_ERROR, + // The last error code: useful for iterating through the error codes. + TALKBACK_ERROR_LAST = TALKBACK_CONFIG_PARSE_ERROR +}; + +// Default lip-synching track identifiers. +// +// NOTE: these track identifiers apply *only* to the default phoneme-to-track +// mapping! Consult the TalkBack Reference Guide for more details. +// +// NOTE: these values are valid *only* if you use the default mapping and are +// provided as a convenience. If you use your own mapping, these values +// are invalid and should not be used. 
+ +enum +{ + TALKBACK_SPEECH_TARGET_INVALID = -1, + TALKBACK_SPEECH_TARGET_FIRST = 0, + TALKBACK_SPEECH_TARGET_EAT = TALKBACK_SPEECH_TARGET_FIRST, // 0 + TALKBACK_SPEECH_TARGET_EARTH, // 1 + TALKBACK_SPEECH_TARGET_IF, // 2 + TALKBACK_SPEECH_TARGET_OX, // 3 + TALKBACK_SPEECH_TARGET_OAT, // 4 + TALKBACK_SPEECH_TARGET_WET, // 5 + TALKBACK_SPEECH_TARGET_SIZE, // 6 + TALKBACK_SPEECH_TARGET_CHURCH, // 7 + TALKBACK_SPEECH_TARGET_FAVE, // 8 + TALKBACK_SPEECH_TARGET_THOUGH, // 9 + TALKBACK_SPEECH_TARGET_TOLD, // 10 + TALKBACK_SPEECH_TARGET_BUMP, // 11 + TALKBACK_SPEECH_TARGET_NEW, // 12 + TALKBACK_SPEECH_TARGET_ROAR, // 13 + TALKBACK_SPEECH_TARGET_CAGE, // 14 + TALKBACK_SPEECH_TARGET_LAST = TALKBACK_SPEECH_TARGET_CAGE, // 14 + TALKBACK_NUM_SPEECH_TARGETS // 15 (0..14) +}; + +// Speech gesture track identifiers. + +enum +{ + TALKBACK_GESTURE_INVALID = -1, + TALKBACK_GESTURE_FIRST = 0, + TALKBACK_GESTURE_EYEBROW_RAISE_LEFT = TALKBACK_GESTURE_FIRST, // 0 + TALKBACK_GESTURE_EYEBROW_RAISE_RIGHT, // 1 + TALKBACK_GESTURE_BLINK_LEFT, // 2 + TALKBACK_GESTURE_BLINK_RIGHT, // 3 + TALKBACK_GESTURE_HEAD_BEND, // 4 + TALKBACK_GESTURE_HEAD_SIDE_SIDE, // 5 + TALKBACK_GESTURE_HEAD_TWIST, // 6 + TALKBACK_GESTURE_EYE_SIDE_SIDE_LEFT, // 7 + TALKBACK_GESTURE_EYE_SIDE_SIDE_RIGHT, // 8 + TALKBACK_GESTURE_EYE_UP_DOWN_LEFT, // 9 + TALKBACK_GESTURE_EYE_UP_DOWN_RIGHT, // 10 + TALKBACK_GESTURE_LAST = TALKBACK_GESTURE_EYE_UP_DOWN_RIGHT, // 10 + TALKBACK_NUM_GESTURES // 11 (0..10) +}; + +// Phoneme identifiers. 
+ +enum +{ + TALKBACK_PHONEME_INVALID = -1, + TALKBACK_PHONEME_FIRST = 0, + TALKBACK_PHONEME_IY = TALKBACK_PHONEME_FIRST, // 0 + TALKBACK_PHONEME_IH, // 1 + TALKBACK_PHONEME_EH, // 2 + TALKBACK_PHONEME_EY, // 3 + TALKBACK_PHONEME_AE, // 4 + TALKBACK_PHONEME_AA, // 5 + TALKBACK_PHONEME_AW, // 6 + TALKBACK_PHONEME_AY, // 7 + TALKBACK_PHONEME_AH, // 8 + TALKBACK_PHONEME_AO, // 9 + TALKBACK_PHONEME_OY, // 10 + TALKBACK_PHONEME_OW, // 11 + TALKBACK_PHONEME_UH, // 12 + TALKBACK_PHONEME_UW, // 13 + TALKBACK_PHONEME_ER, // 14 + TALKBACK_PHONEME_AX, // 15 + TALKBACK_PHONEME_S, // 16 + TALKBACK_PHONEME_SH, // 17 + TALKBACK_PHONEME_Z, // 18 + TALKBACK_PHONEME_ZH, // 19 + TALKBACK_PHONEME_F, // 20 + TALKBACK_PHONEME_TH, // 21 + TALKBACK_PHONEME_V, // 22 + TALKBACK_PHONEME_DH, // 23 + TALKBACK_PHONEME_M, // 24 + TALKBACK_PHONEME_N, // 25 + TALKBACK_PHONEME_NG, // 26 + TALKBACK_PHONEME_L, // 27 + TALKBACK_PHONEME_R, // 28 + TALKBACK_PHONEME_W, // 29 + TALKBACK_PHONEME_Y, // 30 + TALKBACK_PHONEME_HH, // 31 + TALKBACK_PHONEME_B, // 32 + TALKBACK_PHONEME_D, // 33 + TALKBACK_PHONEME_JH, // 34 + TALKBACK_PHONEME_G, // 35 + TALKBACK_PHONEME_P, // 36 + TALKBACK_PHONEME_T, // 37 + TALKBACK_PHONEME_K, // 38 + TALKBACK_PHONEME_CH, // 39 + TALKBACK_PHONEME_SIL, // 40 + TALKBACK_PHONEME_LAST = TALKBACK_PHONEME_SIL, // 40 + TALKBACK_NUM_PHONEMES // 41 (0..40) +}; + +// ----------------------------------------------------------------------------- +// Function declarations. + +// --------------------------- +// Startup/shutdown functions. +// --------------------------- + +// Must be the first function called when using TalkBack. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackStartupLibrary( + char const *iCoreDataDir); // IN: full path of folder containing TalkBack data files. + +// Should be the last function called when using TalkBack. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. 
+TalkBackShutdownLibrary(); // IN: nothing. + +// ------------------ +// Version functions. +// ------------------ + +// Gets the TalkBack version number. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetVersion( + long *oMajor, // OUT: major version number. + long *oMinor, // OUT: minor version number. + long *oRevision); // OUT: revision version number. + +// Gets the TalkBack version number as a string. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetVersionString( + long iMaxChars, // IN: size of version string buffer. + char *oVersion); // OUT: version string buffer. + +// ------------------ +// Utility functions. +// ------------------ + +// Checks whether a sound file can be analyzed and returns some quality metrics. +// +// NOTE: this function is deprecated and has been supplanted by +// TalkBackGetSoundFileMetrics(). +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackCheckSoundFile( + char const *iSoundFileName, // IN: name of sound file to be checked. + long *oCanBeAnalyzed, // OUT: 1 if sound can be analyzed, 0 if not. + long *oIsClipped, // OUT: 1 if sound is clipped, 0 if not. + double *oDecibelRange); // OUT: used decibel range of sound. + +// Returns metrics for the specified sound file. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetSoundFileMetrics( + char const *iSoundFileName, // IN: name of sound file to be checked. + TALKBACK_SOUND_FILE_METRICS *ioMetrics); // IN/OUT: address of a structure where the metrics will be stored. + +// Checks whether text can be used for text-based analysis, returning the text +// as it will be analyzed. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackCheckSpokenText( + char const *iSpokenText, // IN: text to check. + long iMaxChars, // IN: size of analyzed text buffer. 
+ char *oAnalyzedText); // OUT: buffer for text as it will be analyzed. + +// Convert a TalkBack error code to a description string. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetErrorString( + TALKBACK_ERR iErrorCode, // IN: TalkBack error code to convert. + long iMaxChars, // IN: size of the buffer. + char *oErrorString); // OUT: buffer for the description string. + +// Gets the error code and text for the most recent TalkBack error. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetLastError( + long iMaxChars, // IN: size of the buffer. + char *oErrorString, // OUT: buffer for the description string. + TALKBACK_ERR *oErrorCode); // OUT: most recent TalkBack error code. + +// ------------------- +// Analysis functions. +// ------------------- + +// Gets an opaque TALKBACK_ANALYSIS object. This object is then queried with the +// TalkBackGet* functions below. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetAnalysis( + TALKBACK_ANALYSIS **ioAnalysis, // IN/OUT: address of a TALKBACK_ANALYSIS *variable where analysis will be stored. + char const *iSoundFileName, // IN: name of the sound file to analyze. + char const *iSoundText, // IN: text spoken in sound file (can be NULL to use textless analysis). + TALKBACK_ANALYSIS_SETTINGS *iSettings); // IN: pointer to a TALKBACK_ANALYSIS_SETTINGS structure (can be NULL for defaults). + +// Frees an opaque TALKBACK_ANALYSIS object. This releases all memory used by +// the analysis. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackFreeAnalysis( + TALKBACK_ANALYSIS **ioAnalysis); // IN/OUT: analysis to free. + +// ####################################################################### +// NOTE: all functions from this point on require a valid analysis object. 
+// ####################################################################### + +// ------------------------ +// Speech target functions. +// ------------------------ + +// Gets the number of speech target tracks. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetNumSpeechTargetTracks( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long *oResult); // OUT: number of speech target tracks. + +// Gets the number of keys in the specified speech target track. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetNumSpeechTargetKeys( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iTrackNum, // IN: speech target track. + long *oResult); // OUT: number of keys in the speech target track. + +// Gets key information (time, value, derivative in, and derivative out) for the +// specified key in the specified speech target track. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetSpeechTargetKeyInfo( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iTrackNum, // IN: speech target track. + long iKeyNum, // IN: speech target key. + double *oTime, // OUT: time of key. + double *oValue, // OUT: value of key. + double *oDerivativeIn, // OUT: incoming derivative of key. + double *oDerivativeOut); // OUT: outgoing derivative of key. + +// Gets the value of the function curve for the specified speech target track at +// the specified time. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetSpeechTargetValueAtTime( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iTrackNum, // IN: speech target track. + double iTime, // IN: time in seconds. + double *oResult); // OUT: value of the function curve. 
+ +// Gets the derivatives of the function curve for the specified speech target +// track at the specified time. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetSpeechTargetDerivativesAtTime( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iTrackNum, // IN: speech target track. + double iTime, // IN: time in seconds. + double *oDerivativeIn, // OUT: value of the incoming derivative of the function curve. + double *oDerivativeOut); // OUT: value of the outgoing derivative of the function curve. + +// ------------------------- +// Speech gesture functions. +// ------------------------- + +// Gets the number of speech gesture tracks. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetNumGestureTracks( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long *oResult); // OUT: number of speech gesture tracks + +// Gets the number of keys in the specified speech gesture track. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetNumGestureKeys( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iTrackNum, // IN: speech gesture track. + long *oResult); // OUT: number of keys in the speech gesture track. + +// Gets key information (time, value, derivative in, and derivative out) for the +// specified key in the specified speech gesture track. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetGestureKeyInfo( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iTrackNum, // IN: speech gesture track. + long iKeyNum, // IN: speech gesture key. + double *oTime, // OUT: time of key. + double *oValue, // OUT: value of key. + double *oDerivativeIn, // OUT: incoming derivative of key. 
+ double *oDerivativeOut); // OUT: outgoing derivative of key. + +// Gets the value of the function curve for the specified speech gesture track +// at the specified time. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetGestureValueAtTime( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iTrackNum, // IN: speech gesture track. + double iTime, // IN: time in seconds. + double *oResult); // OUT: value of the function curve. + +// Gets the derivatives of the function curve for the specified speech gesture +// track at the specified time. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetGestureDerivativesAtTime( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iTrackNum, // IN: speech gesture track. + double iTime, // IN: time in seconds. + double *oDerivativeIn, // OUT: value of the incoming derivative of the function curve. + double *oDerivativeOut); // OUT: value of the outgoing derivative of the function curve. + +// ---------------- +// Frame functions. +// ---------------- + +// NOTE: these functions use the frame rate specified in the +// TALKBACK_ANALYSIS_SETTINGS structure passed to TalkBackGetAnalysis() and +// default to 30 fps (TALKBACK_DEFAULT_FRAME_RATE) if the structure pointer was +// NULL. + +// Gets the first frame number. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetFirstFrameNum( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long *oResult); // OUT: number of the first frame. + +// Gets the last frame number. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetLastFrameNum( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). 
+ long *oResult); // OUT: number of the last frame. + +// Gets the start time of the specified frame. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetFrameStartTime( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iFrameNum, // IN: frame. + double *oResult); // OUT: start time of the frame in seconds. + +// Gets the end time of the specified frame. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetFrameEndTime( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iFrameNum, // IN: frame. + double *oResult); // OUT: end time of the frame in seconds. + +// Gets the value of the function curve for a speech target integrated over the +// specified frame. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetSpeechTargetValueAtFrame( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iTrackNum, // IN: speech target track. + long iFrameNum, // IN: frame number. + double *oResult); // OUT: value of the function curve integrated over the frame. + +// Gets the dominant speech target at the specified frame. +// +// NOTE: this function is meant to be used in flipbook mode only. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetDominantSpeechTargetAtFrame( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iFrameNum, // IN: frame number. + TALKBACK_SPEECH_TARGET *oSpeechTarget); // OUT: dominant speech target. + +// Gets the value of the function curve for a speech gesture integrated over the +// specified frame. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. 
+TalkBackGetGestureValueAtFrame( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iTrackNum, // IN: speech gesture track. + long iFrameNum, // IN: frame number. + double *oResult); // OUT: value of the function curve integrated over the frame. + +// ------------------ +// Phoneme functions. +// ------------------ + +// Gets the number of phonemes. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetNumPhonemes( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long *oResult); // OUT: number of phonemes. + +// Gets the enumeration of the specified phoneme. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetPhonemeEnum( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iPhonemeNum, // IN: phoneme. + TALKBACK_PHONEME *oResult); // OUT: enumeration of the specified phoneme. + +// Gets the start time of the specified phoneme. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetPhonemeStartTime( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iPhonemeNum, // IN: phoneme. + double *oResult); // OUT: start time of the phoneme in seconds. + +// Gets the end time of the specified phoneme. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetPhonemeEndTime( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iPhonemeNum, // IN: phoneme. + double *oResult); // OUT: end time of the phoneme in seconds. + +// --------------- +// Word functions. +// --------------- + +// NOTE: these functions only yield data for text-based analysis. + +// Gets the number of words. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. 
+TalkBackGetNumWords( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long *oResult); // OUT: number of words. + +// Gets the text of the specified word. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetWord( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iWordNum, // IN: word. + long iMaxChars, // IN: size of word buffer. + char *oWord); // OUT: word buffer. + +// Gets the start time of the specified word. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetWordStartTime( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iWordNum, // IN: word. + double *oResult); // OUT: start time of the word in seconds. + +// Gets the end time of the specified word. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackGetWordEndTime( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iWordNum, // IN: word. + double *oResult); // OUT: end time of the word in seconds. + +// -------------------------- +// Phoneme editing functions. +// -------------------------- + +// Use these functions to modify the phoneme list after you get an opaque +// analysis object from TalkBackGetAnalysis(). After modifying the phoneme list +// in the opaque analysis object, subsequent TalkBackGet* calls on that opaque +// analysis object for speech target (lip-synching) data will return values +// based on the modified phoneme list. However, speech gesture data is not +// affected by phoneme editing. +// +// NOTE: phoneme editing is only provided in order to support Ventriloquist-like +// applications where tweaking of the phoneme segmentation (and subsequent +// recalculation of the animation data) is required. 
Most customers probably +// won't need this functionality. + +// Inserts a phoneme at the specified position in the specified manner. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackInsertPhoneme( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + TALKBACK_PHONEME iPhoneme, // IN: enumeration of phoneme to insert. + long iInsertPosition, // IN: position (phoneme number) at which to insert. + int iInsertBefore); // IN: manner of insertion: + // 0 means put phoneme after insert position; + // 1 means put phoneme before insert position. + +// Deletes the specified phoneme. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackDeletePhoneme( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iPhonemeToDelete); // IN: phoneme to delete. + +// Changes the start time of the specified phoneme. +// +// NOTE: the start time specified may not be the actual start time for a number +// of reasons, most notably if the specified start time will make the phoneme +// too short. This function returns the actual start time so the caller can +// check the result without having to query the phoneme. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackChangePhonemeStart( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iPhonemeToChange, // IN: phoneme to change. + double *ioNewTime); // IN/OUT: new start time value in seconds (in); actual start time (out). + +// Changes the end time of the specified phoneme. +// +// NOTE: the end time specified may not be the actual end time for a number of +// reasons, most notably if the specified end time will make the phoneme too +// short. This function returns the actual end time so the caller can check the +// result without having to query the phoneme. 
+TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackChangePhonemeEnd( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iPhonemeToChange, // IN: phoneme to change. + double *ioNewTime); // IN/OUT: new end time value in seconds (in); actual end time (out). + +// Changes the enumeration of the specified phoneme. +TALKBACK_ERR // RETURNS: TALKBACK_NOERR if successful, TalkBack error code if not. +TalkBackChangePhonemeEnum( + TALKBACK_ANALYSIS *iAnalysis, // IN: opaque analysis object returned by TalkBackGetAnalysis(). + long iPhonemeToChange, // IN: phoneme to change. + TALKBACK_PHONEME iNewPhoneme); // IN: new phoneme enumeration. + +#if defined(__cplusplus) +} +#endif + +#endif |