First version of the Source SDK 2013

author: Joe Ludwig <[email protected]> 2013-06-26 15:22:04 -0700
committer: Joe Ludwig <[email protected]> 2013-06-26 15:22:04 -0700
commit: 39ed87570bdb2f86969d4be821c94b722dc71179 (patch)
tree: abc53757f75f40c80278e87650ea92808274aa59 /mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp
download: source-sdk-2013-39ed87570bdb2f86969d4be821c94b722dc71179.tar.xz
source-sdk-2013-39ed87570bdb2f86969d4be821c94b722dc71179.zip
1 files changed, 1075 insertions, 0 deletions
diff --git a/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp
new file mode 100644
index 00000000..70819f8e
--- /dev/null
+++ b/mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp
@@ -0,0 +1,1075 @@
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose: 
+//
+// $NoKeywords: $
+//=============================================================================//
+#include <stdio.h>
+#include <stdarg.h>
+#include <memory.h>
+#include <windows.h>
+#include <mmsystem.h>
+#include <mmreg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "phonemeextractor/PhonemeExtractor.h"
+#include "ims_helper/ims_helper.h"
+
+#include "tier0/dbg.h"
+#include "sentence.h"
+#include "PhonemeConverter.h"
+#include "tier1/strtools.h"
+
+#define TEXTLESS_WORDNAME	"[Textless]"	// Placeholder word: a one-word sentence with this text is analyzed as "no text", and textless output is stored under this word
+
+static IImsHelper *talkback = NULL;	// Helper interface returned by ims_helper.dll's "GetImsHelper" export; every TalkBack* call in this file goes through it
+
+//-----------------------------------------------------------------------------
+// Purpose: IPhonemeExtractor implementation that gets word/phoneme timings for a wave file from the TalkBack library reached through ims_helper.dll
+//-----------------------------------------------------------------------------
+class CPhonemeExtractorLipSinc : public IPhonemeExtractor
+{
+public:
+	virtual PE_APITYPE	GetAPIType() const	// Identifies this extractor as the LipSinc speech API
+	{
+		return SPEECH_API_LIPSINC;
+	}
+
+	// Used for menus, etc
+	virtual char const *GetName() const
+	{
+		return "IMS (LipSinc)";
+	}
+
+	SR_RESULT Extract( // Analyzes wavfile against inwords and writes the timed words/phonemes into outwords
+		const char *wavfile,
+		int numsamples,
+		void (*pfnPrint)( const char *fmt, ... ),
+		CSentence& inwords,
+		CSentence& outwords );
+
+
+	CPhonemeExtractorLipSinc( void );
+	~CPhonemeExtractorLipSinc( void );
+
+	enum
+	{
+		MAX_WORD_LENGTH = 128,	// Size in bytes of CAnalyzedWord::buffer
+	};
+private:
+
+
+	class CAnalyzedWord	// One word read back from an analysis: its text plus start/end times
+	{
+	public:
+		char		buffer[ MAX_WORD_LENGTH ];
+		double		starttime;
+		double		endtime;
+	};
+
+	class CAnalyzedPhoneme	// One phoneme read back from an analysis: its text tag plus start/end times
+	{
+	public:
+		char		phoneme[ 32 ];
+		double		starttime;
+		double		endtime;
+	};
+
+	bool InitLipSinc( void );	// Loads ims_helper.dll and starts the TalkBack library; returns true on success
+	void ShutdownLipSinc( void );	// Shuts the TalkBack library down and unloads the helper DLL
+
+	void DescribeError( TALKBACK_ERR err );	// Prints the TalkBack error string for err through m_pfnPrint
+	void Printf( char const *fmt, ... );	// printf-style output routed through m_pfnPrint
+
+	bool CheckSoundFile( char const *filename );	// Queries the file's metrics, caches duration/sample sizes, returns whether it can be analyzed
+	bool GetInitialized( void );
+	void SetInitialized( bool init );
+
+	void (*m_pfnPrint)( const char *fmt, ... );	// Output callback supplied by the caller of Extract()
+
+	char const *ConstructInputSentence( CSentence& inwords );	// Joins the input words into one space-separated string (static buffer)
+	bool AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords );	// Runs TalkBackGetAnalysis; false on error
+
+	char const *ApplyTBWordRules( char const *word );	// Normalizes a word before it is compared with analyzed words (static buffer)
+
+	void ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords );	// Matches analyzed words to input words and fills outwords
+	void ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords );	// Puts every analyzed phoneme under a single TEXTLESS_WORDNAME word
+
+	int GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool checkstart );	// Index of the phoneme whose start (checkstart) or end time equals time, else -1
+
+	int GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime );
+	int GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime );
+
+	CAnalyzedWord *GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index );	// Returns a pointer to a function-local static, or NULL on error
+	CAnalyzedPhoneme *GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index );	// Returns a pointer to a function-local static, or NULL on error
+
+	int ComputeByteFromTime( float time );	// Scales time/duration by sample count and bytes per sample
+
+	bool m_bInitialized;	// Set to true at the end of a successful InitLipSinc()
+
+	float	m_flSampleCount;	// Seeded from Extract()'s numsamples, then divided by bytes per sample in CheckSoundFile()
+	float	m_flDuration;	// Duration reported by TalkBackGetSoundFileMetrics
+
+	float	m_flSamplesPerSecond;	// m_flSampleCount / m_flDuration as computed in CheckSoundFile()
+
+	int		m_nBytesPerSample;	// (bits per sample >> 3) divided by channel count, see CheckSoundFile()
+
+	HMODULE m_hHelper;	// Module handle of ims_helper.dll
+};
+
+// Starts with no helper DLL loaded, no print callback, and all cached sound
+//  metrics zeroed.  Initializers are listed in member declaration order.
+CPhonemeExtractorLipSinc::CPhonemeExtractorLipSinc( void ) :
+	m_pfnPrint( NULL ),
+	m_bInitialized( false ),
+	m_flSampleCount( 0.0f ),
+	m_flDuration( 0.0f ),
+	m_flSamplesPerSecond( 0.0f ),
+	m_nBytesPerSample( 0 ),
+	m_hHelper( (HMODULE)0 )
+{
+}
+
+// Shuts the TalkBack library down, but only if InitLipSinc() ever succeeded.
+CPhonemeExtractorLipSinc::~CPhonemeExtractorLipSinc( void )
+{
+	if ( !GetInitialized() )
+		return;
+
+	ShutdownLipSinc();
+}
+
+bool CPhonemeExtractorLipSinc::GetInitialized( void )	// Returns m_bInitialized, the flag InitLipSinc() sets after a successful startup
+{
+	return m_bInitialized;
+}
+
+void CPhonemeExtractorLipSinc::SetInitialized( bool init )	// Overwrites the m_bInitialized flag with init; does not load or unload anything itself
+{
+	m_bInitialized = init;
+}
+
+// Converts a time offset into a byte offset: the fraction time / m_flDuration
+//  is scaled by m_flSampleCount and then by m_nBytesPerSample.
+// Returns 0 when no duration has been recorded.
+int CPhonemeExtractorLipSinc::ComputeByteFromTime( float time )
+{
+	if ( m_flDuration == 0.0f )
+	{
+		return 0;
+	}
+
+	// Intermediates stay in float so rounding matches step by step
+	const float flFraction = time / m_flDuration;
+	const float flSample = flFraction * m_flSampleCount;
+
+	return (int)( flSample * m_nBytesPerSample );
+}
+
+// Prints the TalkBack description of err through the caller-supplied print
+//  callback.  Does nothing when no callback has been installed.
+void CPhonemeExtractorLipSinc::DescribeError( TALKBACK_ERR err )
+{
+	Assert( m_pfnPrint );
+	if ( !m_pfnPrint )
+	{
+		// Assert only fires in debug builds; never call through a NULL callback
+		return;
+	}
+
+	// Get the error description.
+	char errorDesc[256] = "";
+	if ( err != TALKBACK_NOERR && talkback )
+	{
+		talkback->TalkBackGetErrorString( err, sizeof(errorDesc), errorDesc );
+	}
+	
+	// Report or log the error...
+	(*m_pfnPrint)( "LIPSINC ERROR:  %s\n", errorDesc );
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: printf-style output routed through the caller-supplied callback.
+//  The message is formatted into a fixed 4096 byte buffer and is truncated
+//  if it does not fit.
+// Input  : *fmt - printf format string
+//			... - arguments consumed by fmt
+//-----------------------------------------------------------------------------
+void CPhonemeExtractorLipSinc::Printf( char const *fmt, ... )
+{
+	Assert( m_pfnPrint );
+	if ( !m_pfnPrint )
+	{
+		// Assert only fires in debug builds; never call through a NULL callback
+		return;
+	}
+
+	char string[ 4096 ];
+
+	va_list argptr;
+	va_start( argptr, fmt );
+	// Bounded formatting: a long filename or sentence can no longer overrun the stack buffer
+	Q_vsnprintf( string, sizeof( string ), fmt, argptr );
+	va_end( argptr );
+
+	(*m_pfnPrint)( "%s", string );
+}
+
+// Queries TalkBack for the metrics of filename, prints them when the file can
+//  be analyzed, and caches the duration and per-sample sizes that
+//  ComputeByteFromTime() relies on.
+// Returns false if the metrics query fails or the file cannot be analyzed.
+bool CPhonemeExtractorLipSinc::CheckSoundFile( char const *filename )
+{
+	TALKBACK_SOUND_FILE_METRICS fm;
+	memset( &fm, 0, sizeof( fm ) );
+	fm.m_size = sizeof( fm );
+
+	TALKBACK_ERR err = talkback->TalkBackGetSoundFileMetrics( filename, &fm );
+	if ( err != TALKBACK_NOERR )
+	{
+		DescribeError( err );
+		return false;
+	}
+
+	if ( fm.m_canBeAnalyzed )
+	{
+		Printf( "%s:  %.2f s, rate %i, bits %i, channels %i\n",
+			filename,
+			fm.m_duration,
+			fm.m_sampleRate,
+			fm.m_bitsPerSample,
+			fm.m_channelCount );
+	}
+
+	m_flDuration = fm.m_duration;
+	// NOTE(review): this rate uses m_flSampleCount before it is rescaled below - confirm that is intended
+	if ( m_flDuration > 0 )
+	{
+		m_flSamplesPerSecond = m_flSampleCount / m_flDuration;
+	}
+	else
+	{
+		m_flSamplesPerSecond = 0.0f;
+	}
+
+	m_nBytesPerSample = ( fm.m_bitsPerSample >> 3 );
+
+	// Fewer than 8 bits per sample gives 0 here; skip the division instead of
+	//  turning the sample count into infinity
+	if ( m_nBytesPerSample > 0 )
+	{
+		m_flSampleCount /= m_nBytesPerSample;
+	}
+
+	// A zero channel count would be an integer divide-by-zero
+	if ( fm.m_channelCount > 0 )
+	{
+		m_nBytesPerSample /= fm.m_channelCount;
+	}
+
+	return fm.m_canBeAnalyzed ? true : false;
+}
+
+typedef IImsHelper *(*pfnImsHelper)(void);
+
+// Unloads the helper DLL and clears both the module handle and the cached
+//  interface pointer so nothing keeps pointing into the unloaded module.
+static void ReleaseImsHelper( HMODULE &hHelper )
+{
+	if ( hHelper )
+	{
+		FreeLibrary( hHelper );
+		hHelper = (HMODULE)0;
+	}
+	talkback = NULL;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Loads ims_helper.dll, fetches the IImsHelper interface, locates
+//  the lipsinc_data directory next to the executable (or under bin) and
+//  starts the TalkBack library.  Does nothing if already initialized.
+// Output : Returns true on success, false on failure.
+//-----------------------------------------------------------------------------
+bool CPhonemeExtractorLipSinc::InitLipSinc( void )
+{
+	if ( GetInitialized() )
+	{
+		return true;
+	}
+
+	m_hHelper = LoadLibrary( "ims_helper.dll" );
+	if ( !m_hHelper )
+	{
+		return false;
+	}
+	
+	pfnImsHelper factory = (pfnImsHelper)::GetProcAddress( m_hHelper, "GetImsHelper" );
+	if ( !factory )
+	{
+		ReleaseImsHelper( m_hHelper );
+		return false;
+	}
+
+	talkback = reinterpret_cast< IImsHelper * >( (*factory)() );
+	if ( !talkback )
+	{
+		ReleaseImsHelper( m_hHelper );
+		return false;
+	}
+
+	char szExeName[ MAX_PATH ];
+	szExeName[0] = 0;
+	GetModuleFileName( (HMODULE)0, szExeName, sizeof( szExeName ) );
+
+	char szBaseDir[ MAX_PATH ];
+	Q_strncpy( szBaseDir, szExeName, sizeof( szBaseDir ) );
+
+	Q_StripLastDir( szBaseDir, sizeof( szBaseDir ) );
+	Q_StripTrailingSlash( szBaseDir );
+	Q_strlower( szBaseDir );
+
+	// First candidate: <base>\lipsinc_data\ ; DtC6dal.dat is used as the probe file
+	char coreDataDir[ 512 ];
+	Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\lipsinc_data\\",
+		szBaseDir );
+	Q_FixSlashes( coreDataDir );
+
+	char szCheck[ 512 ];
+	Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
+	struct __stat64 buf;
+
+	if ( _stat64( szCheck, &buf ) != 0 )
+	{
+		// Second candidate: <base>\bin\lipsinc_data\ 
+		Q_snprintf( coreDataDir, sizeof( coreDataDir ), "%s\\bin\\lipsinc_data\\",
+			szBaseDir );
+		Q_FixSlashes( coreDataDir );
+		Q_snprintf( szCheck, sizeof( szCheck ), "%sDtC6dal.dat", coreDataDir );
+
+		if ( _stat64( szCheck, &buf ) != 0 )
+		{
+			Error( "Unable to find talkback data files in %s.", coreDataDir );
+		}
+	}
+
+	TALKBACK_ERR err;
+	
+	err = talkback->TalkBackStartupLibrary( coreDataDir );
+	if ( err != TALKBACK_NOERR )
+	{
+		// Report first: DescribeError() needs the interface that is released next
+		DescribeError( err );
+		ReleaseImsHelper( m_hHelper );
+		return false;
+	}
+
+	long verMajor = 0;
+	long verMinor = 0;
+	long verRevision = 0;
+	
+	err = talkback->TalkBackGetVersion(
+		&verMajor, 
+		&verMinor, 
+		&verRevision);
+	if ( err != TALKBACK_NOERR )
+	{
+		DescribeError( err );
+		ReleaseImsHelper( m_hHelper );
+		return false;
+	}
+
+	// Cast so the arguments match the %i conversions
+	Printf( "Lipsinc TalkBack Version %i.%i.%i\n", (int)verMajor, (int)verMinor, (int)verRevision );
+
+	m_bInitialized = true;
+
+	return true;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Shuts the TalkBack library down, unloads the helper DLL and
+//  resets all initialization state so the extractor can be initialized again.
+//-----------------------------------------------------------------------------
+void CPhonemeExtractorLipSinc::ShutdownLipSinc( void )
+{
+	// HACK HACK:  This seems to crash on exit sometimes
+	__try
+	{
+		if ( talkback )
+		{
+			talkback->TalkBackShutdownLibrary();
+		}
+
+		if ( m_hHelper )
+		{
+			FreeLibrary( m_hHelper );
+		}
+	}
+	__except(EXCEPTION_EXECUTE_HANDLER )
+	{
+		OutputDebugString( "----> Crash shutting down TALKBACK sdk, exception caught and ignored\n" );
+	}
+
+	// Drop every reference into the (now unloaded) module so a second call
+	//  or a later InitLipSinc() starts from a clean slate
+	talkback = NULL;
+	m_hHelper = (HMODULE)0;
+	m_bInitialized = false;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Builds the text handed to the analyzer by joining the input words
+//  with single spaces.  A sentence made of exactly one word whose text starts
+//  with TEXTLESS_WORDNAME yields an empty string.
+// Input  : inwords - sentence whose m_Words are concatenated
+// Output : char const - pointer to a function-local static buffer that is
+//  overwritten by the next call; text beyond the buffer size is truncated
+//-----------------------------------------------------------------------------
+char const *CPhonemeExtractorLipSinc::ConstructInputSentence( CSentence& inwords )
+{
+	static char sentence[ 16384 ];
+
+	sentence[ 0 ] = 0;
+
+	const int wordCount = inwords.m_Words.Count();
+
+	// The textless placeholder means "analyze without any text"
+	if ( wordCount == 1 && 
+		!Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
+	{
+		return sentence;
+	}
+
+	const int last = wordCount - 1;
+
+	for ( int i = 0 ; i <= last; i++ )
+	{
+		CWordTag *w = inwords.m_Words[ i ];
+
+		// Bounded appends: a very long sentence is truncated instead of overrunning the buffer
+		Q_strncat( sentence, w->GetWord(), sizeof( sentence ), COPY_ALL_CHARACTERS );
+		if ( i != last )
+		{
+			Q_strncat( sentence, " ", sizeof( sentence ), COPY_ALL_CHARACTERS );
+		}
+	}
+
+	return sentence;
+}
+
+// Runs a TalkBack analysis of wavfile using the text built from inwords.
+// *ppAnalysis is cleared first and receives the analysis from
+//  TalkBackGetAnalysis.  Returns false (after printing the error) on failure.
+bool CPhonemeExtractorLipSinc::AttemptAnalysis( TALKBACK_ANALYSIS **ppAnalysis, char const *wavfile, CSentence& inwords )
+{
+	*ppAnalysis = NULL;
+
+	TALKBACK_ANALYSIS_SETTINGS analysisSettings;
+	memset( &analysisSettings, 0, sizeof( analysisSettings ) );
+
+	// The structure must carry its own size before it is used.
+	analysisSettings.fSize = sizeof( TALKBACK_ANALYSIS_SETTINGS );
+
+	// Library default is 30 frames per second; this tool asks for 100.
+	analysisSettings.fFrameRate = 100;
+
+	// 0 = normal analysis (the default), 1 = optimize for flipbook output.
+	analysisSettings.fOptimizeForFlipbook = 0;
+
+	// -1 (the default) seeds the random number generator from the current
+	// time; any other value is used as the seed directly, which gives
+	// repeatable speech gestures.  Lip-synching itself is not affected.
+	analysisSettings.fRandomSeed = -1;
+
+	// NULL (the default) selects the built-in phoneme-to-speech-target
+	// mapping instead of a configuration (.INI) file.
+	analysisSettings.fConfigFile = NULL;
+
+	char const *pszSentence = ConstructInputSentence( inwords );
+	char const *pszShown = pszSentence[ 0 ] ? pszSentence : TEXTLESS_WORDNAME;
+
+	Printf( "Analyzing: \"%s\"\n", pszShown );
+
+	const TALKBACK_ERR result = talkback->TalkBackGetAnalysis( 
+		ppAnalysis,
+		wavfile,
+		pszSentence,
+		&analysisSettings );
+
+	if ( result == TALKBACK_NOERR )
+	{
+		Printf( "Analysis successful...\n" );
+		return true;
+	}
+
+	DescribeError( result );
+	return false;
+}
+
+typedef struct	// Pairs one TalkBack phoneme enum value with the text tag used for it
+{
+	TALKBACK_PHONEME phoneme;
+	char const		*string;
+} TBPHONEMES_t;
+
+static TBPHONEMES_t g_TBPhonemeList[]=	// Phoneme enum -> text tag table consumed by TBPhonemeToString()
+{
+	{ TALKBACK_PHONEME_IY, "iy" },
+	{ TALKBACK_PHONEME_IH, "ih" },
+	{ TALKBACK_PHONEME_EH, "eh" },
+	{ TALKBACK_PHONEME_EY, "ey" },
+	{ TALKBACK_PHONEME_AE, "ae" },
+	{ TALKBACK_PHONEME_AA, "aa" },
+	{ TALKBACK_PHONEME_AW, "aw" },
+	{ TALKBACK_PHONEME_AY, "ay" },
+	{ TALKBACK_PHONEME_AH, "ah" },
+	{ TALKBACK_PHONEME_AO, "ao" },
+	{ TALKBACK_PHONEME_OY, "oy" },
+	{ TALKBACK_PHONEME_OW, "ow" },
+	{ TALKBACK_PHONEME_UH, "uh" },
+	{ TALKBACK_PHONEME_UW, "uw" },
+	{ TALKBACK_PHONEME_ER, "er" },
+	{ TALKBACK_PHONEME_AX, "ax" },
+	{ TALKBACK_PHONEME_S, "s" },
+	{ TALKBACK_PHONEME_SH, "sh" },
+	{ TALKBACK_PHONEME_Z, "z" },
+	{ TALKBACK_PHONEME_ZH, "zh" },
+	{ TALKBACK_PHONEME_F, "f" },
+	{ TALKBACK_PHONEME_TH, "th" },
+	{ TALKBACK_PHONEME_V, "v" },
+	{ TALKBACK_PHONEME_DH, "dh" },
+	{ TALKBACK_PHONEME_M, "m" },
+	{ TALKBACK_PHONEME_N, "n" },
+	{ TALKBACK_PHONEME_NG, "ng" },
+	{ TALKBACK_PHONEME_L, "l" },
+	{ TALKBACK_PHONEME_R, "r" },
+	{ TALKBACK_PHONEME_W, "w" },
+	{ TALKBACK_PHONEME_Y, "y" },
+	{ TALKBACK_PHONEME_HH, "hh" },
+	{ TALKBACK_PHONEME_B, "b" },
+	{ TALKBACK_PHONEME_D, "d" },
+	{ TALKBACK_PHONEME_JH, "jh" },
+	{ TALKBACK_PHONEME_G, "g" },
+	{ TALKBACK_PHONEME_P, "p" },
+	{ TALKBACK_PHONEME_T, "t" },
+	{ TALKBACK_PHONEME_K, "k" },
+	{ TALKBACK_PHONEME_CH, "ch" },
+	{ TALKBACK_PHONEME_SIL, "<sil>" },	// Silence
+	{ -1, NULL }	// End-of-table marker: its string is NULL
+};
+
+// Returns the text tag for a TalkBack phoneme enum value, or "Bogus" when the
+//  value is out of range or has no table entry.  Never returns NULL.
+char const *TBPhonemeToString( TALKBACK_PHONEME phoneme )
+{
+	if ( phoneme < TALKBACK_PHONEME_FIRST || phoneme > TALKBACK_PHONEME_LAST )
+	{
+		return "Bogus";
+	}
+
+	// Look the value up instead of using it as an array index: that stays
+	//  correct whatever the numeric enum values are, and the NULL string of
+	//  the end-of-table marker can never be handed to a caller.
+	for ( const TBPHONEMES_t *item = g_TBPhonemeList; item->string != NULL; ++item )
+	{
+		if ( item->phoneme == phoneme )
+		{
+			return item->string;
+		}
+	}
+
+	return "Bogus";
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Scans the analyzed phonemes for the one whose start or end time is
+//  exactly equal to the requested time
+// Input  : *analysis - analysis to search
+//			time - time to look for
+//			start - true to compare phoneme start times, false for end times
+// Output : int - index of the first matching phoneme, or -1 if none matches,
+//  the phoneme count cannot be read, or the count is <= 0 or >= 100000
+//-----------------------------------------------------------------------------
+int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWord( TALKBACK_ANALYSIS *analysis, double time, bool start )
+{
+	long numPhonemes;
+
+	TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &numPhonemes );
+	if ( err != TALKBACK_NOERR )
+	{
+		DescribeError( err );
+		return -1;
+	}
+
+	// Nothing to search, or a count too large to be believable
+	if ( numPhonemes <= 0L || numPhonemes >= 100000L )
+	{
+		return -1;
+	}
+
+	const int limit = (int)numPhonemes;
+	for ( int idx = 0; idx < limit; idx++ )
+	{
+		TALKBACK_PHONEME tbPhoneme = TALKBACK_PHONEME_INVALID;
+		err = talkback->TalkBackGetPhonemeEnum( analysis, idx, &tbPhoneme );
+		if ( err == TALKBACK_NOERR )
+		{
+			double boundary;
+
+			err = start
+				? talkback->TalkBackGetPhonemeStartTime( analysis, idx, &boundary )
+				: talkback->TalkBackGetPhonemeEndTime( analysis, idx, &boundary );
+
+			if ( err == TALKBACK_NOERR )
+			{
+				if ( boundary == time )
+				{
+					return idx;
+				}
+				continue;
+			}
+		}
+
+		// Either query failed: report it and move on to the next phoneme
+		DescribeError( err );
+	}
+
+	return -1;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Finds the phoneme whose start time equals a word's start time
+// Input  : *analysis - analysis to search
+//			starttime - word start time, compared against phoneme start times
+// Output : int - result of GetPhonemeIndexAtWord (phoneme index, or -1)
+//-----------------------------------------------------------------------------
+int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordStart( TALKBACK_ANALYSIS *analysis, double starttime )
+{
+	return GetPhonemeIndexAtWord( analysis, starttime, true );
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Finds the phoneme whose end time equals a word's end time
+// Input  : *analysis - analysis to search
+//			endtime - word end time, compared against phoneme end times
+// Output : int - result of GetPhonemeIndexAtWord (phoneme index, or -1)
+//-----------------------------------------------------------------------------
+int CPhonemeExtractorLipSinc::GetPhonemeIndexAtWordEnd( TALKBACK_ANALYSIS *analysis, double endtime )
+{
+	return GetPhonemeIndexAtWord( analysis, endtime, false );
+}
+
+// Reads phoneme number index from the analysis: its text tag plus start and
+//  end times.  Returns NULL (after printing the error) if any query fails.
+// The returned pointer refers to a function-local static that is overwritten
+//  by the next call, so callers must copy what they need right away.
+CPhonemeExtractorLipSinc::CAnalyzedPhoneme *CPhonemeExtractorLipSinc::GetAnalyzedPhoneme( TALKBACK_ANALYSIS *analysis, int index )
+{
+	static CAnalyzedPhoneme p;
+
+	memset( &p, 0, sizeof( p ) );
+
+	TALKBACK_PHONEME tb = TALKBACK_PHONEME_INVALID;
+
+	TALKBACK_ERR err = talkback->TalkBackGetPhonemeEnum( analysis, index, &tb );
+	if ( err != TALKBACK_NOERR )
+	{
+		DescribeError( err );
+		return NULL;
+	}
+
+	// Bounded copy, and tolerate a missing name instead of crashing on a NULL source
+	char const *name = TBPhonemeToString( tb );
+	Q_strncpy( p.phoneme, name ? name : "", sizeof( p.phoneme ) );
+
+	err = talkback->TalkBackGetPhonemeStartTime( analysis, index, &p.starttime );
+	if ( err != TALKBACK_NOERR )
+	{
+		DescribeError( err );
+		return NULL;
+	}
+	err = talkback->TalkBackGetPhonemeEndTime( analysis, index, &p.endtime );
+	if ( err != TALKBACK_NOERR )
+	{
+		DescribeError( err );
+		return NULL;
+	}
+
+	return &p;
+}
+
+// Reads word number index from the analysis: its text plus start and end
+//  times.  Returns NULL (after printing the error) as soon as one query fails.
+// The returned pointer refers to a function-local static that is overwritten
+//  by the next call.
+CPhonemeExtractorLipSinc::CAnalyzedWord *CPhonemeExtractorLipSinc::GetAnalyzedWord( TALKBACK_ANALYSIS *analysis, int index )
+{
+	static CAnalyzedWord s_word;
+
+	memset( &s_word, 0, sizeof( s_word ) );
+
+	const long bufferChars = sizeof( s_word.buffer );
+
+	// Each step only runs when every earlier step succeeded
+	TALKBACK_ERR status = talkback->TalkBackGetWord( analysis, index, bufferChars, s_word.buffer );
+	if ( status == TALKBACK_NOERR )
+	{
+		status = talkback->TalkBackGetWordStartTime( analysis, index, &s_word.starttime );
+	}
+	if ( status == TALKBACK_NOERR )
+	{
+		status = talkback->TalkBackGetWordEndTime( analysis, index, &s_word.endtime );
+	}
+
+	if ( status != TALKBACK_NOERR )
+	{
+		DescribeError( status );
+		return NULL;
+	}
+
+	return &s_word;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Loose comparison of two words.  They match when one is a
+//  case-insensitive prefix of the other, or when, after removing the letters
+//  they share, the leftover letters of either word number no more than the
+//  difference in length between the two words.
+// Input  : *w1 - first word
+//			*w2 - second word
+// Output : Returns true on a match, false otherwise.
+//-----------------------------------------------------------------------------
+bool FuzzyWordMatch( char const *w1, char const *w2 )
+{
+	int len1 = strlen( w1 );
+	int len2 = strlen( w2 );
+
+	int minlen = min( len1, len2 );
+
+	// Found a match (note: an empty word is a prefix of everything)
+	if ( !strnicmp( w1, w2, minlen ) )
+		return true;
+
+	int letterdiff = abs( len1 - len2 );
+	// More than five letters different, don't bother
+	if ( letterdiff > 5 )
+		return false;
+
+	// Compute a "delta" from the alphabetic characters of each word
+	CUtlVector <char> word1;
+	CUtlVector <char> word2;
+
+	char const *p;
+
+	for ( p = w1; *p; p++ )
+	{
+		if ( V_isalpha( *p ) )
+		{
+			word1.AddToTail( *p );
+		}
+	}
+
+	for ( p = w2; *p; p++ )
+	{
+		if ( V_isalpha( *p ) )
+		{
+			word2.AddToTail( *p );
+		}
+	}
+
+	int i;
+	for ( i = 0; i < word1.Size(); i++ )
+	{
+		char c = word1[ i ];
+
+		// See if c is in word 2, if so subtract it out
+		int idx = word2.Find( c );
+
+		if ( idx != word2.InvalidIndex() )
+		{
+			word2.Remove( idx );
+		}
+	}
+
+	if ( word2.Size() <= letterdiff )
+		return true;
+
+	// Rebuild word 2 for the reverse comparison.  The scan has to restart at
+	//  the beginning of w2; continuing from the end of the previous scan
+	//  would leave word 2 empty and turn the loop below into a no-op.
+	word2.RemoveAll();
+
+	for ( p = w2; *p; p++ )
+	{
+		if ( V_isalpha( *p ) )
+		{
+			word2.AddToTail( *p );
+		}
+	}
+
+	for ( i = 0; i < word2.Size(); i++ )
+	{
+		char c = word2[ i ];
+
+		// See if c is in word 1, if so subtract it out
+		int idx = word1.Find( c );
+
+		if ( idx != word1.InvalidIndex() )
+		{
+			word1.Remove( idx );
+		}
+	}
+
+	if ( word1.Size() <= letterdiff )
+		return true;
+
+	return false;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: For foreign language stuff, if inwords is empty, process anyway...
+//  Every analyzed phoneme is attached to a single word named
+//  TEXTLESS_WORDNAME whose time range covers all phonemes that were read.
+// Input  : *analysis - analysis to read the phonemes from
+//			outwords - reset, then receives the single generated word
+//-----------------------------------------------------------------------------
+void CPhonemeExtractorLipSinc::ProcessWordsTextless( TALKBACK_ANALYSIS *analysis, CSentence& outwords )
+{
+	long count;
+
+	TALKBACK_ERR err = talkback->TalkBackGetNumPhonemes( analysis, &count );
+	if ( err != TALKBACK_NOERR )
+	{
+		DescribeError( err );
+		return;
+	}
+
+	CWordTag *newWord = new CWordTag;
+
+	newWord->SetWord( TEXTLESS_WORDNAME );
+
+	// Placeholder range, used only when no phoneme could be read at all
+	float starttime = 0.0f;
+	float endtime = 1.0f;
+
+	// Seed the range from the first phoneme that is actually read, which is
+	//  not necessarily index 0 if that one fails to load
+	bool bHaveRange = false;
+
+	for ( int i = 0; i < count; ++i )
+	{
+		// Get phoneme and timing info
+		CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, i );
+		if ( !ph )
+			continue;
+
+		CPhonemeTag *ptag = new CPhonemeTag;
+
+		if ( !bHaveRange || ( ph->starttime < starttime ) )
+		{
+			starttime = ph->starttime;
+		}
+
+		if ( !bHaveRange || ( ph->endtime > endtime ) )
+		{
+			endtime = ph->endtime;
+		}
+
+		bHaveRange = true;
+
+		ptag->SetStartTime( ph->starttime );
+		ptag->SetEndTime( ph->endtime );
+
+		ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
+		ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
+
+		ptag->SetTag( ph->phoneme );
+		ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
+
+		newWord->m_Phonemes.AddToTail( ptag );
+	}
+
+	newWord->m_flStartTime = starttime;
+	newWord->m_flEndTime = endtime;
+
+	newWord->m_uiStartByte = ComputeByteFromTime( starttime );
+	newWord->m_uiEndByte = ComputeByteFromTime( endtime );
+
+	outwords.Reset();
+	outwords.AddWordTag( newWord );
+	outwords.SetTextFromWords();
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Walks the input words and the analyzed words in parallel, and for
+//  every input word that fuzzy-matches an analyzed word emits a timed word
+//  with its phonemes.  Falls back to ProcessWordsTextless() when the analysis
+//  has no words and the input is empty or the textless placeholder.
+// Input  : *analysis - analysis to read words and phonemes from
+//			inwords - words the caller expects in the wave file
+//			outwords - receives the matched, timed words
+//-----------------------------------------------------------------------------
+void CPhonemeExtractorLipSinc::ProcessWords( TALKBACK_ANALYSIS *analysis, CSentence& inwords, CSentence& outwords )
+{
+	long count;
+
+	TALKBACK_ERR err = talkback->TalkBackGetNumWords( analysis, &count );
+	if ( err != TALKBACK_NOERR )
+	{
+		DescribeError( err );
+		return;
+	}
+
+	if ( count <= 0L )
+	{
+		if ( inwords.m_Words.Count() == 0 || 
+			!Q_strnicmp( inwords.GetText(), TEXTLESS_WORDNAME, Q_strlen( TEXTLESS_WORDNAME ) ) )
+		{
+			ProcessWordsTextless( analysis, outwords );
+		}
+		return;
+	}
+
+	// Bogus
+	if ( count >= 100000L )
+		return;
+
+	int inwordpos = 0;	// cursor into the caller's words
+	int awordpos = 0;	// cursor into the analyzed words
+
+	outwords.Reset();
+
+	// Normalized text of the most recently matched input word
+	char previous[ 256 ];
+	previous[ 0 ] = 0;
+
+	while ( inwordpos < inwords.m_Words.Size() )
+	{
+		CWordTag *in = inwords.m_Words[ inwordpos ];
+
+		if ( awordpos >= count )
+		{
+			// Just copy the rest over without phonemes
+			CWordTag *copy = new CWordTag( *in );
+
+			outwords.AddWordTag( copy );
+
+			inwordpos++;
+			continue;
+		}
+
+		// Should never fail
+		CAnalyzedWord *w = GetAnalyzedWord( analysis, awordpos );
+		if ( !w )
+		{
+			return;
+		}
+
+		// Silence markers never correspond to an input word
+		if ( !stricmp( w->buffer, "<SIL>" ) )
+		{
+			awordpos++;
+			continue;
+		}
+
+		char const *check = ApplyTBWordRules( in->GetWord() );
+		if ( !FuzzyWordMatch( check, w->buffer ) )
+		{
+			// No match.  If the analyzed word still matches the previously
+			//  matched input word, skip only the analyzed word and keep the
+			//  current input word; otherwise skip both.
+			bool advance_input = true;
+			if ( previous[ 0 ] )
+			{
+				if ( FuzzyWordMatch( previous, w->buffer ) )
+				{
+					advance_input = false;
+				}
+			}
+
+			if ( advance_input )
+			{
+				inwordpos++;
+			}
+			awordpos++;
+			continue;
+		}
+		// Bounded copy: never write more than previous[] can hold
+		Q_strncpy( previous, check, sizeof( previous ) );
+
+		CWordTag *newWord = new CWordTag;
+
+		newWord->SetWord( in->GetWord() );
+
+		newWord->m_flStartTime = w->starttime;
+		newWord->m_flEndTime = w->endtime;
+
+		newWord->m_uiStartByte = ComputeByteFromTime( w->starttime );
+		newWord->m_uiEndByte = ComputeByteFromTime( w->endtime );
+
+		int phonemestart, phonemeend;
+
+		// Phonemes of a word are those between the one starting at the word's
+		//  start time and the one ending at the word's end time
+		phonemestart = GetPhonemeIndexAtWordStart( analysis, w->starttime );
+		phonemeend = GetPhonemeIndexAtWordEnd( analysis, w->endtime );
+
+		if ( phonemestart >= 0 && phonemeend >= 0 )
+		{
+			for ( ; phonemestart <= phonemeend; phonemestart++ )
+			{
+				// Get phoneme and timing info
+				CAnalyzedPhoneme *ph = GetAnalyzedPhoneme( analysis, phonemestart );
+				if ( !ph )
+					continue;
+
+				CPhonemeTag *ptag = new CPhonemeTag;
+				ptag->SetStartTime( ph->starttime );
+				ptag->SetEndTime( ph->endtime );
+
+				ptag->m_uiStartByte = ComputeByteFromTime( ph->starttime );
+				ptag->m_uiEndByte = ComputeByteFromTime( ph->endtime );
+
+				ptag->SetTag( ph->phoneme );
+				ptag->SetPhonemeCode( TextToPhoneme( ptag->GetTag() ) );
+
+				newWord->m_Phonemes.AddToTail( ptag );
+			}
+		}
+
+		outwords.AddWordTag( newWord );
+		inwordpos++;
+		awordpos++;
+	}
+}
+
+// Normalizes a word before it is compared with the analyzer's words:
+//   - whitespace and the punctuation  - . , ; ? " : ( )  become a space
+//   - letters are copied in upper case
+//   - apostrophes are kept
+//   - everything else (digits, other symbols, non-ASCII bytes) is dropped
+// Returns a pointer to a function-local static buffer that is overwritten by
+//  the next call; output is truncated to fit the buffer.
+char const *CPhonemeExtractorLipSinc::ApplyTBWordRules( char const *word )
+{
+	static char outword[ 256 ];
+
+	char const *in = word;
+	char *out = outword;
+
+	// Always leave room for the terminator written after the loop
+	char * const outlimit = outword + sizeof( outword ) - 1;
+
+	for ( ; *in && out < outlimit; in++ )
+	{
+		const char c = *in;
+
+		switch ( c )
+		{
+		case '\t':
+		case ' ':
+		case '\n':
+		case '-':
+		case '.':
+		case ',':
+		case ';':
+		case '?':
+		case '"':
+		case ':':
+		case '(':
+		case ')':
+			*out++ = ' ';
+			continue;
+		default:
+			break;
+		}
+
+		// Convert all letters to upper case
+		if ( c >= 'a' && c <= 'z' )
+		{
+			*out++ = c - 'a' + 'A';
+			continue;
+		}
+
+		if ( ( c >= 'A' && c <= 'Z' ) || c == '\'' )
+		{
+			*out++ = c;
+			continue;
+		}
+
+		// Anything else - digits, unprintable characters, bytes >= 128 and
+		//  the remaining symbols - is skipped
+	}
+
+	*out = 0;
+
+	return outword;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Given a wavfile and a list of inwords, determines the word/phoneme 
+//  sample counts for the sentence
+// Input  : *wavfile - wave file to analyze
+//			numsamples - stored as the sample count used for byte offsets
+//			*pfnPrint - printf-style callback that receives all output
+//			inwords - words expected in the wave file
+//			outwords - receives a copy of inwords, then the analyzed result
+// Output : SR_RESULT - SR_RESULT_ERROR if the library cannot be initialized
+//  or the sound file is rejected, SR_RESULT_FAILED if the analysis fails,
+//  SR_RESULT_SUCCESS otherwise
+//-----------------------------------------------------------------------------
+SR_RESULT CPhonemeExtractorLipSinc::Extract( 
+	const char *wavfile,
+	int numsamples,
+	void (*pfnPrint)( const char *fmt, ... ),
+	CSentence& inwords,
+	CSentence& outwords )
+{
+	// g_enableTalkBackDebuggingOutput = 1;
+
+	m_pfnPrint = pfnPrint;
+
+	if ( !InitLipSinc() )
+	{
+		return SR_RESULT_ERROR;
+	}
+	
+	m_flSampleCount = numsamples;
+
+	// On the failure paths below the helper DLL deliberately stays loaded.
+	//  m_bInitialized is still true, so later Extract() calls and the
+	//  destructor keep using talkback / m_hHelper; unloading the module here
+	//  would leave both dangling and make the next use crash.
+	if ( !CheckSoundFile( wavfile ) )
+	{
+		return SR_RESULT_ERROR;
+	}
+
+	TALKBACK_ANALYSIS *analysis = NULL;
+
+	if ( !AttemptAnalysis( &analysis, wavfile, inwords ) )
+	{
+		return SR_RESULT_FAILED;
+	}
+	
+	if ( strlen( inwords.GetText() ) <= 0 )
+	{
+		inwords.SetTextFromWords();
+	}
+
+	// Start from the caller's words; ProcessWords() replaces them when the
+	//  analysis produced usable data
+	outwords = inwords;
+
+	// Examine data
+	ProcessWords( analysis, inwords, outwords );
+
+	if ( analysis )
+	{
+		talkback->TalkBackFreeAnalysis( &analysis );
+	}
+
+	return SR_RESULT_SUCCESS;
+}
+
+EXPOSE_SINGLE_INTERFACE( CPhonemeExtractorLipSinc, IPhonemeExtractor, VPHONEME_EXTRACTOR_INTERFACE );	// Presumably publishes one instance of this class as IPhonemeExtractor under VPHONEME_EXTRACTOR_INTERFACE - macro is defined outside this file, confirm there
+\ No newline at end of file
author	Joe Ludwig <[email protected]>	2013-06-26 15:22:04 -0700
committer	Joe Ludwig <[email protected]>	2013-06-26 15:22:04 -0700
commit	39ed87570bdb2f86969d4be821c94b722dc71179 (patch)
tree	abc53757f75f40c80278e87650ea92808274aa59 /mp/src/utils/phonemeextractor/phonemeextractor_ims.cpp
download	source-sdk-2013-39ed87570bdb2f86969d4be821c94b722dc71179.tar.xz source-sdk-2013-39ed87570bdb2f86969d4be821c94b722dc71179.zip