summaryrefslogtreecommitdiff
path: root/utils/hlfaceposer/phonemeeditor.h
blob: 98972c753ec3cb000a53c062b342a131ae8c1448 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
//========= Copyright Valve Corporation, All rights reserved. ============//
//
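// Purpose: Declares PhonemeEditor, the tool window that shows WAV data and
//			allows blanking it out and tweaking phoneme tags, plus the IDC_*
//			identifiers and COLOR_PHONEME_* color indices it uses.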
//
// $NoKeywords: $
//=============================================================================//

#ifndef PHONEEDITOR_H
#define PHONEEDITOR_H
#ifdef _WIN32
#pragma once
#endif

#include <mxtk/mx.h>
class CAudioSource;
class CAudioMixer;
class mxBitmapButton;
class mxButton;

#include "utlvector.h"
#include "faceposertoolwindow.h"

// IDC_* numeric identifiers used by the phoneme editor.
// Every value below is unique within this header.  The numbering has gaps:
// 1003, 1047-1049 and 1052-1074 are not assigned here.

// Scrolling / playback of the original wave
#define IDC_PHONEME_SCROLL                    1001
#define IDC_PHONEME_PLAY_ORIG                 1002

// Phoneme editing
#define IDC_EDIT_PHONEME                      1004
#define IDC_EDIT_INSERTPHONEMEBEFORE          1005
#define IDC_EDIT_INSERTPHONEMEAFTER           1006
#define IDC_EDIT_DELETEPHONEME                1007

#define IDC_PLAY_EDITED_SELECTION             1008

// Extraction, selection, save and playback control
#define IDC_REDO_PHONEMEEXTRACTION            1009
#define IDC_REDO_PHONEMEEXTRACTION_SELECTION  1010
#define IDC_DESELECT                          1011
#define IDC_PLAY_EDITED                       1012
#define IDC_SAVE_LINGUISTIC                   1013
#define IDC_CANCELPLAYBACK                    1014

// Word list, snapping / separating, wave loading
#define IDC_EDITWORDLIST                      1015
#define IDC_SNAPWORDS                         1016
#define IDC_SEPARATEWORDS                     1017
#define IDC_LOADWAVEFILE                      1018
#define IDC_SNAPPHONEMES                      1019
#define IDC_SEPARATEPHONEMES                  1020

// Extracted-data handling
#define IDC_COMMITEXTRACTED                   1021
#define IDC_CLEAREXTRACTED                    1022

// Tags
#define IDC_ADDTAG                            1023
#define IDC_DELETETAG                         1024

// Undo / redo
#define IDC_CVUNDO                            1025
#define IDC_CVREDO                            1026

// Word editing
#define IDC_EDIT_DELETEWORD                   1027
#define IDC_EDIT_WORD                         1028
#define IDC_EDIT_INSERTWORDBEFORE             1029
#define IDC_EDIT_INSERTWORDAFTER              1030
#define IDC_EDIT_INSERTFIRSTPHONEMEOFWORD     1031

// Directional selection
#define IDC_SELECT_WORDSRIGHT                 1032
#define IDC_SELECT_WORDSLEFT                  1033
#define IDC_SELECT_PHONEMESRIGHT              1034
#define IDC_SELECT_PHONEMESLEFT               1035

#define IDC_DESELECT_PHONEMESANDWORDS         1036
#define IDC_CLEANUP                           1037
#define IDC_CLEARUNDO                         1038

#define IDC_PLAYBUTTON                        1039

#define IDC_MODE_TAB                          1040

// Emphasis mode
#define IDC_EMPHASIS_DELETE                   1041
#define IDC_EMPHASIS_DESELECT                 1042
#define IDC_EMPHASIS_SELECTALL                1043

#define IDC_PLAYBACKRATE                      1044

#define IDC_REALIGNPHONEMES                   1045
#define IDC_REALIGNWORDS                      1046

// Speech API selection (more than one speech API is supported)
#define IDC_API_SAPI                          1050
#define IDC_API_LIPSINC                       1051

#define IDC_EXPORT_SENTENCE                   1075
#define IDC_IMPORT_SENTENCE                   1076
#define IDC_TOGGLE_VOICEDUCK                  1077

// First id of the language range.  The end of the range is intentionally not
// defined; the original note reads:
// #define IDC_PE_LANGUAGEEND                 1106 or so
#define IDC_PE_LANGUAGESTART                  1100

class IterateOutputRIFF;
class IterateRIFF;
class CChoreoWidgetDrawHelper;
class CChoreoEvent;
class CEventRelativeTag;
class CChoreoView;
class IPhonemeExtractor;
class CPhonemeModeTab;
class mxPopupMenu;

#include "sentence.h"

// Color identifiers for the phoneme editor.
// Enumerators are consecutive and start at zero, so the final entry,
// NUM_COLORS, equals the number of color identifiers declared before it.
enum
{
	// General drawing (the first enumerator is implicitly 0)
	COLOR_PHONEME_BACKGROUND,
	COLOR_PHONEME_TEXT,
	COLOR_PHONEME_LIGHTTEXT,
	COLOR_PHONEME_PLAYBACKTICK,
	COLOR_PHONEME_WAVDATA,

	// Timeline
	COLOR_PHONEME_TIMELINE,
	COLOR_PHONEME_TIMELINE_MAJORTICK,
	COLOR_PHONEME_TIMELINE_MINORTICK,

	// Extraction result
	COLOR_PHONEME_EXTRACTION_RESULT_FAIL,
	COLOR_PHONEME_EXTRACTION_RESULT_SUCCESS,
	COLOR_PHONEME_EXTRACTION_RESULT_ERROR,
	COLOR_PHONEME_EXTRACTION_RESULT_OTHER,

	// Tags
	COLOR_PHONEME_TAG_BORDER,
	COLOR_PHONEME_TAG_BORDER_SELECTED,
	COLOR_PHONEME_TAG_FILLER_NORMAL,
	COLOR_PHONEME_TAG_SELECTED,
	COLOR_PHONEME_TAG_TEXT,
	COLOR_PHONEME_TAG_TEXT_SELECTED,

	COLOR_PHONEME_WAV_ENDPOINT,

	COLOR_PHONEME_AB,
	COLOR_PHONEME_AB_LINE,
	COLOR_PHONEME_AB_TEXT,

	// Borders and timing tags
	COLOR_PHONEME_ACTIVE_BORDER,
	COLOR_PHONEME_SELECTED_BORDER,
	COLOR_PHONEME_TIMING_TAG,

	// Emphasis backgrounds
	COLOR_PHONEME_EMPHASIS_BG,
	COLOR_PHONEME_EMPHASIS_BG_STRONG,
	COLOR_PHONEME_EMPHASIS_BG_WEAK,

	// Emphasis foreground elements
	COLOR_PHONEME_EMPHASIS_BORDER,
	COLOR_PHONEME_EMPHASIS_LINECOLOR,
	COLOR_PHONEME_EMPHASIS_DOTCOLOR,
	COLOR_PHONEME_EMPHASIS_DOTCOLOR_SELECTED,
	COLOR_PHONEME_EMPHASIS_TEXT,
	COLOR_PHONEME_EMPHASIS_MIDLINE,

	// Count of the identifiers above; must remain the last enumerator
	NUM_COLORS
};

//-----------------------------------------------------------------------------
// Purpose: Shows WAV data and allows blanking it out and tweaking phoneme tags
//
// Derives from mxWindow and IFacePoserToolWindow.  Only declarations are
// visible in this header; comments marked NOTE(review) describe intent that
// is inferred from names/signatures and should be confirmed against the
// implementation file.
//-----------------------------------------------------------------------------
class PhonemeEditor : public mxWindow, public IFacePoserToolWindow
{
public:
	// Boundary kinds.
	// NOTE(review): presumably the values returned by IsMouseOverBoundary() — confirm.
	enum
	{
		BOUNDARY_NONE = 0,
		BOUNDARY_PHONEME,
		BOUNDARY_WORD,
	};

	// Editing modes.  GetMode() returns this type and m_CurrentMode is declared with it.
	typedef enum
	{
		MODE_PHONEMES = 0,
		MODE_EMPHASIS
	} EditorMode;

	// Construction
						PhonemeEditor( mxWindow *parent );
						~PhonemeEditor( void );

	virtual void		Think( float dt );

	virtual void		OnDelete();
	virtual bool		CanClose();

	void				ValidateSpeechAPIIndex();

	virtual int			handleEvent( mxEvent *event );
	virtual void		redraw( void );
	virtual bool		PaintBackground( void );

	EditorMode			GetMode( void ) const;
	void				SetupPhonemeEditorColors( void );
	// NOTE(review): colornum is presumably one of the COLOR_PHONEME_* values
	// (0 .. NUM_COLORS-1) — confirm.
	COLORREF			PEColor( int colornum );
	void				OnModeChanged( void );

	// Change wave file being edited
	void				SetCurrentWaveFile( const char *wavefile, bool force = false, CChoreoEvent *event = NULL );

	// called when scene is unloaded in choreview or when event/channel/actor gets deleted
	// so we don't have dangling pointers to tags, events, scene
	void				ClearEvent( void );

	void				Play();

private:
	// Drawing helpers
	void				DrawWords( CChoreoWidgetDrawHelper& drawHelper, RECT& rcWorkSpace, CSentence &sentence, int type, bool showactive = true );
	void				DrawPhonemes( CChoreoWidgetDrawHelper& drawHelper, RECT& rcWorkSpace, CSentence &sentence, int type, bool showactive = true );
	void				DrawRelativeTags( CChoreoWidgetDrawHelper& drawHelper, RECT& rc );

	// Emphasis_* helpers
	// NOTE(review): presumably used while GetMode() == MODE_EMPHASIS — confirm.
	void				Emphasis_Redraw( CChoreoWidgetDrawHelper& drawHelper, RECT& rcWorkSpace );
	void				Emphasis_GetRect( RECT const & rcWorkSpace, RECT& rcEmphasis );

	void				Emphasis_Init( void );
	CEmphasisSample		*Emphasis_GetSampleUnderMouse( mxEvent *event );
	void				Emphasis_DeselectAll( void );
	void				Emphasis_SelectAll( void );
	void				Emphasis_Delete( void );
	void				Emphasis_AddSample( CEmphasisSample const& sample );
	void				Emphasis_CountSelected( void );
	void				Emphasis_ShowContextMenu( mxEvent *event );
	void				Emphasis_MouseDrag( int x, int y );
	bool				Emphasis_IsValid( void );
	void				Emphasis_SelectPoints( void );
	
	// Data
	// NOTE(review): declared among the Emphasis_* helpers; presumably the count
	// maintained by Emphasis_CountSelected() — confirm.
	int					m_nNumSelected;

	// Readjust slider
	void				MoveTimeSliderToPos( int x );

	// Handle scrollbar
	// NOTE(review): the setter takes an int while the getter returns a float.
	void				SetTimeZoomScale( int scale );
	float				GetTimeZoomScale( void );
	float				GetPixelsPerSecond( void );
	// Adjust scroll bars
	void				RepositionHSlider( void );

	// Edit commands
	void				EditPhoneme( CPhonemeTag *pPhoneme, bool positionDialog = false );
	void				EditPhoneme( void );
	void				EditInsertPhonemeBefore( void );
	void				EditInsertPhonemeAfter( void );
	void				EditDeletePhoneme( void );

	void				SelectPhonemes( bool forward );

	void				EditInsertFirstPhonemeOfWord( void );

	void				EditWord( CWordTag *pWord, bool positionDialog = false );
	void				EditWord( void );
	void				EditInsertWordBefore( void );
	void				EditInsertWordAfter( void );
	void				EditDeleteWord( void );

	void				SelectWords( bool forward );

	// Edit word list
	void				EditWordList( void );
	void				SentenceFromString( CSentence& sentence, char const *str );


	// Wav processing commands
	void				RedoPhonemeExtraction( void );
	// Redo extraction of selected words only
	void				RedoPhonemeExtractionSelected( void );
	void				Deselect( void );

	void				PlayEditedWave( bool selection = false );
	void				CommitChanges( void );

	// Context menu
	void				ShowPhonemeMenu( CPhonemeTag *pho, int mx, int my );
	void				ShowWordMenu( CWordTag *word, int mx, int my );

	void				ShowContextMenu( int mx, int my );
	void				ShowContextMenu_Phonemes( int mx, int my );
	void				ShowContextMenu_Emphasis( int mx, int my );

	// UI helpers
	void				GetWorkspaceRect( RECT &rc );


	bool				IsMouseOverWordRow( int my );
	bool				IsMouseOverPhonemeRow( int my );

	int					IsMouseOverBoundary( mxEvent *event );
	int					GetWordUnderMouse( int mx, int my );
	int					ComputeHPixelsNeeded( void );
	void				DrawFocusRect( char *reason );
	// NOTE(review): dragtype is presumably one of the DRAGTYPE_* values declared
	// further down — confirm.
	void				StartDragging( int dragtype, int startx, int starty, HCURSOR cursor );

	void				FinishPhonemeMove( int startx, int endx );
	void				FinishPhonemeDrag( int startx, int endx );
	void				FinishWordMove( int startx, int endx );
	void				FinishWordDrag( int startx, int endx );

	// Conversions between pixels, time and samples
	float				GetTimeForPixel( int mx );
	void				GetScreenStartAndEndTime( float &starttime, float& endtime );
	float				GetTimePerPixel( void );
	int					GetSampleForMouse( int mx );
	int					GetPixelForSample( int sample );

	bool				FindSpanningPhonemes( float time, CPhonemeTag **pp1, CPhonemeTag **pp2 );
	bool				FindSpanningWords( float time, CWordTag **pp1, CWordTag **pp2 );
	int					FindWordForTime( float time );
	CPhonemeTag			*FindPhonemeForTime( float time );
	void				DeselectWords( void );
	void				SnapWords( void );
	void				SeparateWords( void );

	void				DeselectPhonemes( void );
	void				SnapPhonemes( void );
	void				SeparatePhonemes( void );

	void				CreateEvenWordDistribution( const char *wordlist );

	// Dirty flag
	void				SetDirty( bool dirty, bool clearundo = true );
	bool				GetDirty( void );

	// FIXME:  Do something else here
	void				ResampleChunk( IterateOutputRIFF& store, void *format, int chunkname, char *buffer, int buffersize, int start_silence = 0, int end_silence = 0 );

	// Mouse control over selected samples
	void				SelectSamples( int start, int end );
	void				FinishSelect( int startx, int mx );
	void				FinishMoveSelection( int startx, int mx );
	void				FinishMoveSelectionStart( int startx, int mx );
	void				FinishMoveSelectionEnd( int startx, int mx );

	bool				IsMouseOverSamples( int mx, int my );
	bool				IsMouseOverSelection( int mx, int my );
	bool				IsMouseOverSelectionStartEdge( mxEvent *event );
	bool				IsMouseOverSelectionEndEdge( mxEvent *event );

	// Event-relative tag handling
	bool				IsMouseOverTag( int mx, int my );
	void				FinishEventTagDrag( int startx, int endx );
	CEventRelativeTag	*GetTagUnderMouse( int mx );
	bool				IsMouseOverTagRow( int my );
	void				ShowTagMenu( int mx, int my );
	void				AddTag( void );
	void				DeleteTag( void );

	// After running liset/sapi, retrieve phoneme tag data from stream
	void				RetrieveLinguisticData( void );

	// Copy current phoneme chunk over existing data chunk of .wav file
	void				SaveLinguisticData( void );
	void				StoreValveDataChunk( IterateOutputRIFF& store );

	void				ExportValveDataChunk( char const *tempfile );
	void				ImportValveDataChunk( char const *tempfile );

	void				OnImport();
	void				OnExport();

	// Playback (returns true if sound had been playing)
	bool				StopPlayback( void );

	CPhonemeTag			*GetPhonemeTagUnderMouse( int mx, int my );
	CWordTag			*GetWordTagUnderMouse( int mx, int my );

	void				ReadLinguisticTags( void );

	void				LoadWaveFile( void );

	void				GetPhonemeTrayTopBottom( RECT& rc );
	void				GetWordTrayTopBottom( RECT& rc );

	void				GetWordRect( const CWordTag *tag, RECT& rc );
	void				GetPhonemeRect( const CPhonemeTag *tag, RECT& rc );
	int					GetMouseForTime( float time );

	void				CommitExtracted( void );
	void				ClearExtracted( void );

	const char *		GetExtractionResultString( int resultCode );

	void				AddFocusRect( RECT& rc );

	void				CountSelected( void );

	// Pointer-to-member callback types accepted by TraverseWords() and
	// TraversePhonemes().  The ITER_* methods below have matching signatures.
	typedef void (PhonemeEditor::*PEWORDITERFUNC)( CWordTag *word, float fparam );
	typedef void (PhonemeEditor::*PEPHONEMEITERFUNC)( CPhonemeTag *phoneme, CWordTag *word, float fparam );

	void				TraverseWords( PEWORDITERFUNC pfn, float fparam );
	void				TraversePhonemes( PEPHONEMEITERFUNC pfn, float fparam );

	// Iteration functions
	void				ITER_MoveSelectedWords( CWordTag *word, float amount );
	void				ITER_MoveSelectedPhonemes( CPhonemeTag *phoneme, CWordTag *word, float amount );

	void				ITER_ExtendSelectedPhonemeEndTimes( CPhonemeTag *phoneme, CWordTag *word, float amount );
	void				ITER_ExtendSelectedWordEndTimes( CWordTag *word, float amount );

	void				ITER_AddFocusRectSelectedWords( CWordTag *word, float amount );
	void				ITER_AddFocusRectSelectedPhonemes( CPhonemeTag *phoneme, CWordTag *word, float amount );

	void				ITER_CountSelectedWords( CWordTag *word, float amount );
	void				ITER_CountSelectedPhonemes( CPhonemeTag *phoneme, CWordTag *word, float amount );

	void				ITER_SelectSpanningWords( CWordTag *word, float amount );

// Undo/Redo
	void				Undo( void );
	void				Redo( void );
	void				ClearUndo( void );

	// Do push before changes
	void				PushUndo( void );
	// Do this push after changes, must match pushundo 1for1
	void				PushRedo( void );

	void				WipeUndo( void );
	void				WipeRedo( void );

	CPhonemeTag			*GetClickedPhoneme( void );
	CWordTag			*GetClickedWord( void );
	void				SetClickedPhoneme( int word, int phoneme );

	// Selection manipulation driven by a direction argument
	void				ShiftSelectedPhoneme( int direction );
	void				ExtendSelectedPhonemeEndTime( int direction );
	void				SelectNextPhoneme( int direction );
	void				SelectNextWord( int direction );
	bool				IsPhonemeSelected( CWordTag *word );
	void				ShiftSelectedWord( int direction );
	void				ExtendSelectedWordEndTime( int direction );

	// The trailing pointer-to-pointer parameters are optional (default NULL).
	float				GetTimeGapToNextWord( bool forward, CWordTag *currentWord, CWordTag **ppNextWord = NULL );
	float				GetTimeGapToNextPhoneme( bool forward, CPhonemeTag *currentPhoneme, CWordTag **ppword = NULL, CPhonemeTag **phoneme = NULL );
	int					IndexOfWord( CWordTag *word );
	CPhonemeTag			*GetSelectedPhoneme( void );
	CWordTag			*GetSelectedWord( void );

	void				OnMouseMove( mxEvent *event );

	bool				AreSelectedWordsContiguous( void );
	bool				AreSelectedPhonemesContiguous( void );

	bool				CreateCroppedWave( char const *filename, int startsample, int endsample );
	// NOTE(review): prepareundo presumably controls whether an undo entry is
	// pushed before the change — confirm.
	void				CleanupWordsAndPhonemes( bool prepareundo );
	void				RealignPhonemesToWords( bool prepareundo );
	void				RealignWordsToPhonemes( bool prepareundo );
	void				SortWords( bool prepareundo );
	void				SortPhonemes( bool prepareundo );

	float				ComputeMaxWordShift( bool forward, bool allowcrop );
	float				ComputeMaxPhonemeShift( bool forward, bool allowcrop );

	int					PixelsForDeltaTime( float dt );

	// Drag limiting (see m_bLimitDrag, m_nLeftLimit and m_nRightLimit below)
	void				ClearDragLimit( void );
	void				SetDragLimit( int dragtype );
	void				LimitDrag( int& mousex );

	void				SelectExpression( CPhonemeTag *tag );

	// Speech API handling
	void				OnSAPI( void );
	void				OnLipSinc( void );

	bool				CheckSpeechAPI( void );
	char const			*GetSpeechAPIName( void );

	void				LoadPhonemeConverters();
	void				UnloadPhonemeConverters();

	// Scrubber
	bool				IsMouseOverScrubHandle( mxEvent *event );
	bool				IsMouseOverScrubArea( mxEvent *event );
	void				GetScrubHandleRect( RECT& rcHandle, bool clipped = false );
	void				GetScrubAreaRect( RECT& rcArea );
	void				DrawScrubHandle();

	void				DrawScrubHandle( CChoreoWidgetDrawHelper& drawHelper );
	void				ScrubThink( float dt, bool scrubbing );

	void				SetScrubTime( float t );
	void				SetScrubTargetTime( float t );

	float				GetTimeForSample( int sample );
	void				ClampTimeToSelectionInterval( float& timeval );
	void				OnToggleVoiceDuck();

	// Data
	// (access is already private at this point; the specifier below is
	// redundant but harmless)
private:
	// Type of mouse movement
	enum
	{
		DRAGTYPE_NONE = 0,
		DRAGTYPE_PHONEME ,
		DRAGTYPE_WORD,
		DRAGTYPE_SELECTSAMPLES,
		DRAGTYPE_MOVESELECTION,
		DRAGTYPE_MOVESELECTIONSTART,
		DRAGTYPE_MOVESELECTIONEND,
		DRAGTYPE_MOVEWORD,
		DRAGTYPE_MOVEPHONEME,
		DRAGTYPE_EVENTTAG_MOVE,
		DRAGTYPE_EMPHASIS_SELECT,
		DRAGTYPE_EMPHASIS_MOVE,
		DRAGTYPE_SCRUBBER
	};

	// NOTE(review): presumably set via SetScrubTime() / SetScrubTargetTime() — confirm.
	float				m_flScrub;
	float				m_flScrubTarget;

	EditorMode			m_CurrentMode;
	// Graph scale
	float				m_flPixelsPerSecond;
	// Graph scale
	// NOTE(review): the comment above duplicates the one on m_flPixelsPerSecond;
	// these two presumably hold the time zoom value and its step — confirm.
	int					m_nTimeZoom;
	int					m_nTimeZoomStep;

	int					m_nTickHeight;

	// Current wave file
	CAudioSource		*m_pWaveFile;
	CAudioMixer			*m_pMixer;
	CChoreoEvent		*m_pEvent;
	int					m_nClickX;

	// Three fixed-size (256 char) path buffers plus a dirty flag.
	// NOTE(review): m_bDirty presumably backs SetDirty() / GetDirty() — confirm.
	struct CWorkFile
	{
	public:
		char				m_szWaveFile[ 256 ];
		char				m_szWorkingFile[ 256 ];
		char				m_szBasePath[ 256 ];
		bool				m_bDirty;
	};
	CWorkFile			m_WorkFile;

	mxScrollbar			*m_pHorzScrollBar;
	// Current sb value
	int					m_nLeftOffset;

	CPhonemeModeTab		*m_pModeTab;

	mxSlider			*m_pPlaybackRate;
	float				m_flPlaybackRate;

	mxButton			*m_btnRedoPhonemeExtraction;
	mxButton			*m_btnSave;
	mxButton			*m_btnLoad;

	mxButton			*m_btnPlay; // selection or full depending

	// Mouse dragging
	HCURSOR				m_hPrevCursor;

	int					m_nStartX;
	int					m_nStartY;
	int					m_nLastX;
	int					m_nLastY;
	// NOTE(review): presumably holds one of the DRAGTYPE_* values above — confirm.
	int					m_nDragType;
	// A pair of rectangles; m_FocusRects stores a vector of these.
	struct CFocusRect
	{
		RECT	m_rcOrig;
		RECT	m_rcFocus;
	};
	CUtlVector < CFocusRect >	m_FocusRects;

	int					m_nClickedPhoneme;
	int					m_nClickedWord;

	// Current set of tags
	CSentence			m_Tags;

	// NOTE(review): presumably the extracted tags handled by CommitExtracted() /
	// ClearExtracted() — confirm.
	CSentence			m_TagsExt;

	// NOTE(review): presumably the start/end of the sample selection
	// (see SelectSamples) — confirm.
	int					m_nSelection[ 2 ];
	bool				m_bSelectionActive;

	int					m_nLastExtractionResult;

	int					m_nSelectedPhonemeCount;
	int					m_nSelectedWordCount;

	bool				m_bWordsActive;

	// One undo-stack entry: a pair of CSentence pointers.
	// NOTE(review): ownership of the pointed-to sentences is not visible in this header.
	struct PEUndo
	{
		CSentence *undo;
		CSentence *redo;
	};

	// The stack stores PEUndo pointers, not PEUndo values.
	CUtlVector< PEUndo * >	m_UndoStack;
	int					m_nUndoLevel;
	bool				m_bRedoPending;

	// Drag limiting state (see ClearDragLimit / SetDragLimit / LimitDrag)
	bool				m_bLimitDrag;
	int					m_nLeftLimit;
	int					m_nRightLimit;

	IPhonemeExtractor	*m_pPhonemeExtractor;
	float				m_flScrubberTimeOffset;
};

extern PhonemeEditor	*g_pPhonemeEditor;

#endif // PHONEEDITOR_H