include/tts/tts.h

Go to the documentation of this file.
00001 
00002 // Name:        tts.h
00003 // Purpose:     Text to speech classes for wxWidgets
00004 // Author:      Julian Smart
00005 // Modified by: 
00006 // Created:     2009-02-10
00007 // RCS-ID:      
00008 // Copyright:   (c) Julian Smart
00009 // Licence:     New BSD License
00011 
00012 #ifndef _WX_TTS_
00013 #define _WX_TTS_
00014 
00015 /*
00016  * Includes
00017  */
00018 
00019 #include "tts/tts_property.h"
00020 
00021 /*
00022  * Styles and types
00023  */
00024 
00025 // Input formats. "Native" means - don't convert the text, pass directly
00026 // to the engine. For example, MS SAPI XML.
00027 // Consider adding "Auto" value.
// Each of these four macros expands to a wxString object, unlike the
// property-name macros below, which expand to bare wxT() string literals.
00028 #define wxTTS_PROP_INPUT_FORMAT_TEXT            wxString(wxT("Text"))
00029 #define wxTTS_PROP_INPUT_FORMAT_HTML            wxString(wxT("HTML"))
00030 #define wxTTS_PROP_INPUT_FORMAT_SSML            wxString(wxT("SSML"))
00031 #define wxTTS_PROP_INPUT_FORMAT_NATIVE          wxString(wxT("Native"))
00032 
00033 // Properties
// Names used as the 'name' argument of the SetProperty()/GetProperty*()
// families declared on wxTextToSpeech and wxTTSHandler.
00034 #define wxTTS_PROP_SPEED                        wxT("Speed")
00035 #define wxTTS_PROP_PITCH                        wxT("Pitch")
00036 #define wxTTS_PROP_VOLUME                       wxT("Volume")
00037 #define wxTTS_PROP_VOICE                        wxT("Voice")
00038 #define wxTTS_PROP_VOICE_VARIANT                wxT("Voice variant")
00039 #define wxTTS_PROP_PROGRAM_LOCATION             wxT("Program location")
00040 #define wxTTS_PROP_DATA_LOCATION                wxT("Data location")
00041 #define wxTTS_PROP_INPUT_FORMAT                 wxT("Input format")
00042 #define wxTTS_PROP_PRIORITY                     wxT("Priority")
00043 
00044 // Pass text unchanged to engine, apart from word substitutions
// NOTE(review): the sentence above reads like the description of a "raw text"
// option rather than of wxTTS_PROP_NO_SUBSTITUTIONS ("No pronunciation
// substitutions"); confirm which property it was meant to describe.
00045 #define wxTTS_PROP_NO_SUBSTITUTIONS             wxT("No pronunciation substitutions")
00046 #define wxTTS_PROP_SIMPLIFY_PUNCTUATION         wxT("Simplify punctuation")
00047 #define wxTTS_PROP_TEXT_ENCODING                wxT("Text encoding")
00048 #define wxTTS_PROP_ENGINE_WEBSITE               wxT("Web site")
00049 
00050 // Capabilities
// Identifiers for optional engine features, passed to HasCapability().
// The values are consecutive ordinals starting at 1, not bit flags, so they
// cannot be OR-ed together.
00051 typedef enum {
00052     wxTTS_CAP_PROGRAM_LOCATION = 1,
00053     wxTTS_CAP_DATA_LOCATION,
00054     wxTTS_CAP_WRITE_WAVE_FILE,
00055     wxTTS_CAP_SPEAK_HTML,
00056     wxTTS_CAP_VOICE,
00057     wxTTS_CAP_VOICE_VARIATIONS,
00058     wxTTS_CAP_PITCH,
00059     wxTTS_CAP_SPEED,
00060     wxTTS_CAP_VOLUME,
00061     wxTTS_CAP_SSML,
00062     wxTTS_CAP_PAUSE,
00063     wxTTS_CAP_SKIP
00064 } wxTTSCapability;
00065 
// Unit selector for the speed/volume/pitch setters and getters, all of which
// default to wxTTS_NATIVE. wxTTSHandler also declares PercentToNativeValue()
// and NativeValueToPercent(); presumably wxTTS_PERCENT values are mapped onto
// the engine's min..max range with those helpers - confirm in the .cpp file.
00066 typedef enum
00067 {
00068     wxTTS_NATIVE,
00069     wxTTS_PERCENT
00070 } wxTTSQuantityType;
00071 
00072 // Skip types
// Unit given as the 'itemType' argument of Skip(); the Skip() declarations
// use wxTTS_SkipSentence as their default.
00073 typedef enum
00074 {
00075     wxTTS_SkipSentence,
00076     wxTTS_SkipParagraph
00077 } wxTTSSkipType;
00078 
00079 // Priority types
// Speech priority levels. Priorities travel through the API as plain int
// (SetPriority(int) / GetPriority()), so the enumeration is left unnamed.
// The former leading 'typedef' had no declarator: it declared nothing and
// only produced "typedef requires a name" / "'typedef' was ignored in this
// declaration" warnings, so it has been removed. Enumerator names and values
// are unchanged.
enum
{
    wxTTS_Normal = 0,
    wxTTS_Alert = 1,
    wxTTS_Over = 2
};
00086 
00087 // Speak options
// Values for the 'options' int of Speak()/SpeakFile(), whose declarations
// default to wxTTS_SPEAK_ASYNC. Each macro is a distinct single bit; whether
// combining them is meaningful is not visible in this header.
00088 #define wxTTS_SPEAK_ASYNC       0x01
00089 #define wxTTS_SPEAK_SYNC        0x02
00090 #define wxTTS_SPEAK_SYNC_YIELD  0x04
00091 
00092 // Speech defaults
// Percent-scale defaults; wxTTSSpeechSettingsInfo::Init() assigns them to its
// m_speed, m_pitch and m_volume members.
00093 #define wxTTS_SPEED_DEFAULT_PERCENT  50
00094 #define wxTTS_PITCH_DEFAULT_PERCENT  50
00095 #define wxTTS_VOLUME_DEFAULT_PERCENT 50
00096 
00097 /*
00098  * Forward declarations
00099  */
00100 
00101 class wxTTSHandler;
00102 
00111 class wxTTSTransformer;
00112 
// Application-facing text-to-speech object, derived from wxEvtHandler.
// It holds a list of engine handlers (m_handlers), a pointer to the handler
// currently in use (m_currentHandler), an optional text transformer
// (m_transformer) and an error code (m_errorCode).
// Most members are only declared here. Its public interface mirrors that of
// wxTTSHandler, so the calls are presumably forwarded to the current handler -
// confirm against the implementation file.
00113 class wxTextToSpeech: public wxEvtHandler
00114 {    
00115     DECLARE_DYNAMIC_CLASS( wxTextToSpeech )
00116 
00117 public:
00118 // Constructors
00119 
00120     wxTextToSpeech();
00121     ~wxTextToSpeech();
00122 
00123 // Operations
00124 
          // Initialisation helper; its body is not part of this header.
00126     void Init();
00127 
          // Speaks 'text'. 'options' defaults to wxTTS_SPEAK_ASYNC (see the
          // wxTTS_SPEAK_* macros). Returns a success flag.
00129     virtual bool Speak(const wxString& text, int options = wxTTS_SPEAK_ASYNC);
00130 
          // File-based variant of Speak(). 'waveFilename' defaults to the empty
          // string; presumably a non-empty value requests output to a wave
          // file (cf. wxTTS_CAP_WRITE_WAVE_FILE) - confirm.
00132     virtual bool SpeakFile(const wxString& filename, int options = wxTTS_SPEAK_ASYNC, const wxString& waveFilename = wxEmptyString);
00133 
          // Speech-state query; see IsPlaying() for the paused/not-paused split.
00135     virtual bool IsSpeaking() const;
00136 
          // True only while speaking and not paused.
00138     virtual bool IsPlaying() const { return IsSpeaking() && !IsPaused(); }
00139 
          // Playback control. Each returns a bool, presumably success - confirm.
00141     virtual bool Stop();
00142 
00144     virtual bool Pause();
00145 
00147     virtual bool Resume();
00148 
00150     virtual bool IsPaused() const;
00151 
00153     virtual bool CanPause() const;
00154 
          // Skips 'toSkip' items of kind 'itemType' (sentence by default).
          // The meaning of a negative count is not visible here - confirm.
00158     virtual bool Skip(int toSkip, wxTTSSkipType itemType = wxTTS_SkipSentence);
00159 
00161     virtual bool CanSkip() const;
00162 
          // Queries one wxTTSCapability value.
00164     virtual bool HasCapability(wxTTSCapability capability) const;
00165 
          // Property setters, overloaded on the value type. The wxTTS_PROP_*
          // macros supply the standard property names.
00167     void SetProperty(const wxTTSProperty& property);
00168     void SetProperty(const wxString& name, const wxString& value);
00169     void SetProperty(const wxString& name, const wxString& value, const wxArrayString& choices);
00170     void SetProperty(const wxString& name, long value);
00171     void SetProperty(const wxString& name, int value);
00172     void SetProperty(const wxString& name, bool value);
00173     void SetProperty(const wxVariant& value);
00174 
          // Property getters by name, returning the whole property or its
          // value converted to a specific type.
00176     wxTTSProperty GetProperty(const wxString& name) const;
00177 
00179     wxVariant GetPropertyValue(const wxString& name) const;
00180 
00182     wxString GetPropertyString(const wxString& name) const;
00183 
00185     long GetPropertyLong(const wxString& name) const;
00186 
00188     double GetPropertyDouble(const wxString& name) const;
00189 
          // NOTE(review): declared to return long rather than bool; callers
          // may depend on that, so the signature is left as is.
00191     long GetPropertyBool(const wxString& name) const;
00192 
00194     bool HasProperty(const wxString& name) const;
00195 
          // Property enumeration: names, count, and access by index.
00197     wxArrayString EnumerateProperties() const;
00198 
00200     int GetPropertyCount() const;
00201 
00203     wxTTSProperty GetProperty(size_t i) const;
00204 
          // Accessors for the transformer pointer. The setter only stores the
          // raw pointer; ownership is not established by this header.
00206     wxTTSTransformer* GetTransformer() const { return m_transformer; }
00207 
00209     void SetTransformer(wxTTSTransformer* transformer) { m_transformer = transformer; }
00210 
          // Engine program and data locations (cf. wxTTS_PROP_PROGRAM_LOCATION
          // and wxTTS_PROP_DATA_LOCATION).
00212     void SetProgramLocation(const wxString& location);
00213 
00215     wxString GetProgramLocation() const;
00216 
00218     void SetDataLocation(const wxString& location);
00219 
00221     wxString GetDataLocation() const;
00222 
          // Voice and voice-variant selection, with the corresponding defaults.
00224     void SetVoice(const wxString& voice);
00225 
00227     wxString GetVoice() const;
00228 
00230     wxString GetDefaultVoice() const;
00231 
00233     void SetVoiceVariant(const wxString& variant);
00234 
00236     wxString GetVoiceVariant() const;
00237 
00239     wxString GetDefaultVoiceVariant() const;
00240 
          // Voice compatibility check. The first overload names the engine
          // explicitly; the second presumably uses the current one - confirm.
00247     bool VoicesAreCompatible(const wxString& engine, const wxString& voice1, const wxString& voice2) const;
00248     bool VoicesAreCompatible(const wxString& voice1, const wxString& voice2) const;
00249 
          // Speed, volume and pitch. 'quantity' selects native engine units
          // (the default) or percent.
00251     void SetSpeed(int speed, wxTTSQuantityType quantity = wxTTS_NATIVE);
00252 
00254     int GetSpeed(wxTTSQuantityType quantity = wxTTS_NATIVE) const;
00255 
00257     void SetVolume(int vol, wxTTSQuantityType quantity = wxTTS_NATIVE);
00258 
00260     int GetVolume(wxTTSQuantityType quantity = wxTTS_NATIVE) const;
00261 
00263     void SetPitch(int pitch, wxTTSQuantityType quantity = wxTTS_NATIVE);
00264 
00266     int GetPitch(wxTTSQuantityType quantity = wxTTS_NATIVE) const;
00267 
          // Priority is a plain int; presumably one of wxTTS_Normal,
          // wxTTS_Alert or wxTTS_Over - confirm.
00271     void SetPriority(int priority);
00272 
00274     int GetPriority() const;
00275 
          // Trivial accessors for m_errorCode.
00277     void SetErrorCode(int errorCode) { m_errorCode = errorCode; }
00278 
00280     int GetErrorCode() const { return m_errorCode; }
00281 
          // Lists of voices and voice variants.
00283     wxArrayString GetAvailableVoices() const;
00284 
00286     wxArrayString GetAvailableVoiceVariants() const;
00287 
          // Engine ids, display names and descriptions; presumably parallel
          // arrays with one entry per registered handler - confirm.
00289     wxArrayString GetEngineIds() const;
00290 
00292     wxArrayString GetEngineDisplayNames() const;
00293 
00295     wxArrayString GetEngineDescriptions() const;
00296 
          // Trivial accessors for m_currentHandler. The setter only stores the
          // pointer.
00298     wxTTSHandler* GetCurrentHandler() const { return m_currentHandler; }
00299     
00301     void SetCurrentHandler(wxTTSHandler* handler) { m_currentHandler = handler; }
00302 
          // Handler registry. Whether AddHandler() takes ownership of the
          // pointer is not visible here - confirm.
00304     bool AddHandler(wxTTSHandler* handler);
00305 
00307     wxTTSHandler* FindHandler(const wxString& name) const;
00308 
          // Number of entries in m_handlers, converted to int.
00310     int GetHandlerCount() const { return m_handlers.GetCount(); }
00311 
00313     wxTTSHandler* GetHandler(size_t i) const;
00314 
00316     void ClearHandlers();
00317 
          // Engine selection by id string.
00319     bool SetEngine(const wxString& id);
00320 
00322     wxString GetEngine() const;
00323 
          // Same names as wxTTSHandler::ProcessInput()/GetLastCommand().
00326     bool ProcessInput();
00327 
00329     wxString GetLastCommand() const;
00330 
          // Static path/encoding/XML helpers; they need no instance.
          // 'sep' defaults to wxFILE_SEP_PATH.
00332     static wxString AppendPaths(const wxString& p1, const wxString& p2, const wxString& sep = wxFILE_SEP_PATH);
00333 
          // 'path' is an in/out argument (non-const reference).
00337     static bool StripExtension(wxString& path, const wxString& validExtensions = wxEmptyString);
00338 
00340     static wxString FindHtmlEncoding(const wxString& filename);
00341 
00343     static wxString FindXmlEncoding(const wxString& filename);
00344 
00346     static bool IsXmlString(const wxString& text);
00347 
00349     static bool IsXmlFile(const wxString& filename);
00350 
00351 protected:
00352     int                 m_errorCode;
00353     wxList              m_handlers;
00354     wxTTSHandler*       m_currentHandler;
00355     wxTTSTransformer*   m_transformer;
00356 };
00357 
// Declares wxTTSStringHashMap: a string-keyed hash map whose mapped values are
// wxString pointers. wxTTSTransformer keeps one as m_hashMap.
00358 WX_DECLARE_STRING_HASH_MAP(wxString*, wxTTSStringHashMap);
00359 
// The macros below are single-bit flags for the 'options' int taken by the
// wxTTSTransformer transform/convert methods.
// wxTTS_TRANSFORM_OPTIONS_WORD_SUBSTITUTION is the default for
// TransformString() and TransformFile().
// Naming is not uniform: the first four carry an _OPTIONS_ infix, the rest do
// not.
00360 // Do word substitution
00361 #define wxTTS_TRANSFORM_OPTIONS_WORD_SUBSTITUTION       0x0001
00362 
00363 // Remove title element to stop duplication
00364 #define wxTTS_TRANSFORM_OPTIONS_REMOVE_TITLE_ELEMENT    0x0002
00365 
00366 // Replace Unicode punctuation with simpler punctuation
00367 #define wxTTS_TRANSFORM_OPTIONS_SIMPLIFY_PUNCTUATION    0x0004
00368 
00369 // Remove XML preamble (xml, doctype)
00370 #define wxTTS_TRANSFORM_OPTIONS_REMOVE_XML_PREAMBLE     0x0008
00371 
00372 // Pass to the engine unchanged, except for word substitutions
00373 #define wxTTS_TRANSFORM_PASS_RAW_TEXT                   0x0010
00374 
00375 // Convert HTML to text
00376 #define wxTTS_TRANSFORM_HTML_TO_TEXT                    0x0020
00377 
00378 // Convert HTML to SAPI XML
00379 #define wxTTS_TRANSFORM_HTML_TO_SAPI_XML                0x0040
00380 
00381 // Convert HTML to SSML
00382 #define wxTTS_TRANSFORM_HTML_TO_SSML                    0x0080
00383 
00384 // Convert text to SAPI XML
00385 #define wxTTS_TRANSFORM_TEXT_TO_SAPI_XML                0x0100
00386 
00387 // Convert text to SSML
00388 #define wxTTS_TRANSFORM_TEXT_TO_SSML                    0x0200
00389 
00390 // Speak HTML heading levels
00391 #define wxTTS_TRANSFORM_SPEAK_HEADINGS                  0x0400
00392 
00393 // Speak HTML list numbers
00394 #define wxTTS_TRANSFORM_SPEAK_ORDERED_LIST              0x0800
00395 
// Text transformer applied before speech. It stores a word list and a
// replacement list in two wxArrayString members (m_strings, m_replacements),
// a wxTTSStringHashMap (m_hashMap), two state flags (m_dirty, m_modified) and
// a free-text usage description.
// The transform/convert methods take a bitmask of wxTTS_TRANSFORM_* flags.
// Only declarations are visible here; notes on out-of-line members are
// inferred from names and should be confirmed against the implementation.
00405 class wxTTSTransformer
00406 {    
00407 public:
00408     // Constructors
00409 
00410     wxTTSTransformer();
00411     virtual ~wxTTSTransformer();
00412 
          // Word/replacement table maintenance. 'pos' in InsertString() is
          // presumably an index into the string arrays - confirm.
00414     void AddString(const wxString& word, const wxString& replacement);
00415 
00417     void InsertString(const wxString& word, const wxString& replacement, int pos);
00418 
          // Removal by word or by index.
00420     void RemoveString(const wxString& word);
00421     void RemoveString(size_t i);
00422 
          // Lookup by word. NOTE(review): GetString() and HasString() are
          // not const, so they may update internal state (for example
          // rebuilding the hash map when dirty) - confirm.
00424     wxString GetString(const wxString& word);
00425 
00427     bool HasString(const wxString& word);
00428 
00430     virtual void Clear();
00431 
          // Presumably (re)builds m_hashMap from the string arrays - confirm.
00433     virtual void Index();
00434 
          // Transforms 'input' into 'output' (an out-parameter). 'options'
          // defaults to word substitution only.
00436     virtual bool TransformString(const wxString& input, wxString& output, int options = wxTTS_TRANSFORM_OPTIONS_WORD_SUBSTITUTION);
00437 
          // File-to-file variant with the same default options.
00439     virtual bool TransformFile(const wxString& filename, const wxString& newFilename, int options = wxTTS_TRANSFORM_OPTIONS_WORD_SUBSTITUTION);
00440 
          // Persistence of the transformer to and from a file; the file
          // format is not visible in this header.
00442     virtual bool SaveFile(const wxString& filename);
00443 
00445     virtual bool LoadFile(const wxString& filename);
00446 
          // Format conversion. Each takes a source filename, a raw output
          // stream, a text output stream and the option flags; how the two
          // streams relate is not visible here - confirm.
00448     virtual bool ConvertFormat(const wxString& filename, wxOutputStream& rawStream, wxTextOutputStream& stream, int options);
00449 
00451     virtual bool ConvertHTMLToText(const wxString& filename, wxOutputStream& rawStream, wxTextOutputStream& stream, int options);
00452 
00454     virtual bool ConvertTextToXML(const wxString& filename, wxOutputStream& rawStream, wxTextOutputStream& stream, int options);
00455 
          // Mutable references to the internal arrays. Editing through them
          // bypasses AddString()/RemoveString().
00457     wxArrayString& GetStrings() { return m_strings; }
00458     wxArrayString& GetReplacements() { return m_replacements; }
00459 
          // True when m_strings holds at least one entry.
00461     bool HasStrings() const { return m_strings.GetCount() > 0; }
00462 
          // Trivial accessors for the m_dirty flag.
00464     void SetDirty(bool dirty) { m_dirty = dirty; }
00465     bool GetDirty() const { return m_dirty; }
00466 
          // Trivial accessors for the m_modified flag. The setter defaults to
          // true.
00468     void SetModified(bool mod = true) { m_modified = mod; }
00469     bool GetModified() const { return m_modified; }
00470 
          // Trivial accessors for the usage description text.
00472     void SetUsageDescription(const wxString& descr) { m_usageDescription = descr; }
00473 
00475     wxString GetUsageDescription() const { return m_usageDescription; }
00476 
          // Static helper working on 'str' in place (non-const reference),
          // presumably replacing ch1 with ch2 - confirm.
00478     static bool ReplaceChar(wxString& str, const wxChar& ch1, const wxChar& ch2);
00479 
00480 protected:
00481 
00482     wxArrayString       m_strings;
00483     wxArrayString       m_replacements;
00484     wxTTSStringHashMap  m_hashMap;
00485     bool                m_dirty;
00486     bool                m_modified;
00487     wxString            m_usageDescription;
00488 };
00489 
// Abstract base class for a speech engine back end, derived from
// wxEvtHandler.
// Concrete engines must implement the pure virtual members: Speak, SpeakFile,
// IsSpeaking, Stop, HasCapability, GetDefaultVoice, GetAvailableVoices,
// GetDefaultVoiceVariant and the Get{Min,Max}{Speed,Volume,Pitch} range
// queries.
// The inline defaults visible below let an engine opt out of optional
// features such as pause, resume and skip.
// NOTE(review): several virtual functions have default arguments. Default
// arguments are bound by static type, so overrides should repeat the same
// defaults.
00498 class wxTTSHandler: public wxEvtHandler
00499 {    
00500     DECLARE_CLASS( wxTTSHandler )
00501 
00502 public:
00503 // Constructors
00504 
          // Both arguments default to the empty string.
00505     wxTTSHandler(const wxString& id = wxEmptyString, const wxString& displayName = wxEmptyString);
00506     ~wxTTSHandler();
00507 
00508 // Operations
00509 
          // Engine start-up and shutdown hooks. The defaults do nothing and
          // report success.
00511     virtual bool Initialize() { return true; }
00512 
00514     virtual bool Uninitialize() { return true; }
00515 
          // Mandatory: speak a string, or the contents of a file. 'options'
          // defaults to wxTTS_SPEAK_ASYNC; 'waveFilename' defaults to empty.
00517     virtual bool Speak(const wxString& text, int options = wxTTS_SPEAK_ASYNC) = 0;
00518 
00520     virtual bool SpeakFile(const wxString& filename, int options = wxTTS_SPEAK_ASYNC, const wxString& waveFilename = wxEmptyString) = 0;
00521 
00523     virtual bool IsSpeaking() const = 0;
00524 
          // Default: never paused.
00526     virtual bool IsPaused() const { return false; }
00527 
00529     virtual bool Stop() = 0;
00530 
          // Defaults return false.
00532     virtual bool Pause() { return false; }
00533 
00535     virtual bool Resume() { return false; }
00536 
          // Default ignores both arguments and returns false.
00540     virtual bool Skip(int WXUNUSED(toSkip), wxTTSSkipType WXUNUSED(itemType) = wxTTS_SkipSentence) { return false; }
00541 
00543     virtual bool CanSkip() const { return false; }
00544 
00546     virtual bool HasCapability(wxTTSCapability capability) const = 0;
00547 
          // Default treats every pair of voices as compatible.
00552     virtual bool VoicesAreCompatible(const wxString& WXUNUSED(voice1), const wxString& WXUNUSED(voice2)) const { return true; }
00553 
          // Default does nothing and returns true.
00556     virtual bool ProcessInput() { return true; }
00557 
          // Default returns the empty string.
00559     virtual wxString GetLastCommand() const { return wxEmptyString; }
00560 
          // Property setters, overloaded on the value type. Properties are
          // held in m_properties.
00562     virtual void SetProperty(const wxTTSProperty& property);
00563     virtual void SetProperty(const wxString& name, const wxString& value);
00564     virtual void SetProperty(const wxString& name, const wxString& value, const wxArrayString& choices);
00565     virtual void SetProperty(const wxString& name, long value);
00566     virtual void SetProperty(const wxString& name, int value);
00567     virtual void SetProperty(const wxString& name, bool value);
00568     virtual void SetProperty(const wxVariant& value);
00569 
          // Property getters by name.
00571     virtual wxTTSProperty GetProperty(const wxString& name) const;
00572 
00574     virtual wxVariant GetPropertyValue(const wxString& name) const;
00575 
00577     virtual wxString GetPropertyString(const wxString& name) const;
00578 
00580     virtual long GetPropertyLong(const wxString& name) const;
00581 
00583     virtual double GetPropertyDouble(const wxString& name) const;
00584 
          // NOTE(review): declared to return long rather than bool, matching
          // wxTextToSpeech::GetPropertyBool().
00586     virtual long GetPropertyBool(const wxString& name) const;
00587 
00589     virtual bool HasProperty(const wxString& name) const;
00590 
00592     virtual wxArrayString EnumerateProperties() const;
00593 
          // Number of entries in m_properties, converted to int.
00595     virtual int GetPropertyCount() const { return m_properties.GetCount(); }
00596 
00598     virtual wxTTSProperty GetProperty(size_t i) const;
00599 
          // Direct const and mutable access to the property collection.
00601     virtual const wxTTSProperties& GetProperties() const { return m_properties; }
00602     virtual wxTTSProperties& GetProperties() { return m_properties; }
00603 
          // The convenience accessors below read and write the property named
          // by the corresponding wxTTS_PROP_* macro.
00605     virtual void SetProgramLocation(const wxString& location) { SetProperty(wxTTS_PROP_PROGRAM_LOCATION, location); }
00606     virtual wxString GetProgramLocation() const { return GetPropertyString(wxTTS_PROP_PROGRAM_LOCATION); }
00607 
00609     virtual void SetDataLocation(const wxString& location) { SetProperty(wxTTS_PROP_DATA_LOCATION, location); }
00610     virtual wxString GetDataLocation() const { return GetPropertyString(wxTTS_PROP_DATA_LOCATION); }
00611 
00613     virtual void SetVoice(const wxString& voice) { SetProperty(wxTTS_PROP_VOICE, voice); }
00614     virtual wxString GetVoice() const { return GetPropertyString(wxTTS_PROP_VOICE); }
00615 
00617     virtual wxString GetDefaultVoice() const = 0;
00618 
00620     virtual wxArrayString GetAvailableVoices() const = 0;
00621 
          // Default: no variants (empty array).
00623     virtual wxArrayString GetAvailableVoiceVariants() const { return wxArrayString(); }
00624 
00626     virtual void SetVoiceVariant(const wxString& variant) { SetProperty(wxTTS_PROP_VOICE_VARIANT, variant); }
00627     virtual wxString GetVoiceVariant() const { return GetPropertyString(wxTTS_PROP_VOICE_VARIANT); }
00628 
00630     virtual wxString GetDefaultVoiceVariant() const = 0;
00631 
          // Speed, volume and pitch. 'quantity' selects native units (the
          // default) or percent. The pure virtual Min/Max queries give the
          // engine's native range.
00633     virtual void SetSpeed(int speed, wxTTSQuantityType quantity = wxTTS_NATIVE);
00634     virtual int GetSpeed(wxTTSQuantityType quantity = wxTTS_NATIVE) const;
00635     virtual int GetMinSpeed() const = 0;
00636     virtual int GetMaxSpeed() const = 0;
00637 
00639     virtual void SetVolume(int vol, wxTTSQuantityType quantity = wxTTS_NATIVE);
00640     virtual int GetVolume(wxTTSQuantityType quantity = wxTTS_NATIVE) const;
00641     virtual int GetMinVolume() const = 0;
00642     virtual int GetMaxVolume() const = 0;
00643 
00645     virtual void SetPitch(int pitch, wxTTSQuantityType quantity = wxTTS_NATIVE);
00646     virtual int GetPitch(wxTTSQuantityType quantity = wxTTS_NATIVE) const;
00647     virtual int GetMinPitch() const = 0;
00648     virtual int GetMaxPitch() const = 0;
00649 
          // Trivial accessors for m_errorCode.
00651     void SetErrorCode(int errorCode) { m_errorCode = errorCode; }
00652     int GetErrorCode() const { return m_errorCode; }
00653 
          // Back-pointer to the wxTextToSpeech object. The setter only stores
          // the raw pointer.
00655     void SetTextToSpeechObject(wxTextToSpeech* tts) { m_textToSpeech = tts; }
00656     wxTextToSpeech* GetTextToSpeechObject() const { return m_textToSpeech; }
00657 
          // Handler identifier. Unlike the two getters below, GetId() returns
          // by value.
00659     void SetId(const wxString& id) { m_id = id; }
00660     wxString GetId() const { return m_id; }
00661 
00663     void SetDisplayName(const wxString& name) { m_displayName = name; }
00664     const wxString& GetDisplayName() const { return m_displayName; }
00665 
00667     void SetDescription(const wxString& descr) { m_description = descr; }
00668     const wxString& GetDescription() const { return m_description; }
00669 
          // Initialisation helper; its body is not part of this header.
00671     void Init();
00672 
          // Trivial accessors for the m_isInitialized flag.
00674     bool GetInitialized() const { return m_isInitialized; }
00675     void SetInitialized(bool init) { m_isInitialized = init; }
00676 
          // Trivial accessors for m_options.
00678     int GetOptions() const { return m_options; }
00679     void SetOptions(int options) { m_options = options; }
00680 
          // Presumably removes the file named by m_tempFilename - confirm.
00682     void DeleteTempFile();
00683 
          // Static conversions between a percent value and a native value
          // within [minValue, maxValue]. Rounding and clamping behaviour are
          // not visible here.
00685     static int PercentToNativeValue(int value, int minValue, int maxValue);
00686     static int NativeValueToPercent(int value, int minValue, int maxValue);
00687 
          // Returns a wxTTS_TRANSFORM_* bitmask. 'mandatoryOptions' defaults
          // to 0 and is presumably OR-ed into the result - confirm.
00689     virtual int CreateTransformerOptions(int mandatoryOptions = 0) const;
00690 
          // Presumably runs the transformer over 'originalFilename' and
          // returns the name of the resulting file - confirm.
00692     wxString DoTransformation(const wxString& originalFilename, int transformationFlags);
00693 
          // Out-of-line; presumably fetched from the wxTextToSpeech object,
          // since this class has no transformer member of its own - confirm.
00695     wxTTSTransformer* GetTransformer() const;
00696 
00697 protected:
00698     int                 m_errorCode;
00699     int                 m_options;
00700     wxString            m_tempFilename;     // Temp name when applying transformations
00701     wxString            m_id;               // The identifier for this handler
00702     wxString            m_displayName;      // Display name for this handler
00703     wxString            m_description;      // Description for this handler
00704     wxTextToSpeech*     m_textToSpeech;
00705     wxTTSProperties     m_properties;
00706     bool                m_isInitialized;
00707 };
00708 
00717 class wxTTSSpeechSettingsInfo
00718 {
00719 public:
00720     wxTTSSpeechSettingsInfo() { Init(); }
00721 
00722     void Init()
00723     { m_speed = wxTTS_SPEED_DEFAULT_PERCENT; m_volume = wxTTS_VOLUME_DEFAULT_PERCENT;
00724       m_pitch = wxTTS_PITCH_DEFAULT_PERCENT; m_speech = NULL; m_testText = _("Testing text to speech settings."); }
00725     void Copy(const wxTTSSpeechSettingsInfo& info)
00726     { m_speech = info.m_speech; m_testText = info.m_testText;
00727       m_speed = info.m_speed; m_volume = info.m_volume; m_pitch = info.m_pitch; m_engine = info.m_engine; }
00728     void operator=(const wxTTSSpeechSettingsInfo& info) { Copy(info); }
00729 
00731     bool ApplySettings();
00732 
00733     int                 m_speed, m_volume, m_pitch;
00734     wxString            m_engine, m_testText;
00735     wxTextToSpeech*     m_speech;
00736 };
00737 
00738 #endif
00739     // _WX_TTS_

Generated on Wed May 6 19:20:19 2009 for AxTk by  doxygen 1.5.1