00001
00002
00003
00004
00005
00006
00007
00008
00009
00011
00012 #ifndef _TTS_HTMLPARSER_H_
00013 #define _TTS_HTMLPARSER_H_
00014
00015 #include "wx/module.h"
00016 #include "wx/stream.h"
00017
00018
00019
00020
00021
00022
00023 class wxTTSSimpleHtmlAttribute
00024 {
00025 friend class wxTTSSimpleHtmlTag;
00026 public:
00027 wxTTSSimpleHtmlAttribute(const wxString& name, const wxString& value)
00028 {
00029 m_name = name; m_value = value; m_next = NULL;
00030 }
00032
00033
00034 void Write(wxOutputStream& stream);
00035
00037 const wxString& GetName() const { return m_name; }
00038 const wxString& GetValue() const { return m_value; }
00039
00040 wxTTSSimpleHtmlAttribute* GetNextAttribute() { return m_next; }
00041 void SetNextAttribute(wxTTSSimpleHtmlAttribute* attr) { m_next = attr; }
00042
00043 bool HasName(const wxString& name) const { return (0 == m_name.CmpNoCase(name)); }
00044 bool HasValue(const wxString& val) const { return (0 == m_value.CmpNoCase(val)); }
00045
00046 private:
00047 wxString m_name;
00048 wxString m_value;
00049 wxTTSSimpleHtmlAttribute* m_next;
00050 };
00051
00052
00053
00054
00055
00056
00057
// Tag kind constants, implicitly numbered 0..5 in declaration order.
// Presumably these are the values carried by wxTTSSimpleHtmlTag's integer
// type (constructor tagType / GetType()) -- TODO confirm against the
// implementation.
enum
{
    wxTTSSimpleHtmlTag_Text,
    wxTTSSimpleHtmlTag_TopLevel,
    wxTTSSimpleHtmlTag_Open,
    wxTTSSimpleHtmlTag_Close,
    wxTTSSimpleHtmlTag_Directive,
    wxTTSSimpleHtmlTag_Entity
};
00060
00061 class wxTTSSimpleHtmlTag
00062 {
00063 public:
00064 wxTTSSimpleHtmlTag(const wxString& tagName, int tagType);
00065 ~wxTTSSimpleHtmlTag();
00066
00068 void ClearAttributes();
00069 wxTTSSimpleHtmlAttribute* FindAttribute(const wxString& name) const ;
00070 void AppendAttribute(const wxString& name, const wxString& value);
00071 void ClearChildren();
00072 void AppendTag(wxTTSSimpleHtmlTag* tag);
00073
00074 void Write(wxOutputStream& stream);
00075
00076
00077 wxString GetTagText();
00078
00080 const wxString& GetName() const { return m_name; }
00081 void SetName(const wxString& name) { m_name = name; }
00082
00083 int GetType() const { return m_type; }
00084 void SetType(int t) { m_type = t; }
00085
00086
00087 const wxString& GetText() const { return m_text; }
00088 void SetText(const wxString& text) { m_text = text; }
00089
00090 wxTTSSimpleHtmlAttribute* GetFirstAttribute() { return m_attributes; }
00091 void SetFirstAttribute(wxTTSSimpleHtmlAttribute* attr) { m_attributes = attr; }
00092
00093 int GetAttributeCount() const ;
00094 wxTTSSimpleHtmlAttribute* GetAttribute(int i) const ;
00095
00096 wxTTSSimpleHtmlTag* GetChildren() const { return m_children; }
00097 void SetChildren(wxTTSSimpleHtmlTag* children) { m_children = children; }
00098
00099 wxTTSSimpleHtmlTag* GetParent() const { return m_parent; }
00100 void SetParent(wxTTSSimpleHtmlTag* parent) { m_parent = parent; }
00101 int GetChildCount() const;
00102 wxTTSSimpleHtmlTag* GetChild(int i) const;
00103 wxTTSSimpleHtmlTag* GetNext() const { return m_next; }
00104
00106 bool NameIs(const wxString& name) { return (m_name.CmpNoCase(name) == 0); }
00107 bool HasAttribute(const wxString& name, const wxString& value) const;
00108 bool HasAttribute(const wxString& name) const;
00109 bool GetAttributeValue(wxString& value, const wxString& attrName);
00110
00111
00112 wxTTSSimpleHtmlTag* FindTag(const wxString& tagName, const wxString& attrName = wxEmptyString);
00113
00114
00115 bool FindTextUntilTagClose(wxString& text, const wxString& tagName);
00116
00117 private:
00118 wxString m_name;
00119 int m_type;
00120 wxString m_text;
00121 wxTTSSimpleHtmlAttribute* m_attributes;
00122
00123
00124 wxTTSSimpleHtmlTag* m_children;
00125 wxTTSSimpleHtmlTag* m_next;
00126 wxTTSSimpleHtmlTag* m_parent;
00127 };
00128
00129
00130
00131
00132
00133
00134 class wxTTSSimpleHtmlParser : public wxObject
00135 {
00136
00137 public:
00138 wxTTSSimpleHtmlParser();
00139 ~wxTTSSimpleHtmlParser();
00140
00142 bool ParseFile(const wxString& filename, const wxString& encoding = wxEmptyString);
00143 bool ParseString(const wxString& str);
00144 void Clear();
00145
00146 void Write(wxOutputStream& stream);
00147 bool WriteFile(wxString& filename);
00148
00150
00151
00152 bool ParseHtml(wxTTSSimpleHtmlTag* parent);
00153
00154 wxTTSSimpleHtmlTag* ParseTagHeader();
00155 wxTTSSimpleHtmlTag* ParseTagClose();
00156 bool ParseAttributes(wxTTSSimpleHtmlTag* tag);
00157 wxTTSSimpleHtmlTag* ParseDirective();
00158 bool ParseComment();
00159
00160 bool ParseText(wxString& text);
00161
00162 bool EatWhitespace();
00163 bool EatWhitespace(int& pos);
00164 bool ReadString(wxString& str, bool eatIt = FALSE);
00165 bool ReadWord(wxString& str, bool eatIt = FALSE);
00166 bool ReadNumber(wxString& str, bool eatIt = FALSE);
00167
00168 bool ReadLiteral(wxString& str, bool eatIt = FALSE);
00169
00170 bool IsDirective();
00171 bool IsComment();
00172 bool IsString();
00173 bool IsWord();
00174 bool IsTagClose();
00175 bool IsTagStartBracket(int ch);
00176 bool IsTagEndBracket(int ch);
00177 bool IsWhitespace(int ch);
00178 bool IsAlpha(int ch);
00179 bool IsWordChar(int ch);
00180 bool IsNumeric(int ch);
00181
00182
00183 bool Matches(const wxString& tok, bool eatIt = FALSE) ;
00184 bool Eof() const { return (m_pos >= m_length); }
00185 bool Eof(int pos) const { return (pos >= m_length); }
00186
00187 void SetPosition(int pos) { m_pos = pos; }
00188
00189
00191 wxTTSSimpleHtmlTag* GetTopLevelTag() const { return m_topLevel; }
00192
00193
00194 int GetChar(size_t i) const;
00195
00196 private:
00197
00198 wxTTSSimpleHtmlTag* m_topLevel;
00199 int m_pos;
00200 int m_length;
00201 wxString m_text;
00202
00203 };
00204
00205 #endif
00206
00207