1
2
3
4
5
6
7 package org.wcb.plugins.speech;
8
9 import java.util.StringTokenizer;
10 import java.util.Vector;
11
/**
 * Holds some very simple rules for taking text and formatting it into
 * allophones. The concatenation of the sounds creates the words that are heard
 * and understood. For more details on this read the JavaWorld article
 * http://www.javaworld.com/javaworld/jw-08-2001/jw-0817-javatalk_p.html
 *
 * <p>Allophone strings produced by this class separate the individual sounds
 * with the pipe character ({@code |}).
 *
 * @author wbogaardt
 */
public class Translator {

    /**
     * Rule table stored as two parallel rows: {@code RULES[0][i]} is the text
     * to look for and {@code RULES[1][i]} is the allophone string that
     * replaces it. Both rows must always have the same length.
     */
    private static final String[][] RULES = {{"AA", "AER", "AR", "AY", "BB", "CH", "CK",
        "DD", "DTH", "EAR", "EAU", "EE", "ER", "ERR", "GG", "GGG", "HH", "II", "LL", "NG", "NN",
        "OO", "OR", "OU", "OUU", "OW", "OY", "TH", "TT", "UH", "WH", "YY", "ZH", "aa", "aer",
        "ar", "ay", "bb", "ch", "ck", "dd", "dth", "ear", "eau", "ee", "er", "err", "gg",
        "ggg", "hh", "ii", "ll", "ng", "nn", "oo", "or", "ou", "ouu", "ow", "oy", "th",
        "tt", "uh", "wh", "yy", "zh", "to", "hello", "how", "on", "off"},
        {"AA", "AER", "AR", "AY", "BB", "CH", "CK",
        "DD", "DTH", "EAR", "EAU", "EE", "ER", "ERR", "GG", "GGG", "HH", "II", "LL", "NG", "NN",
        "OO", "OR", "OU", "OUU", "OW", "OY", "TH", "TT", "UH", "WH", "YY", "ZH", "aa", "aer", "ar",
        "ay", "bb", "ch", "ck", "dd", "dth", "ear", "eau", "ee", "er", "err", "gg", "ggg",
        "hh", "ii", "ll", "ng", "nn", "oo", "or", "ou", "ouu", "ow", "oy", "th", "tt", "uh", "wh",
        "yy", "zh", "t|ouu", "H|e|l|oo", "h|ow", "O|n", "O|ff"}};

    /** Text patterns of the rule table (first row of {@link #RULES}). */
    private static final String[] PATTERNS = RULES[0];

    /** Allophone replacements of the rule table (second row of {@link #RULES}). */
    private static final String[] ALLOPHONES = RULES[1];

    /** Not referenced anywhere in this class; kept as a constant. */
    private static final String ON = "ON";

    /** Not referenced anywhere in this class; kept as a constant. */
    private static final String OFF = "OFF";

    /**
     * Allophone string returned for house or device codes that are not
     * recognised (presumably pronounced "unknown" -- confirm with the speech plugin).
     */
    private static final String UNKNOWN = "u|h|n n|ou|n";

    /**
     * Element appended after every translated word by {@link #getWords(String)}
     * (presumably a short pause marker -- confirm with the speech plugin).
     */
    private static final String WORD_SEPARATOR = "10ms";

    /**
     * Creates a phoneme translator. This class is old.
     *
     * @deprecated this class is old
     */
    @Deprecated
    public Translator() {
    }

    /**
     * Main method to access this class from the command line. Applies the
     * substring rules to the first argument and prints the result.
     *
     * @param args string of words to translate; only {@code args[0]} is used
     */
    public static void main(String[] args) {
        if (args == null || args.length == 0) {
            // Previously this failed with ArrayIndexOutOfBoundsException.
            System.err.println("Usage: Translator <text>");
            return;
        }
        Translator t = new Translator();
        System.out.println(t.parseForMatch(args[0]));
    }

    /**
     * Splits the given text on spaces and translates every token.
     *
     * <ul>
     *   <li>A single non-digit character is treated as a house code letter.</li>
     *   <li>A one or two digit number is treated as a device code.</li>
     *   <li>Anything else is translated with {@link #getPhoneWord(String)}.</li>
     * </ul>
     * After every translated token the element {@code "10ms"} is added.
     *
     * @param words the space separated words to translate; {@code null} is
     *              treated like an empty string
     * @return a vector of allophone strings to say, never {@code null}
     */
    public Vector<String> getWords(String words) {
        Vector<String> returnValue = new Vector<String>();
        if (words == null) {
            return returnValue;
        }
        StringTokenizer stoken = new StringTokenizer(words, " ");
        while (stoken.hasMoreTokens()) {
            // StringTokenizer never returns empty tokens, so charAt(0) is safe.
            String word = stoken.nextToken();
            if (word.length() < 2 && !Character.isDigit(word.charAt(0))) {
                returnValue.addElement(allophoneHouseCode(word.charAt(0)));
            } else if (word.length() < 3 && isAllDigits(word)) {
                // Only parse when every character is a digit; a token such as
                // "1a" used to blow up with NumberFormatException here.
                returnValue.addElement(allophoneDeviceCode(Integer.parseInt(word)));
            } else {
                returnValue.addElement(getPhoneWord(word));
            }
            returnValue.addElement(WORD_SEPARATOR);
        }
        return returnValue;
    }

    /**
     * Gets the allophone string of a single word. If the whole word matches a
     * rule pattern (ignoring case) the rule's allophone string is returned;
     * otherwise the word is spelled out character by character, each character
     * followed by a pipe.
     *
     * @param words the word to translate; {@code null} yields an empty string
     * @return the pipe separated characters which can be understood as phonemes
     */
    public String getPhoneWord(String words) {
        if (words == null) {
            return "";
        }
        // Walk the rule columns: pattern i lives in PATTERNS[i] and its
        // replacement in ALLOPHONES[i]. (The old code walked the two rows
        // instead and therefore never looked past the first rule.)
        for (int i = 0; i < PATTERNS.length; i++) {
            if (PATTERNS[i].equalsIgnoreCase(words)) {
                return ALLOPHONES[i];
            }
        }
        return parseAllophone(words);
    }

    /**
     * Applies every rule, in table order, to all (case-sensitive) occurrences
     * of its pattern inside the given text. Rules are applied one after the
     * other, so a later rule may match text produced by an earlier one.
     *
     * @param word the text to translate
     * @return the text with every matched pattern replaced by its allophone
     *         string followed by a pipe
     */
    private String parseForMatch(String word) {
        String result = word;
        for (int j = 0; j < PATTERNS.length; j++) {
            if (result.indexOf(PATTERNS[j]) != -1) {
                result = findAndReplace(result, PATTERNS[j], ALLOPHONES[j]);
            }
        }
        return result;
    }

    /**
     * Replaces every occurrence of {@code sFind} in {@code s} with
     * {@code replace} followed by a pipe.
     *
     * @param s       the text to search
     * @param sFind   the text to look for; when {@code null} or empty the
     *                source is returned unchanged
     * @param replace the replacement; {@code null} is treated as empty
     * @return the text with all occurrences replaced
     */
    private static String findAndReplace(String s, String sFind, String replace) {
        if (sFind == null || sFind.length() == 0) {
            // An empty search string matches at index 0 forever and would
            // never let the loop below terminate.
            return s;
        }
        String replacement = (replace == null) ? "" : replace;
        String remaining = s;
        StringBuilder result = new StringBuilder();

        int matchIndex = remaining.indexOf(sFind);
        while (matchIndex != -1) {
            result.append(remaining.substring(0, matchIndex));
            result.append(replacement).append('|');
            remaining = remaining.substring(matchIndex + sFind.length());
            matchIndex = remaining.indexOf(sFind);
        }
        result.append(remaining);
        return result.toString();
    }

    /**
     * Spells a word out: every character is emitted followed by a pipe, so the
     * result always ends with a pipe (for example {@code "cat"} becomes
     * {@code "c|a|t|"}).
     *
     * @param word the word to spell out
     * @return the pipe separated characters
     */
    private static String parseAllophone(String word) {
        StringBuilder returnString = new StringBuilder();
        for (int i = 0; i < word.length(); i++) {
            returnString.append(word.charAt(i)).append('|');
        }
        return returnString.toString();
    }

    /**
     * Tells whether the text consists of digits only.
     *
     * @param text the text to check, not {@code null}
     * @return {@code true} if the text is non-empty and every character is a digit
     */
    private static boolean isAllDigits(String text) {
        if (text.length() == 0) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isDigit(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Converts a house code character to a proper allophone string that the
     * speech plugin can handle. The letters A to P are recognised in either
     * case; anything else yields the "unknown" allophone string.
     *
     * @param hs the char that needs to be converted
     * @return the resulting allophone string
     */
    private static String allophoneHouseCode(char hs) {
        // Upper-case first so that "b" is spoken the same way as "B".
        switch (Character.toUpperCase(hs)) {
            case 'A':
                return "ay";
            case 'B':
                return "b|ee";
            case 'C':
                return "c|ee";
            case 'D':
                return "d|ee";
            case 'E':
                return "ee";
            case 'F':
                return "e|ff";
            case 'G':
                return "g|ee";
            case 'H':
                return "e|ch";
            case 'I':
                return "ii";
            case 'J':
                return "j|a|ee";
            case 'K':
                return "k|ay";
            case 'L':
                return "e|l";
            case 'M':
                return "e|m";
            case 'N':
                return "e|n";
            case 'O':
                return "o|h";
            case 'P':
                return "p|ee";
            default:
                return UNKNOWN;
        }
    }

    /**
     * Converts a device code number to a proper allophone string that the
     * speech plugin can handle. The numbers 1 to 16 are recognised; anything
     * else yields the "unknown" allophone string.
     *
     * @param cs integer that needs to be converted
     * @return the resulting allophone string
     */
    private static String allophoneDeviceCode(int cs) {
        switch (cs) {
            case 1:
                return "w|o|n";
            case 2:
                return "t|ouu";
            case 3:
                return "th|r|ee";
            case 4:
                return "f|or";
            case 5:
                return "f|ii|v";
            case 6:
                return "s|i|k|s";
            case 7:
                return "s|e|v|n";
            case 8:
                return "e|g|h|t|t";
            case 9:
                return "n|ii|n";
            case 10:
                return "t|e|h|n";
            case 11:
                return "e|l|e|v|n";
            case 12:
                return "t|a|wh|e|l|v";
            case 13:
                return "th|er|t|ee|n";
            case 14:
                return "f|or|t|ee|n";
            case 15:
                return "f|i|f|t|ee|n";
            case 16:
                return "s|i|k|s|t|ee|n";
            default:
                return UNKNOWN;
        }
    }

}