1   /*
2    * Translator.java
3    *
4    * Created on August 22, 2001, 11:56 AM
5    */
6   
7   package org.wcb.plugins.speech;
8   
9   import java.util.StringTokenizer;
10  import java.util.Vector;
11  
/**
 * Holds some very simple rules for taking a text and formatting it into
 * allophones.  The concatenation of the sounds creates the words that are
 * heard and understood.  For more details on this read the JavaWorld article
 * http://www.javaworld.com/javaworld/jw-08-2001/jw-0817-javatalk_p.html
 *
 * <p>Allophone strings produced by this class use {@code |} to separate the
 * individual sounds of a word.
 *
 * @author  wbogaardt
 */
public class Translator {

    /** Index of the row in {@link #RULES} that holds the text patterns. */
    private static final int PATTERN_ROW = 0;

    /** Index of the row in {@link #RULES} that holds the matching allophones. */
    private static final int ALLOPHONE_ROW = 1;

    /**
     * Translation table made of two parallel rows: the pattern at
     * {@code RULES[PATTERN_ROW][i]} translates to the allophone string at
     * {@code RULES[ALLOPHONE_ROW][i]}.
     */
    private static final String[][] RULES = {{"AA", "AER", "AR", "AY", "BB", "CH", "CK",
    "DD", "DTH", "EAR", "EAU", "EE", "ER", "ERR", "GG", "GGG", "HH", "II", "LL", "NG", "NN",
    "OO", "OR", "OU", "OUU", "OW", "OY", "TH", "TT", "UH", "WH", "YY", "ZH", "aa", "aer",
    "ar", "ay", "bb", "ch", "ck", "dd", "dth", "ear", "eau", "ee", "er", "err", "gg",
    "ggg", "hh", "ii", "ll", "ng", "nn", "oo", "or", "ou", "ouu", "ow", "oy", "th",
    "tt", "uh", "wh", "yy", "zh", "to", "hello", "how", "on", "off"},
    {"AA", "AER", "AR", "AY", "BB", "CH", "CK",
    "DD", "DTH", "EAR", "EAU", "EE", "ER", "ERR", "GG", "GGG", "HH", "II", "LL", "NG", "NN",
    "OO", "OR", "OU", "OUU", "OW", "OY", "TH", "TT", "UH", "WH", "YY", "ZH", "aa", "aer", "ar",
    "ay", "bb", "ch", "ck", "dd", "dth", "ear", "eau", "ee", "er", "err", "gg", "ggg",
    "hh", "ii", "ll", "ng", "nn", "oo", "or", "ou", "ouu", "ow", "oy", "th", "tt", "uh", "wh",
    "yy", "zh", "t|ouu", "H|e|l|oo", "h|ow", "O|n", "O|ff"}};

    /** Allophone string returned for letters and numbers that have no entry. */
    private static final String UNKNOWN = "u|h|n n|ou|n";

    /** Pause marker appended after every translated word. */
    private static final String WORD_PAUSE = "10ms";

    /**
     * Creates a phoneme translator.
     *
     * @deprecated This class is old.
     */
    public Translator() {
    }

    /**
     * Main method to access this class from the command line.  Translates the
     * first argument and prints the result to standard output.
     *
     * @param args the first element is the text to translate
     */
    public static void main(String[] args) {
        if (args == null || args.length == 0)
        {
            System.err.println("Usage: Translator <text>");
            return;
        }
        Translator t = new Translator();
        System.out.println(t.parseForMatch(args[0]));
    }

    /**
     * Splits the text on spaces and translates every word.  A single
     * non-digit character is spoken as a letter, a one or two digit number
     * is spoken as a number, and anything else goes through
     * {@link #getPhoneWord(String)}.  Each translated word is followed by a
     * {@code "10ms"} pause entry.
     *
     * @param words the space separated words to say; {@code null} is treated
     *              as empty text
     * @return a vector of strings alternating allophone string and pause
     */
    public Vector getWords(String words) {
        Vector returnValue = new Vector();
        if (words == null)
        {
            return returnValue;
        }
        StringTokenizer stoken = new StringTokenizer(words, " ");
        while (stoken.hasMoreTokens())
        {
            String word = stoken.nextToken();

            if (word.length() == 1 && !Character.isDigit(word.charAt(0)))
            {
                /* a single letter is spelled out */
                returnValue.addElement(allophoneHouseCode(word.charAt(0)));
            }
            else if (word.length() < 3 && isAllDigits(word))
            {
                /* every character is a digit, so parseInt cannot fail here */
                returnValue.addElement(allophoneDeviceCode(Integer.parseInt(word)));
            }
            else
            {
                returnValue.addElement(getPhoneWord(word));
            }
            returnValue.addElement(WORD_PAUSE);
        }
        return returnValue;
    }

    /**
     * Gets the allophone string of a whole word.  The rule table is searched
     * for an exact match first and for a case-insensitive match second; a
     * word without a rule is spelled out character by character.
     *
     * @param words the word to look up in the rules
     * @return the pipe separated sounds for the word
     */
    public String getPhoneWord(String words) {
        String[] patterns = RULES[PATTERN_ROW];
        String[] allophones = RULES[ALLOPHONE_ROW];

        /* prefer the entry written in the same case as the input */
        for (int i = 0; i < patterns.length; i++)
        {
            if (patterns[i].equals(words))
            {
                return allophones[i];
            }
        }
        for (int i = 0; i < patterns.length; i++)
        {
            if (patterns[i].equalsIgnoreCase(words))
            {
                return allophones[i];
            }
        }
        return parseAllophone(words);
    }

    /**
     * Translates every rule pattern found inside the text.  The text is
     * scanned once from left to right; at each position the longest matching
     * pattern (case sensitive) is replaced by its allophone followed by
     * {@code |}, and characters that start no pattern are copied unchanged.
     * Scanning once keeps already translated output from being matched again.
     *
     * @param word the text to translate
     * @return the text with all matched patterns replaced
     */
    private String parseForMatch(String word) {
        String[] patterns = RULES[PATTERN_ROW];
        String[] allophones = RULES[ALLOPHONE_ROW];
        StringBuffer translated = new StringBuffer();
        int position = 0;
        while (position < word.length())
        {
            int best = -1;
            for (int j = 0; j < patterns.length; j++)
            {
                if (word.startsWith(patterns[j], position)
                        && (best == -1 || patterns[j].length() > patterns[best].length()))
                {
                    best = j;
                }
            }
            if (best == -1)
            {
                translated.append(word.charAt(position));
                position++;
            }
            else
            {
                translated.append(allophones[best]).append('|');
                position += patterns[best].length();
            }
        }
        return translated.toString();
    }

    /**
     * Spells a word out by following every character with {@code |}.
     *
     * @param word the word to spell
     * @return the characters of the word, each followed by a pipe
     */
    private String parseAllophone(String word) {
        StringBuffer returnString = new StringBuffer();
        for (int i = 0; i < word.length(); i++)
        {
            returnString.append(word.charAt(i)).append('|');
        }
        return returnString.toString();
    }

    /**
     * Tells whether the word consists of digits only.
     *
     * @param word the word to check
     * @return true when the word is not empty and every character is a digit
     */
    private static boolean isAllDigits(String word) {
        for (int i = 0; i < word.length(); i++)
        {
            if (!Character.isDigit(word.charAt(i)))
            {
                return false;
            }
        }
        return word.length() > 0;
    }

    /**
     * Converts a letter to an allophone string that the speech plugin can
     * handle.  Letters A through P are known, in either case; any other
     * character yields the "unknown" allophone string.
     *
     * @param hs the char that needs to be converted
     * @return the resulting allophone string
     */
    private String allophoneHouseCode(char hs) {
        switch(Character.toUpperCase(hs))
        {
            case 'A':
                return "ay";
            case 'B':
                return "b|ee";
            case 'C':
                return "c|ee";
            case 'D':
                return "d|ee";
            case 'E':
                return "ee";
            case 'F':
                return "e|ff";
            case 'G':
                return "g|ee";
            case 'H':
                return "e|ch";
            case 'I':
                return "ii";
            case 'J':
                return "j|a|ee";
            case 'K':
                return "k|ay";
            case 'L':
                return "e|l";
            case 'M':
                return "e|m";
            case 'N':
                return "e|n";
            case 'O':
                return "o|h";
            case 'P':
                return "p|ee";
            default:
                return UNKNOWN;
        }
    }

    /**
     * Converts a number to an allophone string that the speech plugin can
     * handle.  Numbers 1 through 16 are known; any other value yields the
     * "unknown" allophone string.
     *
     * @param cs integer that needs to be converted
     * @return the resulting allophone string
     */
    private String allophoneDeviceCode(int cs) {
        switch(cs)
        {
            case 1:
                return "w|o|n";
            case 2:
                return "t|ouu";
            case 3:
                return "th|r|ee";
            case 4:
                return "f|or";
            case 5:
                return "f|ii|v";
            case 6:
                return "s|i|k|s";
            case 7:
                return "s|e|v|n";
            case 8:
                return "e|g|h|t|t";
            case 9:
                return "n|ii|n";
            case 10:
                return "t|e|h|n";
            case 11:
                return "e|l|e|v|n";
            case 12:
                return "t|a|wh|e|l|v";
            case 13:
                return "th|er|t|ee|n";
            case 14:
                return "f|or|t|ee|n";
            case 15:
                return "f|i|f|t|ee|n";
            case 16:
                return "s|i|k|s|t|ee|n";
            default:
                return UNKNOWN;
        }
    }

}