View Javadoc

1   /*
2     Java Japanese-English dictionary/Kanjidic browser
3     Copyright (C) 1997 Todd David Rudick
4   
5     This program is free software; you can redistribute it and/or
6     modify it under the terms of the GNU General Public License
7     as published by the Free Software Foundation; either version 2
8     of the License, or (at your option) any later version.
9     
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14    
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18  
19   */
20  package edu.arizona.cs.javadict;
21  
22  import java.io.BufferedReader;
23  import java.io.Closeable;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.io.InputStreamReader;
27  import java.util.ArrayList;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.StringTokenizer;
31  
32  // Handwritten Kanji Recognizer
33  // use as a panel. The ActionListener will receive an action whose
34  // name is a string of 10 or less kanji with the best match first.
35  
36  public class DrawPanel {
37  	private final ClassLoader classLoader;
38  	public final List<List<Integer>> ystrokes = new ArrayList<List<Integer>>();
39  	public final List<List<Integer>> xstrokes = new ArrayList<List<Integer>>();
40  	public List<Integer> curxvec = null;
41  	public List<Integer> curyvec = null;
42  
43  	private static final int NUMKAN = 15;
44  
45  	private <T> T last(List<? extends T> list) {
46  		return list.get(list.size() - 1);
47  	}
48  
49  	public DrawPanel(final ClassLoader cl) {
50  		classLoader = cl;
51  	}
52  
53  	/***
54  	 * Clears the kanji strokes.
55  	 */
56  	public void clear() {
57  		xstrokes.clear();
58  		ystrokes.clear();
59  		curxvec = null;
60  		curyvec = null;
61  	}
62  	
63  	public void undoLastStroke() {
64  		if(ystrokes.isEmpty()) {
65  			return;
66  		}
67  		ystrokes.subList(ystrokes.size()-1, ystrokes.size()).clear();
68  		xstrokes.subList(xstrokes.size()-1, xstrokes.size()).clear();
69  		curxvec = xstrokes.isEmpty()?null:xstrokes.get(xstrokes.size()-1);
70  		curyvec = ystrokes.isEmpty()?null:ystrokes.get(ystrokes.size()-1);
71  	}
72  
73  	private BufferedReader getResource(final int strokes) throws IOException {
74  		final InputStream result = classLoader.getResourceAsStream("edu/arizona/cs/javadict/unistrok." + strokes);
75  		if (result == null) {
76  			return null;
77  		}
78  		return new BufferedReader(new InputStreamReader(result, "UTF-8"));
79  	}
80  
81  	private void closeQuietly(final Closeable c) {
82  		try {
83  			c.close();
84  		} catch (Exception ex) {
85  			System.out.println("WARN: FAILED TO CLOSE " + ex);
86  		}
87  	}
88  
89  	/***
90  	 * Performs analysis of the currently drawn kanji. Returns {@value #NUMKAN}
91  	 * best matches.
92  	 * 
93  	 * @return the best matches, ordered from best to worst match.
94  	 */
95  	public String analyzeKanji() throws IOException {
96  		int sc;
97  		List<Integer> minScores = new ArrayList<Integer>(); // sorted such that
98  		// the best is last
99  		List<String> minChars = new ArrayList<String>();
100 		String curk;
101 		final BufferedReader in = getResource(xstrokes.size());
102 		if (in == null) {
103 			// no kanjis with given stroke count, just return an empty string
104 			return "";
105 		}
106 		try {
107 			String line;
108 			while (true) {
109 				line = in.readLine();
110 				String goline = "";
111 				if (line == null) {
112 					int sz;
113 					sz = minChars.size();
114 					char[] kanj = new char[sz];
115 					int i;
116 					for (i = 0; i < sz; i++) { // reverse array
117 						String s;
118 						s = minChars.get(sz - i - 1);
119 						if (s.charAt(0) == '0')
120 							kanj[i] = '?';
121 						else {
122 							int index;
123 							index = s.indexOf(' ');
124 							if (index != -1)
125 								s = s.substring(0, index);
126 							try {
127 								int hexcode;
128 								hexcode = Integer.parseInt(s, 16);
129 								kanj[i] = (char) hexcode;
130 							} catch (Exception ez11) {
131 								kanj[i] = '?';
132 							}
133 						}
134 					}
135 					return new String(kanj);
136 				} else {
137 					if (line.length() == 0)
138 						continue;
139 					if (line.charAt(0) == '#')
140 						continue;
141 					int index;
142 					index = line.indexOf('|');
143 					if (index == -1)
144 						continue;
145 					curk = line.substring(0, index);
146 					line = line.substring(index + 1);
147 					String tokline;
148 					String argline;
149 					int tokindex = line.indexOf('|');
150 					if (tokindex != -1) {
151 						tokline = line.substring(0, tokindex);
152 						argline = line.substring(tokindex + 1);
153 					} else {
154 						argline = null;
155 						tokline = line;
156 					}
157 					StringTokenizer st = new StringTokenizer(tokline);
158 					if (st.countTokens() != xstrokes.size())
159 						continue;
160 
161 					WhileLoop: while (st.hasMoreTokens()) {
162 						String tok = st.nextToken();
163 						int i;
164 						for (i = 0; i < tok.length(); i++) {
165 							switch (tok.charAt(i)) {
166 							case '2':
167 							case '1':
168 							case '3':
169 							case '4':
170 							case '6':
171 							case '7':
172 							case '8':
173 							case '9':
174 								goline = goline + tok.charAt(i);
175 								break;
176 							case 'b':
177 								goline = goline + "62";
178 								break;
179 							case 'c':
180 								goline = goline + "26";
181 								break;
182 							case 'x':
183 								goline = goline + "21";
184 								break;
185 							case 'y':
186 								goline = goline + "23";
187 								break;
188 							case '|':
189 								break WhileLoop;
190 							default:
191 								throw new IOException("unknown symbol in kanji database: " + line);
192 								// continue;
193 							}
194 						}
195 						goline = goline + " ";
196 					}
197 					int ns;
198 					if (minScores.size() < NUMKAN)
199 						ns = getScore(goline, 999999);
200 					else {
201 						int cutoff1, cutoff2;
202 						cutoff1 = minScores.get(0);
203 						cutoff2 = minScores.get(minScores.size() - 1) * 2;
204 						ns = getScore(goline, Math.min(cutoff1, cutoff2));
205 					}
206 					// if more tokens exist, apply filters
207 					if (argline != null) {
208 						st = new StringTokenizer(argline);
209 						while (st.hasMoreTokens()) {
210 							try {
211 								String tok = st.nextToken();
212 								int minindex;
213 								minindex = tok.indexOf("-");
214 								if (minindex == -1) {
215 									throw new IOException("bad filter");
216 									// continue;
217 								}
218 								String arg1, arg2;
219 								arg1 = tok.substring(0, minindex);
220 								arg2 = tok.substring(minindex + 1, tok.length());
221 								int arg1stroke, arg2stroke;
222 								arg1stroke = Integer.parseInt(arg1.substring(1));
223 								boolean must = (arg2.charAt(arg2.length() - 1) == '!');
224 								if (must)
225 									arg2stroke = Integer.parseInt(arg2.substring(1, arg2.length() - 1));
226 								else
227 									arg2stroke = Integer.parseInt(arg2.substring(1));
228 
229 								List<Integer> stroke1x, stroke1y, stroke2x, stroke2y;
230 								stroke1x = xstrokes.get(arg1stroke - 1);
231 								stroke1y = ystrokes.get(arg1stroke - 1);
232 								stroke2x = xstrokes.get(arg2stroke - 1);
233 								stroke2y = ystrokes.get(arg2stroke - 1);
234 
235 								int val1, val2;
236 								switch (arg1.charAt(0)) {
237 								case 'x':
238 									val1 = stroke1x.get(0);
239 									break;
240 								case 'y':
241 									val1 = stroke1y.get(0);
242 									break;
243 								case 'i':
244 									val1 = stroke1x.get(stroke1x.size() - 1);
245 									break;
246 								case 'j':
247 									val1 = stroke1y.get(stroke1y.size() - 1);
248 									break;
249 								case 'a':
250 									val1 = ((stroke1x.get(0)) + (stroke1x.get(stroke1x.size() - 1))) / 2;
251 									break;
252 								case 'b':
253 									val1 = ((stroke1y.get(0)) + (stroke1y.get(stroke1y.size() - 1))) / 2;
254 									break;
255 								case 'l':
256 									int dx,
257 									dy;
258 									dx = (last(stroke1x)) - (stroke1x.get(0));
259 									dy = (last(stroke1y)) - (stroke1y.get(0));
260 									val1 = (int) (Math.sqrt((double) (dx * dx + dy * dy)));
261 									break;
262 								default:
263 									throw new IOException("bad filter");
264 									// continue;
265 								}
266 								// now the same thing for arg2 & val2
267 								switch (arg2.charAt(0)) {
268 								case 'x':
269 									val2 = (stroke2x.get(0));
270 									break;
271 								case 'y':
272 									val2 = (stroke2y.get(0));
273 									break;
274 								case 'i':
275 									val2 = (last(stroke2x));
276 									break;
277 								case 'j':
278 									val2 = (last(stroke2y));
279 									break;
280 								case 'a':
281 									val2 = ((stroke2x.get(0)) + (last(stroke2x))) / 2;
282 									break;
283 								case 'b':
284 									val2 = ((stroke2y.get(0)) + (last(stroke2y))) / 2;
285 									break;
286 								case 'l':
287 									int dx,
288 									dy;
289 									dx = (last(stroke2x)) - (stroke2x.get(0));
290 									dy = (last(stroke2y)) - (stroke2y.get(0));
291 									val2 = (int) (Math.sqrt((double) (dx * dx + dy * dy)));
292 									break;
293 								default:
294 									throw new IOException("bad filter");
295 									// continue;
296 								}
297 								// so now val1 and val2 have the right values
298 								ns = ns - (val1 - val2);
299 								if (must && (val1 < val2))
300 									ns += 9999999;
301 							} catch (Exception ez2) {
302 								throw new RuntimeException("bad filter", ez2);
303 								// continue;
304 							} // try-catch
305 						} // while
306 					}
307 					// now ns == the score
308 					int size;
309 					size = minScores.size();
310 					if ((size < NUMKAN) || (ns < minScores.get(0))) {
311 						if (size == 0) {
312 							minScores.add(ns);
313 							minChars.add(curk);
314 						} else {
315 							if (ns <= (last(minScores))) {
316 								minScores.add(new Integer(ns));
317 								minChars.add(curk);
318 							} else {
319 								int i = 0;
320 								while ((minScores.get(i)) > ns)
321 									i++;
322 								minScores.add(i, ns);
323 								minChars.add(i, curk);
324 							}
325 						}
326 					}
327 					size = minScores.size();
328 					if (size > NUMKAN) {
329 						minScores.remove(0);
330 						minChars.remove(0);
331 					}
332 				}
333 			}
334 
335 		} finally {
336 			closeQuietly(in);
337 		}
338 	}
339 
340 	// static final int HASHSIZE = 3000;
341 	static final int angScale = 1000;
342 	static final int sCost = (int) Math.round(Math.PI / 60.0 * angScale);
343 	static final int hugeCost = ((int) Math.round(Math.PI * angScale) + sCost) * 100;
344 
345 	// endi is exclusive, begi inclusive
346 	int scoreStroke(List<Integer> xv, List<Integer> yv, int begi, int endi, String dir, int depth) {
347 
348 		/*
349 		 * if (endi-1<=begi) { System.out.println("Ouch1"); return(hugeCost); }
350 		 */
351 		if (dir.length() == 1) {
352 			int i;
353 			int difx, dify;
354 			difx = (xv.get(endi - 1)) - (xv.get(begi));
355 			dify = (yv.get(endi - 1)) - (yv.get(begi));
356 			if ((difx == 0) && (dify == 0)) {
357 				// System.out.println("Ouch2");
358 				// return((int)Math.round(Math.PI*angScale)+sCost);
359 				return (hugeCost);
360 			}
361 			if ((difx * difx + dify * dify > (20 * 20)) && (endi - begi > 5) && (depth < 4)) {
362 				int mi = (endi + begi) / 2;
363 				int cost1, cost2;
364 				cost1 = scoreStroke(xv, yv, begi, mi, dir, depth + 1);
365 				cost2 = scoreStroke(xv, yv, mi, endi, dir, depth + 1);
366 				// return the average cost of the substrokes, but penalize if
367 				// they're
368 				// different.
369 				return ((cost1 + cost2) / 2);// +Math.abs(cost1-cost2));
370 			}
371 
372 			double ang;
373 			ang = Math.atan2(-dify, difx);
374 			double myang;
375 			switch (dir.charAt(0)) {
376 			case '6':
377 				myang = 0;
378 				break;
379 			case '9':
380 				myang = Math.PI / 4;
381 				break;
382 			case '8':
383 				myang = Math.PI / 2;
384 				break;
385 			case '7':
386 				myang = Math.PI * 3 / 4;
387 				break;
388 			case '4':
389 				myang = Math.PI;
390 				break;
391 			case '3':
392 				myang = -Math.PI / 4;
393 				break;
394 			case '2':
395 				myang = -Math.PI / 2;
396 				break;
397 			case '1':
398 				myang = -Math.PI * 3 / 4;
399 				break;
400 			default:
401 				throw new RuntimeException("Illegal char: " + dir.charAt(0));
402 				// myang = 0;
403 			}
404 			double difang = myang - ang;
405 			while (difang < 0)
406 				difang += 2 * Math.PI;
407 			while (difang > 2 * Math.PI)
408 				difang -= 2 * Math.PI;
409 			if (difang > Math.PI)
410 				difang = 2 * Math.PI - difang;
411 
412 			int retcost = (int) Math.round(difang * angScale) + sCost;
413 			return (retcost);
414 		} else if (begi == endi) {
415 			/*
416 			 * int i; int cost=0; for (i=0;i<dir.length();i++) {
417 			 * cost+=scoreStroke(xv,yv,begi,endi,dir.charAt(i)+""); } int
418 			 * retcost = cost/dir.length(); return(retcost);
419 			 */
420 			return (hugeCost * dir.length());
421 		} else { // recurse
422 			int l1, l2;
423 			l1 = dir.length() / 2;
424 			l2 = dir.length() - l1;
425 			String s1, s2;
426 			s1 = dir.substring(0, l1);
427 			s2 = dir.substring(l1, dir.length());
428 			int i;
429 			int mincost = hugeCost * dir.length() * 2;
430 			// 9999991;
431 			int s1l = s1.length();
432 			int s2l = s2.length();
433 			int step = (endi - begi) / 10;
434 			if (step < 1)
435 				step = 1;
436 
437 			for (i = begi + 1 + s1l; i < endi - 1 - s2l; i += step) {
438 				int ncost;
439 				ncost = scoreStroke(xv, yv, begi, i + 1, s1, depth) + scoreStroke(xv, yv, i - 1, endi, s2, depth);
440 				if (ncost < mincost)
441 					mincost = ncost;
442 			}
443 			// if (mincost==hugeCost*dir.length()*2)
444 			// System.out.println("Ouch3");
445 			return (mincost);
446 		}
447 	}
448 
449 	private int getScore(String s, int cutoff) { // , Hashtable h, int cutoff) {
450 		double score = 0;
451 		int strokes = 0;
452 		int maxscore = 0;
453 		cutoff = cutoff * xstrokes.size(); // need it before the averaging
454 		StringTokenizer st = new StringTokenizer(s);
455 		Iterator<List<Integer>> xe = xstrokes.iterator();
456 		Iterator<List<Integer>> ye = ystrokes.iterator();
457 		while (st.hasMoreTokens())
458 			if (!xe.hasNext())
459 				return (99997);
460 			else {
461 				/*
462 				 * if ((score>cutoff)&&(strokes>0)) { return(score/strokes); }
463 				 */
464 				List<Integer> vxe = xe.next();
465 				List<Integer> vye = ye.next();
466 				int thisscore;
467 				thisscore = scoreStroke(vxe, vye, 0, vxe.size(), st.nextToken(), 0);
468 
469 				score = score + thisscore * thisscore;
470 				maxscore = Math.max(maxscore, thisscore);
471 				strokes++;
472 			}
473 		if (xe.hasNext())
474 			return (99998);
475 		else {
476 			if (strokes == 0)
477 				return (99997);
478 			else
479 				return ((int) Math.round(Math.sqrt(score)));
480 			// return(score/strokes+maxscore);
481 
482 			/*
483 			 * count the worst stroke every time (sort of a pseudo-fuzzy type
484 			 * alg.)
485 			 */
486 		}
487 	}
488 }