View Javadoc

1   /***
2    *     Aedict - an EDICT browser for Android
3    Copyright (C) 2009 Martin Vysny
4    
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9    
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14  
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package sk.baka.aedict;
20  
21  import java.io.IOException;
22  import java.io.Serializable;
23  import java.util.ArrayList;
24  import java.util.List;
25  
26  import sk.baka.aedict.dict.DictEntry;
27  import sk.baka.aedict.dict.DictTypeEnum;
28  import sk.baka.aedict.dict.Dictionary;
29  import sk.baka.aedict.dict.EdictEntry;
30  import sk.baka.aedict.dict.KanjidicEntry;
31  import sk.baka.aedict.dict.LuceneSearch;
32  import sk.baka.aedict.dict.MatcherEnum;
33  import sk.baka.aedict.dict.SearchQuery;
34  import sk.baka.aedict.kanji.KanjiUtils;
35  import sk.baka.aedict.kanji.Radicals;
36  import sk.baka.aedict.util.DictEntryListActions;
37  import sk.baka.aedict.util.ShowRomaji;
38  import sk.baka.autils.AbstractTask;
39  import sk.baka.autils.MiscUtils;
40  import sk.baka.autils.Progress;
41  import android.app.Activity;
42  import android.app.ListActivity;
43  import android.content.Intent;
44  import android.os.Bundle;
45  import android.view.Menu;
46  import android.view.MenuItem;
47  import android.view.View;
48  import android.view.ViewGroup;
49  import android.widget.ArrayAdapter;
50  import android.widget.ListView;
51  import android.widget.TextView;
52  
53  /***
54   * Analyzes each kanji in given word.
55   * 
56   * @author Martin Vysny
57   */
58  public class KanjiAnalyzeActivity extends ListActivity {
59  	/***
60  	 * The string word to analyze.
61  	 */
62  	public static final String INTENTKEY_WORD = "word";
63  	/***
64  	 * A list of {@link KanjidicEntry}.
65  	 */
66  	public static final String INTENTKEY_ENTRYLIST = "entrylist";
67  	/***
68  	 * Boolean value: False if we parsed given word on a per-character basis,
69  	 * true on a per-word basis.
70  	 */
71  	public static final String INTENTKEY_WORD_ANALYSIS = "wordAnalysis";
72  
73  	/***
74  	 * Longest kana word from the dictionary has 33 characters: ニューモノウルトラマイクロスコーピックシリコヴォルケーノコニオシス
75  	 */
76  	private static final int MAX_KANA_WORD_LENGTH=10;
77  	/***
78  	 * Longest kanji word from the dictionary has 37 characters: プログラム制御式及びキーボード制御式のアドレス指定可能な記憶域をもつ計算器
79  	 * However this would slow the word analysis to a crawl. Let's expect that the longest word may take at most 10 characters.
80  	 */
81  	private static final int MAX_KANJI_WORD_LENGTH=10;
82  	
83  	public static void launch(final Activity activity, final String word, final boolean isWordAnalysis) {
84  		if (word == null) {
85  			throw new IllegalArgumentException("word is null");
86  		}
87  		if (!AedictApp.getDownloader().checkDictionary(activity, new Dictionary(DictTypeEnum.Kanjidic, null), null, false)) {
88  			return;
89  		}
90  		final Intent i = new Intent(activity, KanjiAnalyzeActivity.class);
91  		i.putExtra(INTENTKEY_WORD, word);
92  		i.putExtra(INTENTKEY_WORD_ANALYSIS, isWordAnalysis);
93  		activity.startActivity(i);
94  	}
95  
96  	public static void launch(final Activity activity, final List<? extends DictEntry> entries, final boolean isWordAnalysis) {
97  		if (entries == null) {
98  			throw new IllegalArgumentException("entries is null");
99  		}
100 		if (!AedictApp.getDownloader().checkDictionary(activity, new Dictionary(DictTypeEnum.Kanjidic, null), null, false)) {
101 			return;
102 		}
103 		final Intent i = new Intent(activity, KanjiAnalyzeActivity.class);
104 		i.putExtra(INTENTKEY_ENTRYLIST, (Serializable) entries);
105 		i.putExtra(INTENTKEY_WORD_ANALYSIS, isWordAnalysis);
106 		activity.startActivity(i);
107 	}
108 
109 	private List<DictEntry> model = null;
110 	/***
111 	 * The word to analyze. If null then we were simply given a list of
112 	 * EdictEntry directly.
113 	 */
114 	private String word;
115 	/***
116 	 * True if we parsed given word on a per-character basis, false on a
117 	 * per-word basis.
118 	 */
119 	private boolean isAnalysisPerCharacter = true;
120 	private ShowRomaji showRomaji;
121 
122 	@Override
123 	protected void onRestoreInstanceState(Bundle savedInstanceState) {
124 		showRomaji.loadState(savedInstanceState);
125 	}
126 
127 	@Override
128 	protected void onSaveInstanceState(Bundle outState) {
129 		showRomaji.saveState(outState);
130 	}
131 
132 	@SuppressWarnings("unchecked")
133 	@Override
134 	protected void onCreate(Bundle savedInstanceState) {
135 		super.onCreate(savedInstanceState);
136 		showRomaji = new ShowRomaji() {
137 
138 			@Override
139 			protected void show(boolean romaji) {
140 				if (getListAdapter() != null) {
141 					((ArrayAdapter<?>) getListAdapter()).notifyDataSetChanged();
142 				}
143 			}
144 		};
145 		word = getIntent().getStringExtra(INTENTKEY_WORD);
146 		model = (List<DictEntry>) getIntent().getSerializableExtra(INTENTKEY_ENTRYLIST);
147 		isAnalysisPerCharacter = !getIntent().getBooleanExtra(INTENTKEY_WORD_ANALYSIS, false);
148 		if (word == null && model == null) {
149 			throw new IllegalArgumentException("Both word and entrylist are null");
150 		}
151 		setTitle(AedictApp.format(R.string.kanjiAnalysisOf, word != null ? word : DictEntry.getJapaneseWord(model)));
152 		if (model == null) {
153 			recomputeModel();
154 		} else {
155 			// if the activity received a list of EdictEntry instead of a word,
156 			// the model was not set to the activity and the activity shown an
157 			// empty list
158 			// fixes http://code.google.com/p/aedict/issues/detail?id=29
159 			setListAdapter(newAdapter());
160 		}
161 		new DictEntryListActions(this, true, true, false, true).register(getListView());
162 	}
163 
164 	private ArrayAdapter<DictEntry> newAdapter() {
165 		return new ArrayAdapter<DictEntry>(this, R.layout.kanjidic_list_item, model) {
166 
167 			@Override
168 			public View getView(int position, View convertView, ViewGroup parent) {
169 				View v = convertView;
170 				if (v == null) {
171 					v = getLayoutInflater().inflate(R.layout.kanjidic_list_item, getListView(), false);
172 				}
173 				final DictEntry e = model.get(position);
174 				((TextView) v.findViewById(android.R.id.text1)).setText(showRomaji.romanize(e.reading));
175 				final StringBuilder sb = new StringBuilder();
176 				if (e instanceof KanjidicEntry) {
177 					final KanjidicEntry ee = (KanjidicEntry) e;
178 					sb.append(' ').append(Radicals.getRadicals(ee.kanji.charAt(0)));
179 					sb.append(" Strokes:").append(ee.strokes);
180 					sb.append(" SKIP:").append(ee.skip);
181 					if (ee.grade != null) {
182 						sb.append(" Grade:").append(ee.grade);
183 					}
184 				}
185 				if (sb.length() > 0) {
186 					sb.replace(0, 1, "\n");
187 				}
188 				sb.insert(0, e.english);
189 				((TextView) v.findViewById(android.R.id.text2)).setText(sb.toString());
190 				final TextView tv = (TextView) v.findViewById(R.id.kanjiBig);
191 				// if the japanese word is too big the reading and the
192 				// translation is not shown anymore
193 				// workaround: add \n character after each third char
194 				tv.setText(splitToRows(e.getJapanese()));
195 				return v;
196 			}
197 
198 			private String splitToRows(final String str) {
199 				if (str == null) {
200 					return "";
201 				}
202 				final StringBuilder sb = new StringBuilder(str.length() * 4 / 3);
203 				for (int i = 0; i < str.length(); i++) {
204 					if ((i > 0) && (i % 3 == 0)) {
205 						sb.append('\n');
206 					}
207 					sb.append(str.charAt(i));
208 				}
209 				return sb.toString();
210 			}
211 		};
212 	}
213 
214 	@Override
215 	protected void onListItemClick(ListView l, View v, int position, long id) {
216 		final DictEntry e = model.get(position);
217 		if (!e.isValid()) {
218 			return;
219 		}
220 		if (e instanceof KanjidicEntry) {
221 			KanjiDetailActivity.launch(this, (KanjidicEntry) e);
222 		} else if (e instanceof EdictEntry){
223 			EdictEntryDetailActivity.launch(this, (EdictEntry)e);
224 		}else{
225 			// this only happens when the word analysis is turned off and the entry shows a single kana character.
226 			// just do nothing.
227 			// fixes http://code.google.com/p/aedict/issues/detail?id=69
228 		}
229 	}
230 
231 	@Override
232 	public boolean onPrepareOptionsMenu(Menu menu) {
233 		menu.clear();
234 		if (word == null) {
235 			return false;
236 		}
237 		final MenuItem item;
238 		if (!isAnalysisPerCharacter) {
239 			item = menu.add(R.string.analyzeCharacters);
240 			item.setIcon(android.R.drawable.ic_menu_zoom);
241 		} else {
242 			item = menu.add(R.string.analyzeWords);
243 			item.setIcon(android.R.drawable.ic_menu_search);
244 		}
245 		item.setOnMenuItemClickListener(new MenuItem.OnMenuItemClickListener() {
246 
247 			public boolean onMenuItemClick(MenuItem item) {
248 				isAnalysisPerCharacter = !isAnalysisPerCharacter;
249 				recomputeModel();
250 				return true;
251 			}
252 		});
253 		showRomaji.register(this, menu);
254 		return true;
255 	}
256 
257 	@Override
258 	protected void onResume() {
259 		super.onResume();
260 		showRomaji.onResume();
261 	}
262 
263 	private void recomputeModel() {
264 		new RecomputeModel().execute(AedictApp.isInstrumentation, this, word);
265 	}
266 
267 	private class RecomputeModel extends AbstractTask<String, List<DictEntry>> {
268 
269 		@Override
270 		protected void cleanupAfterError(Exception ex) {
271 			// nothing to do
272 		}
273 
274 		@Override
275 		protected void onSucceeded(List<DictEntry> result) {
276 			model = result;
277 			setListAdapter(newAdapter());
278 		}
279 
280 		@Override
281 		public List<DictEntry> impl(String... params) throws Exception {
282 			publish(new Progress(AedictApp.getStr(R.string.analyzing), 0, 100));
283 			if (isAnalysisPerCharacter) {
284 				// remove all non-letter characters
285 				final String w = word.replaceAll("[^//p{javaLetter}]+", "");
286 				return analyzeByCharacters(KanjiUtils.halfwidthToKatakana(w));
287 			} else {
288 				return analyzeByWords(KanjiUtils.halfwidthToKatakana(word));
289 			}
290 		}
291 
292 		private List<DictEntry> analyzeByWords(final String sentence) throws IOException {
293 			final List<DictEntry> result = new ArrayList<DictEntry>();
294 			final LuceneSearch lsEdict = new LuceneSearch(DictTypeEnum.Edict, AedictApp.getConfig().getDictionaryLoc(), AedictApp.getConfig().isSorted());
295 			try {
296 				final String[] words = getWords(sentence);
297 				final int progressMax = getNumberOfCharacters(words);
298 				int currentProgress = 0;
299 				for (int i = 0; i < words.length; i++) {
300 					if (isCancelled()) {
301 						return null;
302 					}
303 					String w = words[i].trim();
304 					while (w.length() > 0) {
305 						final MatchedWord match = findLongestWord(w, lsEdict);
306 						result.add(match.entry);
307 						w = w.substring(match.wordLength);
308 						currentProgress += match.wordLength;
309 						publish(new Progress(null, currentProgress, progressMax));
310 					}
311 				}
312 				return result;
313 			} finally {
314 				MiscUtils.closeQuietly(lsEdict);
315 			}
316 		}
317 
318 		private final String[] getWords(final String sentence) {
319 			// split the sentence by a non-word characters, like space, hyphen,
320 			// -, etc.
321 			return sentence.split("[^//p{javaLetter}]+");
322 		}
323 
324 		private int getNumberOfCharacters(final String[] words) {
325 			int result = 0;
326 			for (String word : words) {
327 				result += word.length();
328 			}
329 			return result;
330 		}
331 
332 		/***
333 		 * Tries to find longest word which is present in the EDICT dictionary.
334 		 * The search starts with given word, then cuts the last character off,
335 		 * etc.
336 		 * 
337 		 * @param word
338 		 *            the word to analyze. Must not contain romaji.
339 		 * @return longest word found or an entry consisting of the first
340 		 *         character if we were unable to find nothing
341 		 * @throws IOException
342 		 *             on i/o error
343 		 */
344 		private MatchedWord findLongestWord(final String word, final LuceneSearch edict) throws IOException {
345 			String w = word;
346 			final int maxLength = KanjiUtils.isKanji(word.charAt(0)) ? MAX_KANJI_WORD_LENGTH : MAX_KANA_WORD_LENGTH;
347 			if (w.length() > maxLength) {
348 				// optimization to avoid quadratic search complexity
349 				w = w.substring(0, maxLength);
350 			}
351 			while (w.length() > 0) {
352 				final List<DictEntry> result = edict.search(SearchQuery.searchJpEdict(w, MatcherEnum.Exact), 1);
353 				DictEntry.removeInvalid(result);
354 				if (!result.isEmpty()) {
355 					for (final DictEntry e : result) {
356 						return new MatchedWord(e, w.length());
357 					}
358 					// no luck, continue with the search
359 				}
360 				w = w.substring(0, w.length() - 1);
361 			}
362 			return new MatchedWord(new DictEntry(word.substring(0, 1), "", ""), 1);
363 		}
364 
365 		private List<DictEntry> analyzeByCharacters(final String word) throws IOException {
366 			final List<DictEntry> result = new ArrayList<DictEntry>(word.length());
367 			final LuceneSearch lsEdict = new LuceneSearch(DictTypeEnum.Edict, AedictApp.getConfig().getDictionaryLoc(), AedictApp.getConfig().isSorted());
368 			try {
369 				LuceneSearch lsKanjidic = null;
370 				if (AedictApp.getDownloader().isComplete(DictTypeEnum.Kanjidic)) {
371 					lsKanjidic = new LuceneSearch(DictTypeEnum.Kanjidic, null, AedictApp.getConfig().isSorted());
372 				}
373 				try {
374 					final String w = MiscUtils.removeWhitespaces(word);
375 					for (int i = 0; i < w.length(); i++) {
376 						publish(new Progress(null, i, w.length()));
377 						if (isCancelled()) {
378 							return null;
379 						}
380 						final char c = w.charAt(i);
381 						final boolean isKanji = KanjiUtils.isKanji(c);
382 						if (!isKanji) {
383 							result.add(new DictEntry(String.valueOf(c), String.valueOf(c), ""));
384 						} else {
385 							// it is a kanji. search for it in the
386 							// dictionary.
387 							final SearchQuery q = SearchQuery.searchJpEdict(String.valueOf(c), MatcherEnum.Exact);
388 							List<DictEntry> matches = null;
389 							DictEntry ee = null;
390 							if (lsKanjidic != null) {
391 								matches = lsKanjidic.search(q, 1);
392 								DictEntry.removeInvalid(matches);
393 							}
394 							if (matches != null && !matches.isEmpty()) {
395 								ee = matches.get(0);
396 							}
397 							if (ee == null) {
398 								matches = lsEdict.search(q, 1);
399 								DictEntry.removeInvalid(matches);
400 								if (!matches.isEmpty()) {
401 									ee = matches.get(0);
402 								}
403 							}
404 							if (ee == null) {
405 								// no luck. Just add the kanji
406 								ee = new DictEntry(String.valueOf(c), "", "");
407 							}
408 							result.add(ee);
409 						}
410 					}
411 					return result;
412 				} finally {
413 					MiscUtils.closeQuietly(lsKanjidic);
414 				}
415 			} finally {
416 				MiscUtils.closeQuietly(lsEdict);
417 			}
418 		}
419 	}
420 
421 	private static class MatchedWord {
422 		public final DictEntry entry;
423 		public final int wordLength;
424 
425 		public MatchedWord(DictEntry entry, int wordLength) {
426 			this.entry = entry;
427 			this.wordLength = wordLength;
428 		}
429 	}
430 }