1 /***
2 * Aedict - an EDICT browser for Android
3 Copyright (C) 2009 Martin Vysny
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 package sk.baka.aedict.kanji;
19
20 import java.util.ArrayList;
21 import java.util.HashSet;
22 import java.util.List;
23 import java.util.Set;
24
25 import sk.baka.aedict.dict.DictTypeEnum;
26 import sk.baka.aedict.dict.MatcherEnum;
27 import sk.baka.aedict.dict.SearchQuery;
28 import sk.baka.aedict.kanji.Deinflections.Deinflection;
29 import sk.baka.aedict.kanji.VerbInflection.Form;
30
31 /***
32 * Performs a simple verb deinflection.
33 *
34 * @author Martin Vysny
35 */
36 public final class VerbDeinflection {
37
38 private static class IrregularDeinflector extends EndsWithDeinflector {
39
40 public IrregularDeinflector(final String inflected, final Form form, final String... base) {
41 super(inflected, true, true, form, base);
42 }
43 }
44
45 private static class EndsWithDeinflector extends AbstractDeinflector {
46
47 private final String endsWith;
48 private final String[] replaceBy;
49 private final boolean isAllowEntireWordMatch;
50 private final boolean isStopIfMatch;
51 private final Form form;
52
53 /***
54 * Deinflects a verb if it ends with one of the following strings.
55 *
56 * @param endsWith
57 * a non-empty non-null list of possible endings, lower-case
58 * trimmed romaji.
59 * @param form the form of the verb.
60 * @param replaceBy
61 * the ending is replaced by this string.
62 */
63 public EndsWithDeinflector(final String endsWith, final Form form, final String... replaceBy) {
64 this(endsWith, false, false, form, replaceBy);
65 }
66
67 /***
68 * Deinflects a verb if it ends with one of the following strings.
69 *
70 * @param endsWith
71 * a non-empty non-null list of possible endings, lower-case
72 * trimmed romaji.
73 * @param isAllowEntireWordMatch
74 * if true then an entire word must match, if false then only
75 * a suffix (not an entire word) must match. This is often
76 * not wanted, e.g. the itai suffix would match the itai
77 * word.
78 * @param isStopIfMatch
79 * defines the return value of {@link #stopIfMatch()}.
80 * @param form the form of the verb.
81 * @param replaceBy
82 * the ending is replaced by this string.
83 */
84 public EndsWithDeinflector(final String endsWith, final boolean isAllowEntireWordMatch, final boolean isStopIfMatch, final Form form, final String... replaceBy) {
85 this.endsWith = endsWith;
86 this.replaceBy = replaceBy;
87 this.isAllowEntireWordMatch = isAllowEntireWordMatch;
88 this.isStopIfMatch = isStopIfMatch;
89 this.form = form;
90 }
91
92 @Override
93 public Set<String> deinflect(String romaji) {
94 final String ending = isMatch(romaji);
95 if (ending == null) {
96
97 return null;
98 }
99 final Set<String> result = new HashSet<String>(replaceBy.length);
100 final String verbPart = romaji.substring(0, romaji.length() - ending.length());
101 for (final String rb : replaceBy) {
102 result.add(verbPart + rb);
103 }
104 return result;
105 }
106
107 private String isMatch(final String romaji) {
108 final String ending = endsWith;
109 if (isAllowEntireWordMatch && romaji.equals(ending)) {
110 return ending;
111 }
112 if (!isAllowEntireWordMatch) {
113 if (romaji.endsWith(ending) && !romaji.equals(ending)) {
114 return ending;
115 }
116 }
117 return null;
118 }
119
120 @Override
121 public boolean stopIfMatch() {
122 return isStopIfMatch;
123 }
124
125 @Override
126 public Form getForm() {
127 return form;
128 }
129 }
130
131 private static class EruDeinflector extends AbstractDeinflector {
132
133
134
135
136 private final AbstractDeinflector eruDeinflector = new EndsWithDeinflector("eru", Form.ABLE_TO_DO2, "eru", "u");
137
138 @Override
139 public Set<String> deinflect(String romaji) {
140
141 if (romaji.endsWith("rareru")) {
142 return null;
143 }
144 return eruDeinflector.deinflect(romaji);
145 }
146
147 @Override
148 public boolean stopIfMatch() {
149
150 return true;
151 }
152
153 @Override
154 public Form getForm() {
155 return Form.ABLE_TO_DO2;
156 }
157 }
158
/**
 * A single deinflection rule. The rules are applied one after another by
 * {@link VerbDeinflection#deinflect(String)}.
 */
private static abstract class AbstractDeinflector {

    /**
     * Tries to deinflect a verb.
     *
     * @param romaji
     *            a verb in lower-case, trimmed romaji.
     * @return the deinflected verb, or multiple verbs if there are multiple
     *         possibilities to deinflect. If this rule cannot be applied to
     *         deinflect the verb, null or an empty set should be returned.
     */
    public abstract Set<String> deinflect(String romaji);

    /**
     * If true then there is nothing more to deinflect and the process can
     * be safely stopped.
     *
     * @return true if there is nothing more to deinflect, false if the
     *         deinflection should continue.
     */
    public abstract boolean stopIfMatch();

    /**
     * Returns the originating form.
     *
     * @return originating form.
     */
    public abstract Form getForm();
}
188
189 private static AbstractDeinflector basicSuffix(final String endsWith, final Form form, final String... replaceBy) {
190 return new EndsWithDeinflector(endsWith, false, true, form, replaceBy);
191 }
192
193 private static List<IrregularDeinflector> irregular(String[] endsWith, final Form form, final String replaceBy) {
194 final List<IrregularDeinflector> result=new ArrayList<VerbDeinflection.IrregularDeinflector>();
195 for(final String ew:endsWith){
196 result.add(new IrregularDeinflector(ew, form, replaceBy));
197 }
198 return result;
199 }
200
/**
 * All deinflection rules. {@link #deinflect(String)} walks this list from
 * the first rule to the last one, so the order of the entries below is the
 * order in which the rules are applied - do not reorder them casually.
 * <p>
 * Three kinds of rules are registered:
 * <ul>
 * <li>{@link IrregularDeinflector} / {@code irregular(...)}: matches the
 * entire word only and stops further deinflection of that word;</li>
 * <li>{@code basicSuffix(...)}: matches a proper suffix only and stops
 * further deinflection of that word;</li>
 * <li>three-argument {@link EndsWithDeinflector}: matches a proper suffix
 * only and lets the following rules process its output.</li>
 * </ul>
 */
private final static List<? extends AbstractDeinflector> DEINFLECTORS;

static {
    final List<AbstractDeinflector> d = new ArrayList<AbstractDeinflector>();
    // whole-word negative forms which map to "desu"; registered before the
    // generic "masen" suffix rules below
    d.addAll(irregular(new String[]{"dewaarimasen", "dehaarimasen", "de wa arimasen", "de ha arimasen", "zya arimasen", "zyaarimasen"}, Form.POLITE_NEGATIVE, "desu"));
    d.addAll(irregular(new String[]{"dewaarimasendesita", "dehaarimasendesita", "de wa arimasen desita", "de ha arimasen desita", "zya arimasen desita", "zyaarimasendesita"}, Form.POLITE_PAST_NEGATIVE, "desu"));

    // polite suffixes are rewritten to "masu"; these rules do not stop, so
    // the "imasu"/"emasu" rules further below can process the result
    d.add(new EndsWithDeinflector("masen", Form.POLITE_NEGATIVE, "masu"));
    d.add(new EndsWithDeinflector("masita", Form.POLITE_PAST, "masu"));
    d.add(new EndsWithDeinflector("masendesita", Form.POLITE_PAST_NEGATIVE, "masu"));
    d.add(new EndsWithDeinflector("masen desita", Form.POLITE_PAST_NEGATIVE, "masu"));

    // negative past suffix is rewritten to "nai"; does not stop, so the
    // "...nai" suffix rules further below can process the result
    d.add(new EndsWithDeinflector("nakatta", Form.NEGATIVE_PAST, "nai"));

    // irregular whole-word forms mapping to "suru" / "sareru"
    d.add(new IrregularDeinflector("sinai", Form.NEGATIVE, "suru"));
    d.add(new IrregularDeinflector("sita", Form.PAST_TENSE, "suru"));
    d.add(new IrregularDeinflector("site", Form.CONTINUATION , "suru"));
    d.add(new IrregularDeinflector("simasu", Form.POLITE, "suru"));
    d.add(new IrregularDeinflector("siyou", Form.LET_S2, "suru"));
    d.add(new IrregularDeinflector("sareru", Form.PLAIN, "sareru"));
    d.add(new IrregularDeinflector("sarenai", Form.NEGATIVE, "sareru"));
    d.add(new IrregularDeinflector("sareta", Form.PAST_TENSE, "sareru"));
    // irregular whole-word forms mapping to "kuru"
    d.add(new IrregularDeinflector("konai", Form.NEGATIVE, "kuru"));
    d.add(new IrregularDeinflector("kita",Form.PAST_TENSE, "kuru"));
    d.add(new IrregularDeinflector("kite", Form.CONTINUATION, "kuru"));
    d.add(new IrregularDeinflector("kimasu", Form.POLITE, "kuru"));
    d.add(new IrregularDeinflector("koyou",Form.LET_S2, "kuru"));
    // irregular whole-word forms mapping to "desu"
    d.add(new IrregularDeinflector("da", Form.PLAIN, "desu"));
    d.addAll(irregular(new String[]{"dewanai", "zyanai"},Form.NEGATIVE, "desu"));
    d.add(new IrregularDeinflector("datta", Form.PAST_TENSE , "desu"));
    d.add(new IrregularDeinflector("desita", Form.POLITE_PAST, "desu"));
    // NOTE(review): this entry is registered with a null form
    d.add(new IrregularDeinflector("de", null, "desu"));
    // NOTE(review): "dewanai" is already registered a few lines above; the
    // second registration looks redundant - confirm before removing
    d.addAll(irregular(new String[]{"dehanai", "de ha nai", "dewanai", "de wa nai"}, Form.NEGATIVE, "desu"));
    d.addAll(irregular(new String[]{"dehaaru", "de ha aru", "de wa aru", "dewaaru"}, Form.PLAIN, "desu"));
    // irregular whole-word forms mapping to "iku" / "ikareru"
    d.add(new IrregularDeinflector("itta", Form.PAST_TENSE, "iku"));
    d.add(new IrregularDeinflector("itte", Form.CONTINUATION, "iku"));
    d.add(new IrregularDeinflector("ikimasu", Form.POLITE, "iku"));
    d.add(new IrregularDeinflector("ikareru", Form.PLAIN, "ikareru"));
    d.add(new IrregularDeinflector("ikarenai", Form.NEGATIVE, "ikareru"));
    d.add(new IrregularDeinflector("ikareta",Form.PAST_TENSE, "ikareru"));
    // irregular whole-word forms mapping to "aru" (and "au" where ambiguous)
    d.add(new IrregularDeinflector("nai", Form.NEGATIVE, "aru"));
    d.add(new IrregularDeinflector("nakatta", Form.NEGATIVE_PAST, "aru"));
    d.add(new IrregularDeinflector("arimasu",Form.POLITE, "aru"));
    d.add(new IrregularDeinflector("atte", Form.CONTINUATION, "aru", "au"));
    d.add(new IrregularDeinflector("atta", Form.PAST_TENSE, "aru", "au"));

    // regular suffix rules; longer suffixes are listed before the shorter
    // suffixes they end with (e.g. "arenai" before "anai", "wanai" before "anai")
    d.add(basicSuffix("kereba", Form.NEGATIVE_CONDITIONAL, "i"));
    d.add(basicSuffix("arenai", Form.NEGATIVE, "areru"));
    d.add(basicSuffix("areta", Form.PAST_TENSE, "areru"));
    d.add(basicSuffix("areru", Form.PLAIN, "areru"));
    d.add(basicSuffix("wanai", Form.NEGATIVE, "u"));
    d.add(basicSuffix("anai", Form.NEGATIVE, "u"));

    // non-stopping: the produced "...eru" word is still offered to the following rules
    d.add(new EndsWithDeinflector("enai", Form.NEGATIVE, "eru"));

    d.add(basicSuffix("inai", Form.NEGATIVE, "iru"));
    d.add(basicSuffix("itai", Form.WANT, "u"));

    // non-stopping suffix rules
    d.add(new EndsWithDeinflector("etai", Form.WANT, "eru"));
    d.add(basicSuffix("eba", Form.IF2, "u"));
    d.add(new EndsWithDeinflector("emasu", Form.ABLE_TO_DO2, "u", "eru"));
    d.add(new EndsWithDeinflector("imasu", Form.POLITE, "u", "iru"));
    // NOTE(review): these two entries are registered with a null form
    d.add(basicSuffix("outosuru", null, "u"));
    d.add(basicSuffix("ou to suru",null, "u"));

    // "eru" suffix handling with the "rareru" exception, see EruDeinflector
    d.add(new EruDeinflector());

    // past tense / continuation suffixes
    d.add(basicSuffix("sita", Form.PAST_TENSE, "su"));
    d.add(basicSuffix("site",Form.CONTINUATION, "su"));

    d.add(basicSuffix("ita", Form.PAST_TENSE, "ku", "iru"));
    d.add(basicSuffix("ite",Form.CONTINUATION, "ku", "iru"));

    d.add(basicSuffix("eta", Form.PAST_TENSE, "eru"));
    d.add(basicSuffix("ete",Form.CONTINUATION, "eru"));
    d.add(basicSuffix("ida", Form.PAST_TENSE, "gu"));
    d.add(basicSuffix("ide",Form.CONTINUATION, "gu"));
    d.add(basicSuffix("tta", Form.PAST_TENSE, "tu", "u", "ru"));
    d.add(basicSuffix("tte",Form.CONTINUATION, "tu", "u", "ru"));
    d.add(basicSuffix("nda", Form.PAST_TENSE, "nu", "bu", "mu"));
    d.add(basicSuffix("nde",Form.CONTINUATION, "nu", "bu", "mu"));
    DEINFLECTORS = d;
}
287
288 /***
289 * Attempts to deinflect given verb.
290 *
291 * @param japanese
292 * {@link RomanizationEnum#NihonShiki} romaji or hiragana.
293 * @return deinflected verb(s)
294 */
295 public static Deinflections deinflect(final String japanese) {
296 final Deinflections result = new Deinflections();
297 result.deinflections=new ArrayList<Deinflection>();
298 result.deinflectedVerbs = new HashSet<String>();
299 Set<String> finalDeinflect = new HashSet<String>();
300 result.deinflectedVerbs.add(RomanizationEnum.NihonShiki.toRomaji(japanese).trim());
301 for (final AbstractDeinflector deinflector : DEINFLECTORS) {
302 final Set<String> newResult = new HashSet<String>(result.deinflectedVerbs);
303 for (final String romaji : result.deinflectedVerbs) {
304 final Set<String> deinflected = deinflector.deinflect(romaji);
305 if (deinflected != null && !deinflected.isEmpty()) {
306
307
308 newResult.remove(romaji);
309 if (deinflector.stopIfMatch()) {
310 finalDeinflect.addAll(deinflected);
311 } else {
312 newResult.addAll(deinflected);
313 }
314 result.deinflections.add(new Deinflection(romaji, deinflector.getForm(), deinflected.toArray(new String[0])));
315 }
316 }
317 result.deinflectedVerbs = newResult;
318 }
319 result.deinflectedVerbs.addAll(finalDeinflect);
320 return result;
321 }
322
/**
 * Not instantiable: this class offers static methods only. The constructor
 * throws so that it fails even when invoked from within this class.
 */
private VerbDeinflection() {
    throw new AssertionError();
}
326
327 /***
328 * Creates an EDICT query which searches for a japanese term. Automatically performs a verb deinflection.
329 *
330 * @param verb
331 * the word to search, in japanese language, may contain romaji.
332 * Full-width katakana conversion is performed automatically. Not
333 * null
334 * @param romanization
335 * the romanization system to use, not null.
336 * @return search query, never null
337 */
338 public static Deinflections searchJpDeinflected(final String verb, final RomanizationEnum romanization) {
339 final SearchQuery result = new SearchQuery(DictTypeEnum.Edict);
340 final String conv = KanjiUtils.halfwidthToKatakana(verb);
341 final String romaji = RomanizationEnum.NihonShiki.toRomaji(romanization.toHiragana(conv));
342 final Deinflections deinflections = VerbDeinflection.deinflect(romaji);
343 result.query = deinflections.deinflectedVerbs.toArray(new String[0]);
344 for (int i = 0; i < result.query.length; i++) {
345 result.query[i] = RomanizationEnum.NihonShiki.toHiragana(result.query[i]);
346 }
347 result.isJapanese = true;
348 result.matcher = MatcherEnum.Exact;
349 deinflections.query = result;
350 return deinflections;
351 }
352 }