#include #include #include "el_runtime.h" el_val_t str_ends(el_val_t s, el_val_t suf); el_val_t str_last_char(el_val_t s); el_val_t str_last2(el_val_t s); el_val_t str_last3(el_val_t s); el_val_t str_drop_last(el_val_t s, el_val_t n); el_val_t is_vowel(el_val_t c); el_val_t morph_apply_suffix(el_val_t base, el_val_t suffix); el_val_t en_irregular_plural(el_val_t word); el_val_t en_irregular_singular(el_val_t word); el_val_t en_irregular_verb(el_val_t base); el_val_t en_verb_3sg(el_val_t base); el_val_t en_should_double_final(el_val_t base); el_val_t en_verb_past(el_val_t base); el_val_t en_verb_gerund(el_val_t base); el_val_t en_pluralize_regular(el_val_t singular); el_val_t en_verb_form(el_val_t base, el_val_t tense, el_val_t person, el_val_t number); el_val_t agree_determiner(el_val_t det, el_val_t noun); el_val_t morph_pluralize(el_val_t noun, el_val_t profile); el_val_t morph_map_canonical(el_val_t verb, el_val_t code); el_val_t morph_conjugate(el_val_t verb, el_val_t tense, el_val_t person, el_val_t number, el_val_t profile); el_val_t morph_inflect(el_val_t word, el_val_t features, el_val_t profile); el_val_t pluralize(el_val_t singular); el_val_t singularize(el_val_t plural); el_val_t verb_form(el_val_t base, el_val_t tense, el_val_t person, el_val_t number); el_val_t irregular_plural(el_val_t word); el_val_t irregular_singular(el_val_t word); el_val_t fi_harmony(el_val_t word); el_val_t fi_suffix(el_val_t base, el_val_t harmony); el_val_t fi_noun_case(el_val_t stem, el_val_t gram_case, el_val_t number, el_val_t harmony); el_val_t fi_str_last_char(el_val_t s); el_val_t fi_apply_case(el_val_t noun, el_val_t gram_case, el_val_t number); el_val_t fi_verb_stem(el_val_t dict_form); el_val_t fi_irregular_verb(el_val_t dict_form); el_val_t fi_present_ending(el_val_t stem, el_val_t person, el_val_t number, el_val_t harmony); el_val_t fi_past_stem(el_val_t stem); el_val_t fi_past_ending(el_val_t stem, el_val_t person, el_val_t number, el_val_t harmony); el_val_t fi_neg_aux(el_val_t person, el_val_t number); el_val_t fi_negative(el_val_t verb, el_val_t person, el_val_t number); el_val_t fi_conjugate(el_val_t verb, el_val_t tense, el_val_t person, el_val_t number); el_val_t fi_question_suffix(el_val_t harmony); el_val_t fi_make_question(el_val_t verb_form, el_val_t harmony); el_val_t fi_full_paradigm(el_val_t noun); el_val_t fi_harmony(el_val_t word) { el_val_t n = str_len(word); el_val_t i = (n - 1); while (i >= 0) { el_val_t c = str_slice(word, i, (i + 1)); if (str_eq(c, EL_STR("a"))) { return EL_STR("back"); } if (str_eq(c, EL_STR("o"))) { return EL_STR("back"); } if (str_eq(c, EL_STR("u"))) { return EL_STR("back"); } if (str_eq(c, EL_STR("\xc3\xa4"))) { return EL_STR("front"); } if (str_eq(c, EL_STR("\xc3\xb6"))) { return EL_STR("front"); } if (str_eq(c, EL_STR("y"))) { return EL_STR("front"); } i = (i - 1); } return EL_STR("front"); return 0; } el_val_t fi_suffix(el_val_t base, el_val_t harmony) { if (str_eq(harmony, EL_STR("front"))) { if (str_eq(base, EL_STR("a"))) { return EL_STR("\xc3\xa4"); } if (str_eq(base, EL_STR("ssa"))) { return EL_STR("ss\xc3\xa4"); } if (str_eq(base, EL_STR("sta"))) { return EL_STR("st\xc3\xa4"); } if (str_eq(base, EL_STR("an"))) { return EL_STR("\xc3\xa4n"); } if (str_eq(base, EL_STR("aan"))) { return EL_STR("\xc3\xa4\xc3\xa4n"); } if (str_eq(base, EL_STR("lla"))) { return EL_STR("ll\xc3\xa4"); } if (str_eq(base, EL_STR("lta"))) { return EL_STR("lt\xc3\xa4"); } if (str_eq(base, EL_STR("lle"))) { return EL_STR("lle"); } if (str_eq(base, EL_STR("na"))) { return EL_STR("n\xc3\xa4"); } if (str_eq(base, EL_STR("ksi"))) { return EL_STR("ksi"); } if (str_eq(base, EL_STR("tta"))) { return EL_STR("tt\xc3\xa4"); } if (str_eq(base, EL_STR("ta"))) { return EL_STR("t\xc3\xa4"); } if (str_eq(base, EL_STR("ja"))) { return EL_STR("j\xc3\xa4"); } if (str_eq(base, EL_STR("oja"))) { return EL_STR("\xc3\xb6j\xc3\xa4"); } if (str_eq(base, EL_STR("issa"))) { return EL_STR("iss\xc3\xa4"); } if (str_eq(base, EL_STR("ista"))) { return EL_STR("ist\xc3\xa4"); } if (str_eq(base, EL_STR("ihin"))) { return EL_STR("ihin"); } if (str_eq(base, EL_STR("illa"))) { return EL_STR("ill\xc3\xa4"); } if (str_eq(base, EL_STR("ilta"))) { return EL_STR("ilt\xc3\xa4"); } if (str_eq(base, EL_STR("ille"))) { return EL_STR("ille"); } if (str_eq(base, EL_STR("ina"))) { return EL_STR("in\xc3\xa4"); } if (str_eq(base, EL_STR("itta"))) { return EL_STR("itt\xc3\xa4"); } if (str_eq(base, EL_STR("ko"))) { return EL_STR("k\xc3\xb6"); } if (str_eq(base, EL_STR("pa"))) { return EL_STR("p\xc3\xa4"); } if (str_eq(base, EL_STR("va"))) { return EL_STR("v\xc3\xa4"); } if (str_eq(base, EL_STR("ma"))) { return EL_STR("m\xc3\xa4"); } if (str_eq(base, EL_STR("han"))) { return EL_STR("h\xc3\xa4n"); } if (str_eq(base, EL_STR("lla"))) { return EL_STR("ll\xc3\xa4"); } return base; } return base; return 0; } el_val_t fi_noun_case(el_val_t stem, el_val_t gram_case, el_val_t number, el_val_t harmony) { el_val_t sg = str_eq(number, EL_STR("singular")); if (str_eq(gram_case, EL_STR("nominative"))) { if (sg) { return stem; } return el_str_concat(stem, EL_STR("t")); } if (str_eq(gram_case, EL_STR("genitive"))) { if (sg) { return el_str_concat(stem, EL_STR("n")); } return el_str_concat(stem, EL_STR("jen")); } if (str_eq(gram_case, EL_STR("accusative"))) { if (sg) { return el_str_concat(stem, EL_STR("n")); } return el_str_concat(stem, EL_STR("t")); } if (str_eq(gram_case, EL_STR("partitive"))) { if (sg) { return el_str_concat(stem, fi_suffix(EL_STR("a"), harmony)); } return el_str_concat(stem, fi_suffix(EL_STR("ja"), harmony)); } if (str_eq(gram_case, EL_STR("inessive"))) { if (sg) { return el_str_concat(stem, fi_suffix(EL_STR("ssa"), harmony)); } return el_str_concat(stem, fi_suffix(EL_STR("issa"), harmony)); } if (str_eq(gram_case, EL_STR("elative"))) { if (sg) { return el_str_concat(stem, fi_suffix(EL_STR("sta"), harmony)); } return el_str_concat(stem, fi_suffix(EL_STR("ista"), harmony)); } if (str_eq(gram_case, EL_STR("illative"))) { if (sg) { el_val_t last = fi_str_last_char(stem); return el_str_concat(el_str_concat(stem, last), EL_STR("n")); } return el_str_concat(stem, fi_suffix(EL_STR("ihin"), harmony)); } if (str_eq(gram_case, EL_STR("adessive"))) { if (sg) { return el_str_concat(stem, fi_suffix(EL_STR("lla"), harmony)); } return el_str_concat(stem, fi_suffix(EL_STR("illa"), harmony)); } if (str_eq(gram_case, EL_STR("ablative"))) { if (sg) { return el_str_concat(stem, fi_suffix(EL_STR("lta"), harmony)); } return el_str_concat(stem, fi_suffix(EL_STR("ilta"), harmony)); } if (str_eq(gram_case, EL_STR("allative"))) { if (sg) { return el_str_concat(stem, EL_STR("lle")); } return el_str_concat(stem, EL_STR("ille")); } if (str_eq(gram_case, EL_STR("essive"))) { if (sg) { return el_str_concat(stem, fi_suffix(EL_STR("na"), harmony)); } return el_str_concat(stem, fi_suffix(EL_STR("ina"), harmony)); } if (str_eq(gram_case, EL_STR("translative"))) { if (sg) { return el_str_concat(stem, EL_STR("ksi")); } return el_str_concat(stem, EL_STR("iksi")); } if (str_eq(gram_case, EL_STR("instructive"))) { return el_str_concat(stem, EL_STR("in")); } if (str_eq(gram_case, EL_STR("abessive"))) { if (sg) { return el_str_concat(stem, fi_suffix(EL_STR("tta"), harmony)); } return el_str_concat(stem, fi_suffix(EL_STR("itta"), harmony)); } if (str_eq(gram_case, EL_STR("comitative"))) { return el_str_concat(stem, EL_STR("ineen")); } return stem; return 0; } el_val_t fi_str_last_char(el_val_t s) { el_val_t n = str_len(s); if (n == 0) { return EL_STR(""); } return str_slice(s, (n - 1), n); return 0; } el_val_t fi_apply_case(el_val_t noun, el_val_t gram_case, el_val_t number) { el_val_t harmony = fi_harmony(noun); if (str_eq(gram_case, EL_STR("nominative"))) { if (str_eq(number, EL_STR("singular"))) { return noun; } return el_str_concat(noun, EL_STR("t")); } return fi_noun_case(noun, gram_case, number, harmony); return 0; } el_val_t fi_verb_stem(el_val_t dict_form) { if (str_ends_with(dict_form, EL_STR("da"))) { return str_drop_last(dict_form, 2); } if (str_ends_with(dict_form, EL_STR("d\xc3\xa4"))) { return str_drop_last(dict_form, 2); } if (str_ends_with(dict_form, EL_STR("lla"))) { return str_drop_last(dict_form, 2); } if (str_ends_with(dict_form, EL_STR("ll\xc3\xa4"))) { return str_drop_last(dict_form, 2); } if (str_ends_with(dict_form, EL_STR("rra"))) { return str_drop_last(dict_form, 2); } if (str_ends_with(dict_form, EL_STR("nna"))) { return str_drop_last(dict_form, 2); } if (str_ends_with(dict_form, EL_STR("a"))) { return str_drop_last(dict_form, 1); } if (str_ends_with(dict_form, EL_STR("\xc3\xa4"))) { return str_drop_last(dict_form, 1); } return dict_form; return 0; } el_val_t fi_irregular_verb(el_val_t dict_form) { el_val_t empty = el_list_empty(); if (str_eq(dict_form, EL_STR("olla"))) { el_val_t r = el_list_new(18, EL_STR("olla"), EL_STR("olen"), EL_STR("olet"), EL_STR("on"), EL_STR("olemme"), EL_STR("olette"), EL_STR("ovat"), EL_STR("olin"), EL_STR("olit"), EL_STR("oli"), EL_STR("olimme"), EL_STR("olitte"), EL_STR("olivat"), EL_STR("ole"), EL_STR("olis"), EL_STR("ole"), EL_STR("oleva"), EL_STR("ollut")); return r; } if (str_eq(dict_form, EL_STR("voida"))) { el_val_t r = el_list_new(18, EL_STR("voida"), EL_STR("voin"), EL_STR("voit"), EL_STR("voi"), EL_STR("voimme"), EL_STR("voitte"), EL_STR("voivat"), EL_STR("voin"), EL_STR("voit"), EL_STR("voi"), EL_STR("voimme"), EL_STR("voitte"), EL_STR("voivat"), EL_STR("voi"), EL_STR("vois"), EL_STR("voi"), EL_STR("voiva"), EL_STR("voinut")); return r; } if (str_eq(dict_form, EL_STR("menn\xc3\xa4"))) { el_val_t r = el_list_new(18, EL_STR("menn\xc3\xa4"), EL_STR("menen"), EL_STR("menet"), EL_STR("menee"), EL_STR("menemme"), EL_STR("menette"), EL_STR("menev\xc3\xa4t"), EL_STR("menin"), EL_STR("menit"), EL_STR("meni"), EL_STR("menimme"), EL_STR("menitte"), EL_STR("meniv\xc3\xa4t"), EL_STR("mene"), EL_STR("menis"), EL_STR("mene"), EL_STR("menev\xc3\xa4"), EL_STR("mennyt")); return r; } if (str_eq(dict_form, EL_STR("tulla"))) { el_val_t r = el_list_new(18, EL_STR("tulla"), EL_STR("tulen"), EL_STR("tulet"), EL_STR("tulee"), EL_STR("tulemme"), EL_STR("tulette"), EL_STR("tulevat"), EL_STR("tulin"), EL_STR("tulit"), EL_STR("tuli"), EL_STR("tulimme"), EL_STR("tulitte"), EL_STR("tulivat"), EL_STR("tule"), EL_STR("tulis"), EL_STR("tule"), EL_STR("tuleva"), EL_STR("tullut")); return r; } if (str_eq(dict_form, EL_STR("tehd\xc3\xa4"))) { el_val_t r = el_list_new(18, EL_STR("tehd\xc3\xa4"), EL_STR("teen"), EL_STR("teet"), EL_STR("tekee"), EL_STR("teemme"), EL_STR("teette"), EL_STR("tekev\xc3\xa4t"), EL_STR("tein"), EL_STR("teit"), EL_STR("teki"), EL_STR("teimme"), EL_STR("teitte"), EL_STR("tekiv\xc3\xa4t"), EL_STR("tee"), EL_STR("tekis"), EL_STR("tee"), EL_STR("tekev\xc3\xa4"), EL_STR("tehnyt")); return r; } if (str_eq(dict_form, EL_STR("n\xc3\xa4hd\xc3\xa4"))) { el_val_t r = el_list_new(18, EL_STR("n\xc3\xa4hd\xc3\xa4"), EL_STR("n\xc3\xa4""en"), EL_STR("n\xc3\xa4""et"), EL_STR("n\xc3\xa4kee"), EL_STR("n\xc3\xa4""emme"), EL_STR("n\xc3\xa4""ette"), EL_STR("n\xc3\xa4kev\xc3\xa4t"), EL_STR("n\xc3\xa4in"), EL_STR("n\xc3\xa4it"), EL_STR("n\xc3\xa4ki"), EL_STR("n\xc3\xa4imme"), EL_STR("n\xc3\xa4itte"), EL_STR("n\xc3\xa4kiv\xc3\xa4t"), EL_STR("n\xc3\xa4""e"), EL_STR("n\xc3\xa4kis"), EL_STR("n\xc3\xa4""e"), EL_STR("n\xc3\xa4kev\xc3\xa4"), EL_STR("n\xc3\xa4hnyt")); return r; } if (str_eq(dict_form, EL_STR("saada"))) { el_val_t r = el_list_new(18, EL_STR("saada"), EL_STR("saan"), EL_STR("saat"), EL_STR("saa"), EL_STR("saamme"), EL_STR("saatte"), EL_STR("saavat"), EL_STR("sain"), EL_STR("sait"), EL_STR("sai"), EL_STR("saimme"), EL_STR("saitte"), EL_STR("saivat"), EL_STR("saa"), EL_STR("sais"), EL_STR("saa"), EL_STR("saava"), EL_STR("saanut")); return r; } if (str_eq(dict_form, EL_STR("pit\xc3\xa4\xc3\xa4"))) { el_val_t r = el_list_new(18, EL_STR("pit\xc3\xa4\xc3\xa4"), EL_STR("pid\xc3\xa4n"), EL_STR("pid\xc3\xa4t"), EL_STR("pit\xc3\xa4\xc3\xa4"), EL_STR("pid\xc3\xa4mme"), EL_STR("pid\xc3\xa4tte"), EL_STR("pit\xc3\xa4v\xc3\xa4t"), EL_STR("pidin"), EL_STR("pidit"), EL_STR("piti"), EL_STR("pidimme"), EL_STR("piditte"), EL_STR("pitiv\xc3\xa4t"), EL_STR("pid\xc3\xa4"), EL_STR("pit\xc3\xa4is"), EL_STR("pid\xc3\xa4"), EL_STR("pit\xc3\xa4v\xc3\xa4"), EL_STR("pit\xc3\xa4nyt")); return r; } if (str_eq(dict_form, EL_STR("tiet\xc3\xa4\xc3\xa4"))) { el_val_t r = el_list_new(18, EL_STR("tiet\xc3\xa4\xc3\xa4"), EL_STR("tied\xc3\xa4n"), EL_STR("tied\xc3\xa4t"), EL_STR("tiet\xc3\xa4\xc3\xa4"), EL_STR("tied\xc3\xa4mme"), EL_STR("tied\xc3\xa4tte"), EL_STR("tiet\xc3\xa4v\xc3\xa4t"), EL_STR("tiesin"), EL_STR("tiesit"), EL_STR("tiesi"), EL_STR("tiesimme"), EL_STR("tiesitte"), EL_STR("tiesiv\xc3\xa4t"), EL_STR("tied\xc3\xa4"), EL_STR("tiet\xc3\xa4is"), EL_STR("tied\xc3\xa4"), EL_STR("tiet\xc3\xa4v\xc3\xa4"), EL_STR("tiennyt")); return r; } return empty; return 0; } el_val_t fi_present_ending(el_val_t stem, el_val_t person, el_val_t number, el_val_t harmony) { if (str_eq(number, EL_STR("singular"))) { if (str_eq(person, EL_STR("first"))) { return el_str_concat(stem, EL_STR("n")); } if (str_eq(person, EL_STR("second"))) { return el_str_concat(stem, EL_STR("t")); } if (str_eq(person, EL_STR("third"))) { el_val_t last = fi_str_last_char(stem); return el_str_concat(stem, last); } } if (str_eq(number, EL_STR("plural"))) { if (str_eq(person, EL_STR("first"))) { return el_str_concat(stem, EL_STR("mme")); } if (str_eq(person, EL_STR("second"))) { return el_str_concat(stem, EL_STR("tte")); } if (str_eq(person, EL_STR("third"))) { return el_str_concat(stem, fi_suffix(EL_STR("vat"), harmony)); } } return stem; return 0; } el_val_t fi_past_stem(el_val_t stem) { el_val_t last = fi_str_last_char(stem); if (str_eq(last, EL_STR("a"))) { return el_str_concat(str_drop_last(stem, 1), EL_STR("oi")); } if (str_eq(last, EL_STR("\xc3\xa4"))) { return el_str_concat(str_drop_last(stem, 1), EL_STR("\xc3\xb6i")); } return el_str_concat(stem, EL_STR("i")); return 0; } el_val_t fi_past_ending(el_val_t stem, el_val_t person, el_val_t number, el_val_t harmony) { el_val_t pstem = fi_past_stem(stem); if (str_eq(number, EL_STR("singular"))) { if (str_eq(person, EL_STR("first"))) { return el_str_concat(pstem, EL_STR("n")); } if (str_eq(person, EL_STR("second"))) { return el_str_concat(pstem, EL_STR("t")); } if (str_eq(person, EL_STR("third"))) { return str_drop_last(pstem, 1); } } if (str_eq(number, EL_STR("plural"))) { if (str_eq(person, EL_STR("first"))) { return el_str_concat(pstem, EL_STR("mme")); } if (str_eq(person, EL_STR("second"))) { return el_str_concat(pstem, EL_STR("tte")); } if (str_eq(person, EL_STR("third"))) { return el_str_concat(pstem, fi_suffix(EL_STR("vat"), harmony)); } } return pstem; return 0; } el_val_t fi_neg_aux(el_val_t person, el_val_t number) { if (str_eq(number, EL_STR("singular"))) { if (str_eq(person, EL_STR("first"))) { return EL_STR("en"); } if (str_eq(person, EL_STR("second"))) { return EL_STR("et"); } if (str_eq(person, EL_STR("third"))) { return EL_STR("ei"); } } if (str_eq(number, EL_STR("plural"))) { if (str_eq(person, EL_STR("first"))) { return EL_STR("emme"); } if (str_eq(person, EL_STR("second"))) { return EL_STR("ette"); } if (str_eq(person, EL_STR("third"))) { return EL_STR("eiv\xc3\xa4t"); } } return EL_STR("ei"); return 0; } el_val_t fi_negative(el_val_t verb, el_val_t person, el_val_t number) { el_val_t irreg = fi_irregular_verb(verb); el_val_t aux = fi_neg_aux(person, number); if (native_list_len(irreg) > 0) { el_val_t neg_stem = native_list_get(irreg, 13); return el_str_concat(el_str_concat(aux, EL_STR(" ")), neg_stem); } el_val_t stem = fi_verb_stem(verb); return el_str_concat(el_str_concat(aux, EL_STR(" ")), stem); return 0; } el_val_t fi_conjugate(el_val_t verb, el_val_t tense, el_val_t person, el_val_t number) { el_val_t harmony = fi_harmony(verb); el_val_t irreg = fi_irregular_verb(verb); if (native_list_len(irreg) > 0) { if (str_eq(tense, EL_STR("present"))) { if (str_eq(number, EL_STR("singular"))) { if (str_eq(person, EL_STR("first"))) { return native_list_get(irreg, 1); } if (str_eq(person, EL_STR("second"))) { return native_list_get(irreg, 2); } if (str_eq(person, EL_STR("third"))) { return native_list_get(irreg, 3); } } if (str_eq(number, EL_STR("plural"))) { if (str_eq(person, EL_STR("first"))) { return native_list_get(irreg, 4); } if (str_eq(person, EL_STR("second"))) { return native_list_get(irreg, 5); } if (str_eq(person, EL_STR("third"))) { return native_list_get(irreg, 6); } } } if (str_eq(tense, EL_STR("past"))) { if (str_eq(number, EL_STR("singular"))) { if (str_eq(person, EL_STR("first"))) { return native_list_get(irreg, 7); } if (str_eq(person, EL_STR("second"))) { return native_list_get(irreg, 8); } if (str_eq(person, EL_STR("third"))) { return native_list_get(irreg, 9); } } if (str_eq(number, EL_STR("plural"))) { if (str_eq(person, EL_STR("first"))) { return native_list_get(irreg, 10); } if (str_eq(person, EL_STR("second"))) { return native_list_get(irreg, 11); } if (str_eq(person, EL_STR("third"))) { return native_list_get(irreg, 12); } } } } el_val_t stem = fi_verb_stem(verb); if (str_eq(tense, EL_STR("present"))) { return fi_present_ending(stem, person, number, harmony); } if (str_eq(tense, EL_STR("past"))) { return fi_past_ending(stem, person, number, harmony); } return stem; return 0; } el_val_t fi_question_suffix(el_val_t harmony) { if (str_eq(harmony, EL_STR("front"))) { return EL_STR("k\xc3\xb6"); } return EL_STR("ko"); return 0; } el_val_t fi_make_question(el_val_t verb_form, el_val_t harmony) { return el_str_concat(verb_form, fi_question_suffix(harmony)); return 0; } el_val_t fi_full_paradigm(el_val_t noun) { el_val_t harmony = fi_harmony(noun); el_val_t r = el_list_empty(); el_val_t cases = el_list_new(15, EL_STR("nominative"), EL_STR("genitive"), EL_STR("accusative"), EL_STR("partitive"), EL_STR("inessive"), EL_STR("elative"), EL_STR("illative"), EL_STR("adessive"), EL_STR("ablative"), EL_STR("allative"), EL_STR("essive"), EL_STR("translative"), EL_STR("instructive"), EL_STR("abessive"), EL_STR("comitative")); el_val_t n = native_list_len(cases); el_val_t i = 0; while (i < n) { el_val_t c = native_list_get(cases, i); r = native_list_append(r, c); if (str_eq(c, EL_STR("instructive"))) { r = native_list_append(r, EL_STR("")); } else { if (str_eq(c, EL_STR("comitative"))) { r = native_list_append(r, EL_STR("")); } else { r = native_list_append(r, fi_noun_case(noun, c, EL_STR("singular"), harmony)); } } r = native_list_append(r, fi_noun_case(noun, c, EL_STR("plural"), harmony)); i = (i + 1); } return r; return 0; }