#include #include #include "el_runtime.h" el_val_t slots_get(el_val_t slots, el_val_t key); el_val_t slots_set(el_val_t slots, el_val_t key, el_val_t val); el_val_t make_slots(el_val_t k0, el_val_t v0); el_val_t make_slots2(el_val_t k0, el_val_t v0, el_val_t k1, el_val_t v1); el_val_t make_slots3(el_val_t k0, el_val_t v0, el_val_t k1, el_val_t v1, el_val_t k2, el_val_t v2); el_val_t make_slots4(el_val_t k0, el_val_t v0, el_val_t k1, el_val_t v1, el_val_t k2, el_val_t v2, el_val_t k3, el_val_t v3); el_val_t make_slots5(el_val_t k0, el_val_t v0, el_val_t k1, el_val_t v1, el_val_t k2, el_val_t v2, el_val_t k3, el_val_t v3, el_val_t k4, el_val_t v4); el_val_t rule_id(el_val_t rule); el_val_t rule_lhs(el_val_t rule); el_val_t rule_rhs_len(el_val_t rule); el_val_t rule_rhs(el_val_t rule, el_val_t idx); el_val_t make_rule(el_val_t id, el_val_t lhs, el_val_t r0); el_val_t make_rule2(el_val_t id, el_val_t lhs, el_val_t r0, el_val_t r1); el_val_t make_rule3(el_val_t id, el_val_t lhs, el_val_t r0, el_val_t r1, el_val_t r2); el_val_t make_rule4(el_val_t id, el_val_t lhs, el_val_t r0, el_val_t r1, el_val_t r2, el_val_t r3); el_val_t build_rules(void); el_val_t get_rules(void); el_val_t find_rule(el_val_t rule_id_str); el_val_t make_leaf(el_val_t label, el_val_t word); el_val_t make_node1(el_val_t label, el_val_t child0); el_val_t make_node2(el_val_t label, el_val_t child0, el_val_t child1); el_val_t make_node3(el_val_t label, el_val_t child0, el_val_t child1, el_val_t child2); el_val_t make_node4(el_val_t label, el_val_t child0, el_val_t child1, el_val_t child2, el_val_t child3); el_val_t nlg_is_ws(el_val_t c); el_val_t skip_ws(el_val_t s, el_val_t pos); el_val_t scan_token(el_val_t s, el_val_t start); el_val_t render_tree(el_val_t tree); el_val_t gram_word_order(el_val_t profile); el_val_t gram_order_constituents(el_val_t subj, el_val_t verb, el_val_t obj, el_val_t profile); el_val_t gram_build_vp(el_val_t verb, el_val_t aux, el_val_t profile); el_val_t gram_question_strategy(el_val_t profile); el_val_t is_pronoun(el_val_t word); el_val_t build_np(el_val_t referent, el_val_t slots); el_val_t build_pp(el_val_t loc); el_val_t build_vp_body(el_val_t slots); el_val_t build_vp_from_slots(el_val_t slots); el_val_t generate_tree(el_val_t rule_id_str, el_val_t slots); el_val_t slots_get(el_val_t slots, el_val_t key) { el_val_t n = native_list_len(slots); el_val_t i = 0; while (i < (n - 1)) { el_val_t k = native_list_get(slots, i); if (str_eq(k, key)) { return native_list_get(slots, (i + 1)); } i = (i + 2); } return EL_STR(""); return 0; } el_val_t slots_set(el_val_t slots, el_val_t key, el_val_t val) { el_val_t n = native_list_len(slots); el_val_t result = native_list_empty(); el_val_t found = 0; el_val_t i = 0; while (i < (n - 1)) { el_val_t k = native_list_get(slots, i); el_val_t v = native_list_get(slots, (i + 1)); if (str_eq(k, key)) { result = native_list_append(result, k); result = native_list_append(result, val); found = 1; } else { result = native_list_append(result, k); result = native_list_append(result, v); } i = (i + 2); } if (!found) { result = native_list_append(result, key); result = native_list_append(result, val); } return result; return 0; } el_val_t make_slots(el_val_t k0, el_val_t v0) { el_val_t r = native_list_empty(); r = native_list_append(r, k0); r = native_list_append(r, v0); return r; return 0; } el_val_t make_slots2(el_val_t k0, el_val_t v0, el_val_t k1, el_val_t v1) { el_val_t r = make_slots(k0, v0); r = native_list_append(r, k1); r = native_list_append(r, v1); return r; return 0; } el_val_t make_slots3(el_val_t k0, el_val_t v0, el_val_t k1, el_val_t v1, el_val_t k2, el_val_t v2) { el_val_t r = make_slots2(k0, v0, k1, v1); r = native_list_append(r, k2); r = native_list_append(r, v2); return r; return 0; } el_val_t make_slots4(el_val_t k0, el_val_t v0, el_val_t k1, el_val_t v1, el_val_t k2, el_val_t v2, el_val_t k3, el_val_t v3) { el_val_t r = make_slots3(k0, v0, k1, v1, k2, v2); r = native_list_append(r, k3); r = native_list_append(r, v3); return r; return 0; } el_val_t make_slots5(el_val_t k0, el_val_t v0, el_val_t k1, el_val_t v1, el_val_t k2, el_val_t v2, el_val_t k3, el_val_t v3, el_val_t k4, el_val_t v4) { el_val_t r = make_slots4(k0, v0, k1, v1, k2, v2, k3, v3); r = native_list_append(r, k4); r = native_list_append(r, v4); return r; return 0; } el_val_t rule_id(el_val_t rule) { return native_list_get(rule, 0); return 0; } el_val_t rule_lhs(el_val_t rule) { return native_list_get(rule, 1); return 0; } el_val_t rule_rhs_len(el_val_t rule) { el_val_t n = native_list_len(rule); return (n - 2); return 0; } el_val_t rule_rhs(el_val_t rule, el_val_t idx) { return native_list_get(rule, (idx + 2)); return 0; } el_val_t make_rule(el_val_t id, el_val_t lhs, el_val_t r0) { el_val_t r = native_list_empty(); r = native_list_append(r, id); r = native_list_append(r, lhs); r = native_list_append(r, r0); return r; return 0; } el_val_t make_rule2(el_val_t id, el_val_t lhs, el_val_t r0, el_val_t r1) { el_val_t r = make_rule(id, lhs, r0); r = native_list_append(r, r1); return r; return 0; } el_val_t make_rule3(el_val_t id, el_val_t lhs, el_val_t r0, el_val_t r1, el_val_t r2) { el_val_t r = make_rule2(id, lhs, r0, r1); r = native_list_append(r, r2); return r; return 0; } el_val_t make_rule4(el_val_t id, el_val_t lhs, el_val_t r0, el_val_t r1, el_val_t r2, el_val_t r3) { el_val_t r = make_rule3(id, lhs, r0, r1, r2); r = native_list_append(r, r3); return r; return 0; } el_val_t build_rules(void) { el_val_t rules = native_list_empty(); rules = native_list_append(rules, make_rule2(EL_STR("S-DECL"), EL_STR("S"), EL_STR("NP"), EL_STR("VP"))); rules = native_list_append(rules, make_rule3(EL_STR("S-QUEST"), EL_STR("S"), EL_STR("Aux"), EL_STR("NP"), EL_STR("VP"))); rules = native_list_append(rules, make_rule(EL_STR("S-IMP"), EL_STR("S"), EL_STR("VP"))); rules = native_list_append(rules, make_rule2(EL_STR("NP-DET-N"), EL_STR("NP"), EL_STR("Det"), EL_STR("N"))); rules = native_list_append(rules, make_rule3(EL_STR("NP-DET-ADJ-N"), EL_STR("NP"), EL_STR("Det"), EL_STR("Adj"), EL_STR("N"))); rules = native_list_append(rules, make_rule(EL_STR("NP-PRON"), EL_STR("NP"), EL_STR("Pron"))); rules = native_list_append(rules, make_rule(EL_STR("NP-N"), EL_STR("NP"), EL_STR("N"))); rules = native_list_append(rules, make_rule(EL_STR("VP-V"), EL_STR("VP"), EL_STR("V"))); rules = native_list_append(rules, make_rule2(EL_STR("VP-V-NP"), EL_STR("VP"), EL_STR("V"), EL_STR("NP"))); rules = native_list_append(rules, make_rule2(EL_STR("VP-V-PP"), EL_STR("VP"), EL_STR("V"), EL_STR("PP"))); rules = native_list_append(rules, make_rule3(EL_STR("VP-V-NP-PP"), EL_STR("VP"), EL_STR("V"), EL_STR("NP"), EL_STR("PP"))); rules = native_list_append(rules, make_rule2(EL_STR("VP-AUX-V"), EL_STR("VP"), EL_STR("Aux"), EL_STR("V"))); rules = native_list_append(rules, make_rule3(EL_STR("VP-AUX-V-NP"), EL_STR("VP"), EL_STR("Aux"), EL_STR("V"), EL_STR("NP"))); rules = native_list_append(rules, make_rule2(EL_STR("PP-P-NP"), EL_STR("PP"), EL_STR("P"), EL_STR("NP"))); return rules; return 0; } el_val_t get_rules(void) { return build_rules(); return 0; } el_val_t find_rule(el_val_t rule_id_str) { el_val_t rules = get_rules(); el_val_t n = native_list_len(rules); el_val_t i = 0; while (i < n) { el_val_t rule = native_list_get(rules, i); el_val_t id = native_list_get(rule, 0); if (str_eq(id, rule_id_str)) { return rule; } i = (i + 1); } el_val_t empty = native_list_empty(); return empty; return 0; } el_val_t make_leaf(el_val_t label, el_val_t word) { return el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("("), label), EL_STR(" ")), word), EL_STR(")")); return 0; } el_val_t make_node1(el_val_t label, el_val_t child0) { return el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("("), label), EL_STR(" _ ")), child0), EL_STR(")")); return 0; } el_val_t make_node2(el_val_t label, el_val_t child0, el_val_t child1) { return el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("("), label), EL_STR(" _ ")), child0), EL_STR(" ")), child1), EL_STR(")")); return 0; } el_val_t make_node3(el_val_t label, el_val_t child0, el_val_t child1, el_val_t child2) { return el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("("), label), EL_STR(" _ ")), child0), EL_STR(" ")), child1), EL_STR(" ")), child2), EL_STR(")")); return 0; } el_val_t make_node4(el_val_t label, el_val_t child0, el_val_t child1, el_val_t child2, el_val_t child3) { return el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(el_str_concat(EL_STR("("), label), EL_STR(" _ ")), child0), EL_STR(" ")), child1), EL_STR(" ")), child2), EL_STR(" ")), child3), EL_STR(")")); return 0; } el_val_t nlg_is_ws(el_val_t c) { if (str_eq(c, EL_STR(" "))) { return 1; } if (str_eq(c, EL_STR("\t"))) { return 1; } if (str_eq(c, EL_STR("\n"))) { return 1; } return 0; return 0; } el_val_t skip_ws(el_val_t s, el_val_t pos) { el_val_t n = str_len(s); el_val_t i = pos; el_val_t running = 1; while (running) { if (i >= n) { running = 0; } else { el_val_t c = str_slice(s, i, (i + 1)); if (nlg_is_ws(c)) { i = (i + 1); } else { running = 0; } } } return i; return 0; } el_val_t scan_token(el_val_t s, el_val_t start) { el_val_t n = str_len(s); el_val_t i = start; el_val_t running = 1; while (running) { if (i >= n) { running = 0; } else { el_val_t c = str_slice(s, i, (i + 1)); if (nlg_is_ws(c)) { running = 0; } else { if (str_eq(c, EL_STR("("))) { running = 0; } else { if (str_eq(c, EL_STR(")"))) { running = 0; } else { i = (i + 1); } } } } } el_val_t tok = str_slice(s, start, i); el_val_t result = native_list_empty(); result = native_list_append(result, tok); result = native_list_append(result, int_to_str(i)); return result; return 0; } el_val_t render_tree(el_val_t tree) { el_val_t words = native_list_empty(); el_val_t n = str_len(tree); el_val_t i = 0; el_val_t prev_was_open = 0; while (i < n) { el_val_t c = str_slice(tree, i, (i + 1)); if (str_eq(c, EL_STR("("))) { prev_was_open = 1; i = (i + 1); } else { if (str_eq(c, EL_STR(")"))) { prev_was_open = 0; i = (i + 1); } else { if (nlg_is_ws(c)) { i = (i + 1); } else { el_val_t tok_info = scan_token(tree, i); el_val_t tok = native_list_get(tok_info, 0); el_val_t new_i = str_to_int(native_list_get(tok_info, 1)); i = new_i; if (prev_was_open) { prev_was_open = 0; } else { if (!str_eq(tok, EL_STR("_"))) { words = native_list_append(words, tok); } } } } } } return str_join(words, EL_STR(" ")); return 0; } el_val_t gram_word_order(el_val_t profile) { return lang_word_order(profile); return 0; } el_val_t gram_order_constituents(el_val_t subj, el_val_t verb, el_val_t obj, el_val_t profile) { el_val_t order = gram_word_order(profile); el_val_t parts = native_list_empty(); if (str_eq(order, EL_STR("SVO"))) { if (!str_eq(subj, EL_STR(""))) { parts = native_list_append(parts, subj); } if (!str_eq(verb, EL_STR(""))) { parts = native_list_append(parts, verb); } if (!str_eq(obj, EL_STR(""))) { parts = native_list_append(parts, obj); } return str_join(parts, EL_STR(" ")); } if (str_eq(order, EL_STR("SOV"))) { if (!str_eq(subj, EL_STR(""))) { parts = native_list_append(parts, subj); } if (!str_eq(obj, EL_STR(""))) { parts = native_list_append(parts, obj); } if (!str_eq(verb, EL_STR(""))) { parts = native_list_append(parts, verb); } return str_join(parts, EL_STR(" ")); } if (str_eq(order, EL_STR("VSO"))) { if (!str_eq(verb, EL_STR(""))) { parts = native_list_append(parts, verb); } if (!str_eq(subj, EL_STR(""))) { parts = native_list_append(parts, subj); } if (!str_eq(obj, EL_STR(""))) { parts = native_list_append(parts, obj); } return str_join(parts, EL_STR(" ")); } if (str_eq(order, EL_STR("VOS"))) { if (!str_eq(verb, EL_STR(""))) { parts = native_list_append(parts, verb); } if (!str_eq(obj, EL_STR(""))) { parts = native_list_append(parts, obj); } if (!str_eq(subj, EL_STR(""))) { parts = native_list_append(parts, subj); } return str_join(parts, EL_STR(" ")); } if (str_eq(order, EL_STR("OVS"))) { if (!str_eq(obj, EL_STR(""))) { parts = native_list_append(parts, obj); } if (!str_eq(verb, EL_STR(""))) { parts = native_list_append(parts, verb); } if (!str_eq(subj, EL_STR(""))) { parts = native_list_append(parts, subj); } return str_join(parts, EL_STR(" ")); } if (str_eq(order, EL_STR("OSV"))) { if (!str_eq(obj, EL_STR(""))) { parts = native_list_append(parts, obj); } if (!str_eq(subj, EL_STR(""))) { parts = native_list_append(parts, subj); } if (!str_eq(verb, EL_STR(""))) { parts = native_list_append(parts, verb); } return str_join(parts, EL_STR(" ")); } if (!str_eq(subj, EL_STR(""))) { parts = native_list_append(parts, subj); } if (!str_eq(verb, EL_STR(""))) { parts = native_list_append(parts, verb); } if (!str_eq(obj, EL_STR(""))) { parts = native_list_append(parts, obj); } return str_join(parts, EL_STR(" ")); return 0; } el_val_t gram_build_vp(el_val_t verb, el_val_t aux, el_val_t profile) { if (str_eq(aux, EL_STR(""))) { return verb; } return el_str_concat(el_str_concat(aux, EL_STR(" ")), verb); return 0; } el_val_t gram_question_strategy(el_val_t profile) { el_val_t code = lang_get(profile, EL_STR("code")); if (str_eq(code, EL_STR("en"))) { return EL_STR("do-support"); } if (str_eq(code, EL_STR("ja"))) { return EL_STR("particle"); } if (str_eq(code, EL_STR("zh"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("es"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("fr"))) { return EL_STR("inversion"); } if (str_eq(code, EL_STR("de"))) { return EL_STR("inversion"); } if (str_eq(code, EL_STR("ar"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("hi"))) { return EL_STR("particle"); } if (str_eq(code, EL_STR("ru"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("fi"))) { return EL_STR("particle"); } if (str_eq(code, EL_STR("sw"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("la"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("he"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("grc"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("ang"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("sa"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("got"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("non"))) { return EL_STR("intonation"); } if (str_eq(code, EL_STR("enm"))) { return EL_STR("do-support"); } if (str_eq(code, EL_STR("pi"))) { return EL_STR("intonation"); } return EL_STR("intonation"); return 0; } el_val_t is_pronoun(el_val_t word) { if (str_eq(word, EL_STR("I"))) { return 1; } if (str_eq(word, EL_STR("you"))) { return 1; } if (str_eq(word, EL_STR("he"))) { return 1; } if (str_eq(word, EL_STR("she"))) { return 1; } if (str_eq(word, EL_STR("it"))) { return 1; } if (str_eq(word, EL_STR("we"))) { return 1; } if (str_eq(word, EL_STR("they"))) { return 1; } if (str_eq(word, EL_STR("me"))) { return 1; } if (str_eq(word, EL_STR("him"))) { return 1; } if (str_eq(word, EL_STR("her"))) { return 1; } if (str_eq(word, EL_STR("us"))) { return 1; } if (str_eq(word, EL_STR("them"))) { return 1; } return 0; return 0; } el_val_t build_np(el_val_t referent, el_val_t slots) { if (is_pronoun(referent)) { return make_node1(EL_STR("NP"), make_leaf(EL_STR("Pron"), referent)); } el_val_t parts = str_split(referent, EL_STR(" ")); el_val_t np = native_list_len(parts); if (np == 1) { return make_node1(EL_STR("NP"), make_leaf(EL_STR("N"), referent)); } if (np == 2) { el_val_t det = native_list_get(parts, 0); el_val_t noun = native_list_get(parts, 1); return make_node2(EL_STR("NP"), make_leaf(EL_STR("Det"), det), make_leaf(EL_STR("N"), noun)); } if (np == 3) { el_val_t det = native_list_get(parts, 0); el_val_t adj = native_list_get(parts, 1); el_val_t noun = native_list_get(parts, 2); return make_node3(EL_STR("NP"), make_leaf(EL_STR("Det"), det), make_leaf(EL_STR("Adj"), adj), make_leaf(EL_STR("N"), noun)); } return make_node1(EL_STR("NP"), make_leaf(EL_STR("N"), referent)); return 0; } el_val_t build_pp(el_val_t loc) { el_val_t parts = str_split(loc, EL_STR(" ")); el_val_t n = native_list_len(parts); if (n < 2) { return make_leaf(EL_STR("PP"), loc); } el_val_t prep = native_list_get(parts, 0); el_val_t np_parts = native_list_empty(); el_val_t i = 1; while (i < n) { np_parts = native_list_append(np_parts, native_list_get(parts, i)); i = (i + 1); } el_val_t np_str = str_join(np_parts, EL_STR(" ")); el_val_t np_tree = build_np(np_str, native_list_empty()); return make_node2(EL_STR("PP"), make_leaf(EL_STR("P"), prep), np_tree); return 0; } el_val_t build_vp_body(el_val_t slots) { el_val_t verb_surf = slots_get(slots, EL_STR("verb_surf")); el_val_t patient = slots_get(slots, EL_STR("patient")); el_val_t loc = slots_get(slots, EL_STR("location")); if (!str_eq(patient, EL_STR(""))) { el_val_t obj_np = build_np(patient, slots); if (!str_eq(loc, EL_STR(""))) { el_val_t pp = build_pp(loc); return make_node3(EL_STR("VP"), make_leaf(EL_STR("V"), verb_surf), obj_np, pp); } return make_node2(EL_STR("VP"), make_leaf(EL_STR("V"), verb_surf), obj_np); } if (!str_eq(loc, EL_STR(""))) { el_val_t pp = build_pp(loc); return make_node2(EL_STR("VP"), make_leaf(EL_STR("V"), verb_surf), pp); } return make_node1(EL_STR("VP"), make_leaf(EL_STR("V"), verb_surf)); return 0; } el_val_t build_vp_from_slots(el_val_t slots) { el_val_t aux_surf = slots_get(slots, EL_STR("aux_surf")); if (!str_eq(aux_surf, EL_STR(""))) { el_val_t verb_surf = slots_get(slots, EL_STR("verb_surf")); el_val_t patient = slots_get(slots, EL_STR("patient")); el_val_t loc = slots_get(slots, EL_STR("location")); if (!str_eq(patient, EL_STR(""))) { el_val_t obj_np = build_np(patient, slots); return make_node3(EL_STR("VP"), make_leaf(EL_STR("Aux"), aux_surf), make_leaf(EL_STR("V"), verb_surf), obj_np); } return make_node2(EL_STR("VP"), make_leaf(EL_STR("Aux"), aux_surf), make_leaf(EL_STR("V"), verb_surf)); } return build_vp_body(slots); return 0; } el_val_t generate_tree(el_val_t rule_id_str, el_val_t slots) { el_val_t rule = find_rule(rule_id_str); el_val_t n = native_list_len(rule); if (n == 0) { return make_leaf(EL_STR("ERR"), EL_STR("unknown-rule")); } el_val_t lhs = native_list_get(rule, 1); if (str_eq(rule_id_str, EL_STR("S-DECL"))) { el_val_t agent = slots_get(slots, EL_STR("agent")); el_val_t np_tree = build_np(agent, slots); el_val_t vp_tree = build_vp_from_slots(slots); return make_node2(EL_STR("S"), np_tree, vp_tree); } if (str_eq(rule_id_str, EL_STR("S-QUEST"))) { el_val_t agent = slots_get(slots, EL_STR("agent")); el_val_t np_tree = build_np(agent, slots); el_val_t vp_tree = build_vp_body(slots); el_val_t aux_surf = slots_get(slots, EL_STR("aux_surf")); return make_node3(EL_STR("S"), make_leaf(EL_STR("Aux"), aux_surf), np_tree, vp_tree); } if (str_eq(rule_id_str, EL_STR("S-IMP"))) { el_val_t vp_tree = build_vp_from_slots(slots); return make_node1(EL_STR("S"), vp_tree); } return make_leaf(lhs, EL_STR("?")); return 0; }