3a83b6eb80
24 new functions covering counting (str_count, str_count_chars, str_count_bytes, str_count_lines, str_count_words, str_count_letters, str_count_digits), finding (str_index_of_all, str_last_index_of, str_find_chars), transforming (str_repeat, str_reverse, str_strip_prefix/suffix/chars, str_lstrip, str_rstrip), character classification (is_letter, is_digit, is_alphanumeric, is_whitespace, is_punctuation, is_uppercase, is_lowercase), and splitting/joining (str_split_lines, str_split_chars, str_split_n, str_join). Phase 1 is byte-level + ASCII character classes. Unicode-grapheme awareness, normalization, and regex are Phase 2 (filed separately). Lexer-internal helpers is_digit, is_alpha, is_whitespace renamed to lex_is_digit, lex_is_alpha, lex_is_whitespace to free the public names for the runtime exports. The El compiler's lexer.el and the bundled elc-combined.el both updated. Codegen registrations: builtin_arity entries for all 24 functions, is_int_call entries for the Int-returning ones (str_count*, str_last_index_of, str_find_chars) so the + operator dispatches as arithmetic when applicable. Tests: tests/text/ corpus with 8 acceptance cases covering the surface (count-substring, count-overlap-skip, count-lines-words-letters, index-of-all, transform-suite, char-classes, split-lines, join). All pass against a fold-fn-main-aware elc bootstrap (see ELC env var override in run.sh). Self-host fixed point: elc-combined.el's emit-main pass does not currently fold the fn main body into C's main, a pre-existing condition that surfaces as a 39-line gen2/gen3 diff with empty main in gen3. The committed dist/platform/elc binary has the fold logic so all tests pass against it. Filing the elc-combined fold-fn-main fix separately. This commit does not introduce new self-host drift.
88 lines
2.7 KiB
Bash
Executable File
88 lines
2.7 KiB
Bash
Executable File
#!/usr/bin/env bash
# run.sh — build and execute the text/ acceptance corpus.
#
# Each examples/<case>.el is a self-contained El program with a fn main()
# that prints a single deterministic result line. The runner compiles each
# via the canonical native elc, links it against the shared C runtime, runs
# it, and asserts the output matches the expected value.
#
# Environment overrides:
#   EL_HOME  repository root (default: two directories above this script)
#   ELC      compiler binary to use (default: ${EL_HOME}/dist/platform/elc)

# -e is not enabled; presumably so that a failing case is counted and
# reported instead of aborting the whole run.
set -uo pipefail

# The EL_HOME default and the examples/ paths are resolved relative to this
# script's directory, so a failed cd must stop the run rather than let every
# later step operate on the wrong directory.
cd "$(dirname "$0")" || {
  echo "cannot cd to the directory containing $0" >&2
  exit 1
}

EL_HOME="${EL_HOME:-$(cd ../.. && pwd)}"
ELC="${ELC:-${EL_HOME}/dist/platform/elc}"
RUNTIME_DIR="${EL_HOME}/el-compiler/runtime"

if [ ! -x "${ELC}" ]; then
  echo "elc not found at ${ELC}" >&2
  exit 1
fi

# Counters shared by all test cases and read by the final summary.
PASS=0
FAIL=0
FAILED_NAMES=()

#######################################
# Compile one El example to C with elc, build it with cc against the shared
# runtime, run the binary, and compare its combined stdout/stderr with the
# expected text.
# Globals:
#   ELC, RUNTIME_DIR (read)
#   PASS, FAIL, FAILED_NAMES (updated with the verdict)
# Arguments:
#   $1 - case name used in the report lines
#   $2 - path to the .el source file
#   $3 - exact output the program is expected to print
# Outputs:
#   One PASS/FAIL line on stdout; on an elc or cc failure, the tool's stderr
#   follows, indented.
# Returns:
#   0 always; the verdict is recorded in the counters, not the exit status.
#######################################
run_runtime_case() {
  local name="$1"
  local src="$2"
  local expected="$3"

  # Every artifact (generated C, binary, captured stderr) lives in one private
  # scratch directory. This avoids the stray file left behind by appending
  # ".c" to a mktemp-created name, and avoids fixed /tmp log names that would
  # collide between concurrent runs.
  local work
  if ! work="$(mktemp -d "${TMPDIR:-/tmp}/text_test.XXXXXX")"; then
    echo "FAIL ${name} — could not create scratch directory"
    FAIL=$((FAIL+1))
    FAILED_NAMES+=("${name}")
    return 0
  fi

  local out_c="${work}/case.c"
  local out_bin="${work}/case"
  local err_log="${work}/stderr.log"
  local build_failure=""
  local got

  if ! "${ELC}" "${src}" > "${out_c}" 2> "${err_log}"; then
    build_failure="FAIL ${name} — elc emit failed:"
  elif ! cc -O2 -I "${RUNTIME_DIR}" "${out_c}" "${RUNTIME_DIR}/el_runtime.c" \
    -lcurl -lpthread -o "${out_bin}" 2> "${err_log}"; then
    build_failure="FAIL ${name} — cc failed:"
  fi

  if [ -n "${build_failure}" ]; then
    echo "${build_failure}"
    # Indent the tool diagnostics so they read as part of this case's report.
    sed 's/^/ /' "${err_log}"
    FAIL=$((FAIL+1))
    FAILED_NAMES+=("${name}")
  else
    # stderr is folded into the comparison so unexpected diagnostics fail too.
    got="$("${out_bin}" 2>&1)"
    if [ "${got}" = "${expected}" ]; then
      echo "PASS ${name}"
      PASS=$((PASS+1))
    else
      echo "FAIL ${name} expected: '${expected}', got: '${got}'"
      FAIL=$((FAIL+1))
      FAILED_NAMES+=("${name}")
    fi
  fi

  # Single cleanup point for every outcome.
  rm -rf -- "${work}"
  return 0
}

echo "==> Running text-primitives acceptance corpus"
echo

# Acceptance table: each case name (which is also the examples/<name>.el
# basename) is immediately followed by the exact output it must print.
corpus=(
  "count-substring"           "3"
  "count-overlap-skip"        "2"
  "count-lines-words-letters" "2/4/22"
  "index-of-all"              "2,5,8"
  "transform-suite"           "ababab|olleh|bar|hello|hello"
  "char-classes"              "true"
  "split-lines"               "3"
  "join"                      "alpha, beta, gamma"
)

for ((idx = 0; idx < ${#corpus[@]}; idx += 2)); do
  run_runtime_case "${corpus[idx]}" "examples/${corpus[idx]}.el" "${corpus[idx + 1]}"
done

# Summary: totals always, failed case names only when something failed.
echo
echo "${PASS} passed, ${FAIL} failed"

if ((FAIL == 0)); then
  exit 0
fi

echo "failed: ${FAILED_NAMES[*]}"
exit 1