Compare commits
14 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e858eab300 | |||
| aa7d97d5ba | |||
| 7040830470 | |||
| 3a513aaa5a | |||
| beb2a8c5bd | |||
| e23319fe0b | |||
| 01fee9396a | |||
| 7b60d94b8a | |||
| 21694b79d2 | |||
| 422442b14e | |||
| 437ba0a4dd | |||
| 7376349124 | |||
| 0f1da43a97 | |||
| a54b2bebf9 |
+37
-248
@@ -1,4 +1,4 @@
|
||||
name: El SDK CI - dev
|
||||
name: El CI — dev
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -11,9 +11,6 @@ on:
|
||||
jobs:
|
||||
build-and-test:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: lang
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -22,291 +19,83 @@ jobs:
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
apt-get update -qq
|
||||
apt-get install -y gcc libcurl4-openssl-dev apt-transport-https ca-certificates
|
||||
echo "deb [trusted=yes] https://packages.cloud.google.com/apt cloud-sdk main" \
|
||||
> /etc/apt/sources.list.d/google-cloud-sdk.list
|
||||
apt-get update -qq && apt-get install -y google-cloud-cli
|
||||
apt-get install -y gcc libcurl4-openssl-dev
|
||||
|
||||
# Seed: use the committed linux-amd64 binary as the bootstrap
|
||||
- name: Bootstrap from committed linux binary (seed)
|
||||
# Gen2: compile the bootstrap C source into a working elc binary
|
||||
- name: Build elc from bootstrap (gen2)
|
||||
run: |
|
||||
chmod +x dist/platform/elc-linux-amd64
|
||||
echo "seed elc (committed linux-amd64 binary)"
|
||||
dist/platform/elc-linux-amd64 --version || true
|
||||
|
||||
# Gen2: use seed to self-host compile the El compiler
|
||||
- name: Self-host compile El compiler (gen2)
|
||||
run: |
|
||||
dist/platform/elc-linux-amd64 elc-cli.el > dist/elc-gen2.c
|
||||
gcc -O2 \
|
||||
-I el-compiler/runtime \
|
||||
dist/elc-gen2.c \
|
||||
dist/elc-bootstrap.c \
|
||||
el-compiler/runtime/el_runtime.c \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm \
|
||||
-lcurl -lpthread \
|
||||
-o dist/elc-gen2
|
||||
chmod +x dist/elc-gen2
|
||||
echo "gen2 elc built"
|
||||
dist/elc-gen2 --version || true
|
||||
|
||||
# Gen3: use gen2 to compile the El compiler from its own El source (self-host)
|
||||
- name: Self-host: compile El compiler with gen2 (gen3)
|
||||
run: |
|
||||
mkdir -p dist/platform
|
||||
dist/elc-gen2 el-compiler/src/compiler.el > dist/elc-gen3.c
|
||||
gcc -O2 \
|
||||
-I el-compiler/runtime \
|
||||
dist/elc-gen3.c \
|
||||
el-compiler/runtime/el_runtime.c \
|
||||
-lcurl -lpthread \
|
||||
-o dist/platform/elc
|
||||
chmod +x dist/platform/elc
|
||||
echo "gen2 (self-hosted) elc built"
|
||||
echo "gen3 (self-hosted) elc built"
|
||||
dist/platform/elc --version || true
|
||||
|
||||
# Build elb (needed for Artifact Registry publish and downstream CI)
|
||||
- name: Build elb
|
||||
run: |
|
||||
mkdir -p dist/bin
|
||||
dist/platform/elc elb.el > dist/elb.c
|
||||
gcc -O2 \
|
||||
-I el-compiler/runtime \
|
||||
dist/elb.c \
|
||||
el-compiler/runtime/el_runtime.c \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm \
|
||||
-o dist/bin/elb
|
||||
chmod +x dist/bin/elb
|
||||
echo "elb built"
|
||||
|
||||
- name: Run tests - text
|
||||
# Run all four test suites — all must pass
|
||||
- name: Run tests — text
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/text/run.sh
|
||||
|
||||
- name: Run tests - calendar
|
||||
- name: Run tests — calendar
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/calendar/run.sh
|
||||
|
||||
- name: Run tests - time
|
||||
- name: Run tests — time
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/time/run.sh
|
||||
|
||||
- name: Run tests - html_sanitizer
|
||||
- name: Run tests — html_sanitizer
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/html_sanitizer/run.sh
|
||||
|
||||
# Native El test suites (elc --test, compile-link-run)
|
||||
# el_runtime.c is precompiled to .o once and reused by all 8 modules.
|
||||
- name: Precompile el_runtime.o
|
||||
run: |
|
||||
set -euo pipefail
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
gcc -O2 -c -I "$RUNTIME" "$RUNTIME/el_runtime.c" \
|
||||
-o /tmp/el_runtime.o
|
||||
echo "el_runtime.o compiled"
|
||||
|
||||
- name: Run tests - native (core)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_core.el > /tmp/el_native_core.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_core.c /tmp/el_runtime.o \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_core
|
||||
/tmp/el_native_core
|
||||
|
||||
- name: Run tests - native (text)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_text.el > /tmp/el_native_text.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_text.c /tmp/el_runtime.o \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_text
|
||||
/tmp/el_native_text
|
||||
|
||||
- name: Run tests - native (string)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_string.el > /tmp/el_native_string.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_string.c /tmp/el_runtime.o \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_string
|
||||
/tmp/el_native_string
|
||||
|
||||
- name: Run tests - native (math)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_math.el > /tmp/el_native_math.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_math.c /tmp/el_runtime.o \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_math
|
||||
/tmp/el_native_math
|
||||
|
||||
- name: Run tests - native (state)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_state.el > /tmp/el_native_state.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_state.c /tmp/el_runtime.o \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_state
|
||||
/tmp/el_native_state
|
||||
|
||||
- name: Run tests - native (time)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_time.el > /tmp/el_native_time.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_time.c /tmp/el_runtime.o \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_time
|
||||
/tmp/el_native_time
|
||||
|
||||
- name: Run tests - native (json)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_json.el > /tmp/el_native_json.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_json.c /tmp/el_runtime.o \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_json
|
||||
/tmp/el_native_json
|
||||
|
||||
- name: Run tests - native (env)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_env.el > /tmp/el_native_env.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_env.c /tmp/el_runtime.o \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_env
|
||||
/tmp/el_native_env
|
||||
|
||||
- name: Run tests - native (fs)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_fs.el > /tmp/el_native_fs.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_fs.c /tmp/el_runtime.o \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_fs
|
||||
/tmp/el_native_fs
|
||||
|
||||
# Build epm binary using elb (epm lives at repo root, not inside lang/)
|
||||
- name: Build epm
|
||||
run: |
|
||||
ABS_ELB="$(pwd)/dist/bin/elb"
|
||||
ABS_ELC="$(pwd)/dist/platform/elc"
|
||||
ABS_RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
ABS_OUT="$(pwd)/dist/bin"
|
||||
(cd ../epm && "$ABS_ELB" --clean --elc="$ABS_ELC" --runtime="$ABS_RUNTIME" --out="$ABS_OUT")
|
||||
chmod +x dist/bin/epm
|
||||
echo "epm built"
|
||||
|
||||
# Build el-install binary using elb
|
||||
- name: Build el-install
|
||||
run: |
|
||||
ABS_ELB="$(pwd)/dist/bin/elb"
|
||||
ABS_ELC="$(pwd)/dist/platform/elc"
|
||||
ABS_RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
ABS_OUT="$(pwd)/dist/bin"
|
||||
(cd tools/install && "$ABS_ELB" --clean --elc="$ABS_ELC" --runtime="$ABS_RUNTIME" --out="$ABS_OUT")
|
||||
chmod +x dist/bin/el-install
|
||||
echo "el-install built"
|
||||
|
||||
# Publish only after merge (push event), not on PR validation runs
|
||||
- name: Publish El SDK to Artifact Registry (dev)
|
||||
if: github.event_name == 'push'
|
||||
# Publish artifact to GCP Artifact Registry (dev)
|
||||
- name: Publish elc to Artifact Registry (dev)
|
||||
env:
|
||||
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
|
||||
run: |
|
||||
echo "${GCP_SA_KEY}" > /tmp/gcp-key.json
|
||||
apt-get install -y -qq apt-transport-https ca-certificates gnupg curl
|
||||
curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg
|
||||
echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" > /etc/apt/sources.list.d/google-cloud-sdk.list
|
||||
apt-get update -qq && apt-get install -y google-cloud-cli
|
||||
gcloud auth activate-service-account --key-file=/tmp/gcp-key.json
|
||||
gcloud config set project neuron-785695
|
||||
|
||||
VERSION="${GITHUB_SHA:0:8}"
|
||||
|
||||
VERSION="${GITEA_SHA:0:8}"
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-dev \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-elc \
|
||||
--package=el/elc \
|
||||
--version="${VERSION}" \
|
||||
--source=dist/platform/elc
|
||||
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-dev \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-elb \
|
||||
--version="${VERSION}" \
|
||||
--source=dist/bin/elb
|
||||
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-dev \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-runtime-c \
|
||||
--version="${VERSION}" \
|
||||
--source=el-compiler/runtime/el_runtime.c
|
||||
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-dev \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-runtime-h \
|
||||
--version="${VERSION}" \
|
||||
--source=el-compiler/runtime/el_runtime.h
|
||||
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-dev \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-runtime-js \
|
||||
--version="${VERSION}" \
|
||||
--source=el-compiler/runtime/el_runtime.js
|
||||
|
||||
echo "Published El SDK version=${VERSION} to foundation-dev"
|
||||
# Keep key alive for the ci-base rebuild step below
|
||||
# (deleted in that step after docker push)
|
||||
|
||||
- name: Rebuild ci-base with fresh El SDK (dev)
|
||||
# Patches ci-base:dev in-place: pulls the existing image (which has all
|
||||
# system deps — Node, Go, gcloud, Docker CLI, etc.) and overlays the freshly
|
||||
# built El SDK on top. Keeps the full ci-base rebuild fast and incremental.
|
||||
if: github.event_name == 'push'
|
||||
env:
|
||||
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
CI_BASE="us-central1-docker.pkg.dev/neuron-785695/neuron-ci/ci-base"
|
||||
SHA="${GITHUB_SHA:0:8}"
|
||||
|
||||
echo "${GCP_SA_KEY}" > /tmp/gcp-key.json
|
||||
gcloud auth activate-service-account --key-file=/tmp/gcp-key.json
|
||||
gcloud config set project neuron-785695
|
||||
gcloud auth configure-docker us-central1-docker.pkg.dev --quiet
|
||||
|
||||
# Pull existing ci-base:dev (or fall back to :latest on first run)
|
||||
BASE_TAG="dev"
|
||||
docker pull "${CI_BASE}:dev" || { docker pull "${CI_BASE}:latest" && BASE_TAG="latest"; }
|
||||
|
||||
# Inline Dockerfile — only replaces the El SDK layer
|
||||
cat > /tmp/Dockerfile.ci-base-patch << 'EOF'
|
||||
ARG BASE
|
||||
FROM ${BASE}
|
||||
COPY dist/platform/elc /opt/el/dist/platform/elc
|
||||
COPY dist/bin/elb /opt/el/dist/bin/elb
|
||||
COPY el-compiler/runtime/el_runtime.c /opt/el/el-compiler/runtime/el_runtime.c
|
||||
COPY el-compiler/runtime/el_runtime.h /opt/el/el-compiler/runtime/el_runtime.h
|
||||
COPY el-compiler/runtime/el_runtime.js /opt/el/el-compiler/runtime/el_runtime.js
|
||||
RUN chmod +x /opt/el/dist/platform/elc /opt/el/dist/bin/elb
|
||||
EOF
|
||||
|
||||
docker build \
|
||||
--build-arg BASE="${CI_BASE}:${BASE_TAG}" \
|
||||
--build-arg BUILDKIT_INLINE_CACHE=1 \
|
||||
-f /tmp/Dockerfile.ci-base-patch \
|
||||
-t "${CI_BASE}:dev" \
|
||||
-t "${CI_BASE}:dev-${SHA}" \
|
||||
.
|
||||
|
||||
docker push "${CI_BASE}:dev"
|
||||
docker push "${CI_BASE}:dev-${SHA}"
|
||||
|
||||
echo "ci-base rebuilt: ${CI_BASE}:dev (${SHA})"
|
||||
# Also tag as latest-dev
|
||||
echo "Published elc version=${VERSION} to foundation-dev/el/elc"
|
||||
rm -f /tmp/gcp-key.json
|
||||
|
||||
+34
-230
@@ -1,4 +1,4 @@
|
||||
name: El SDK CI - stage
|
||||
name: El CI — stage
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -11,286 +11,90 @@ on:
|
||||
jobs:
|
||||
build-and-test:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: lang
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Enforce source branch (stage <- dev only)
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
SOURCE="${GITHUB_HEAD_REF}"
|
||||
if [ "${SOURCE}" != "dev" ]; then
|
||||
echo "ERROR: Stage branch only accepts PRs from 'dev'. Source was: '${SOURCE}'"
|
||||
exit 1
|
||||
fi
|
||||
echo "Source branch check passed: ${SOURCE} -> stage"
|
||||
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
apt-get update -qq
|
||||
apt-get install -y gcc libcurl4-openssl-dev
|
||||
|
||||
# Seed: use the committed linux-amd64 binary as the bootstrap
|
||||
- name: Bootstrap from committed linux binary (seed)
|
||||
# Gen2: compile the bootstrap C source into a working elc binary
|
||||
- name: Build elc from bootstrap (gen2)
|
||||
run: |
|
||||
chmod +x dist/platform/elc-linux-amd64
|
||||
echo "seed elc (committed linux-amd64 binary)"
|
||||
dist/platform/elc-linux-amd64 --version || true
|
||||
|
||||
# Gen2: use seed to self-host compile the El compiler
|
||||
- name: Self-host compile El compiler (gen2)
|
||||
run: |
|
||||
dist/platform/elc-linux-amd64 elc-cli.el > dist/elc-gen2.c
|
||||
gcc -O2 \
|
||||
-I el-compiler/runtime \
|
||||
dist/elc-gen2.c \
|
||||
dist/elc-bootstrap.c \
|
||||
el-compiler/runtime/el_runtime.c \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm \
|
||||
-lcurl -lpthread \
|
||||
-o dist/elc-gen2
|
||||
chmod +x dist/elc-gen2
|
||||
echo "gen2 elc built"
|
||||
dist/elc-gen2 --version || true
|
||||
|
||||
# Gen3: use gen2 to compile the El compiler from its own El source (self-host)
|
||||
- name: Self-host: compile El compiler with gen2 (gen3)
|
||||
run: |
|
||||
mkdir -p dist/platform
|
||||
dist/elc-gen2 el-compiler/src/compiler.el > dist/elc-gen3.c
|
||||
gcc -O2 \
|
||||
-I el-compiler/runtime \
|
||||
dist/elc-gen3.c \
|
||||
el-compiler/runtime/el_runtime.c \
|
||||
-lcurl -lpthread \
|
||||
-o dist/platform/elc
|
||||
chmod +x dist/platform/elc
|
||||
echo "gen2 (self-hosted) elc built"
|
||||
echo "gen3 (self-hosted) elc built"
|
||||
dist/platform/elc --version || true
|
||||
|
||||
- name: Run tests - text
|
||||
# Run all four test suites — all must pass
|
||||
- name: Run tests — text
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/text/run.sh
|
||||
|
||||
- name: Run tests - calendar
|
||||
- name: Run tests — calendar
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/calendar/run.sh
|
||||
|
||||
- name: Run tests - time
|
||||
- name: Run tests — time
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/time/run.sh
|
||||
|
||||
- name: Run tests - html_sanitizer
|
||||
- name: Run tests — html_sanitizer
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/html_sanitizer/run.sh
|
||||
|
||||
# Native El test suites (elc --test, compile-link-run)
|
||||
- name: Run tests - native (core)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_core.el > /tmp/el_native_core.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_core.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_core
|
||||
/tmp/el_native_core
|
||||
|
||||
- name: Run tests - native (text)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_text.el > /tmp/el_native_text.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_text.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_text
|
||||
/tmp/el_native_text
|
||||
|
||||
- name: Run tests - native (string)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_string.el > /tmp/el_native_string.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_string.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_string
|
||||
/tmp/el_native_string
|
||||
|
||||
- name: Run tests - native (math)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_math.el > /tmp/el_native_math.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_math.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_math
|
||||
/tmp/el_native_math
|
||||
|
||||
- name: Run tests - native (state)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_state.el > /tmp/el_native_state.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_state.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_state
|
||||
/tmp/el_native_state
|
||||
|
||||
- name: Run tests - native (time)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_time.el > /tmp/el_native_time.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_time.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_time
|
||||
/tmp/el_native_time
|
||||
|
||||
- name: Run tests - native (json)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_json.el > /tmp/el_native_json.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_json.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_json
|
||||
/tmp/el_native_json
|
||||
|
||||
- name: Run tests - native (env)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_env.el > /tmp/el_native_env.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_env.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_env
|
||||
/tmp/el_native_env
|
||||
|
||||
- name: Run tests - native (fs)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_fs.el > /tmp/el_native_fs.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_fs.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_fs
|
||||
/tmp/el_native_fs
|
||||
|
||||
# Build elb (needed for epm and el-install builds below)
|
||||
- name: Build elb
|
||||
run: |
|
||||
mkdir -p dist/bin
|
||||
dist/platform/elc elb.el > dist/elb.c
|
||||
gcc -O2 \
|
||||
-I el-compiler/runtime \
|
||||
dist/elb.c \
|
||||
el-compiler/runtime/el_runtime.c \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm \
|
||||
-o dist/bin/elb
|
||||
chmod +x dist/bin/elb
|
||||
echo "elb built"
|
||||
|
||||
# Build epm binary using elb (epm lives at repo root, not inside lang/)
|
||||
- name: Build epm
|
||||
run: |
|
||||
ABS_ELB="$(pwd)/dist/bin/elb"
|
||||
ABS_ELC="$(pwd)/dist/platform/elc"
|
||||
ABS_RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
ABS_OUT="$(pwd)/dist/bin"
|
||||
(cd ../epm && "$ABS_ELB" --clean --elc="$ABS_ELC" --runtime="$ABS_RUNTIME" --out="$ABS_OUT")
|
||||
chmod +x dist/bin/epm
|
||||
echo "epm built"
|
||||
|
||||
# Build el-install binary using elb
|
||||
- name: Build el-install
|
||||
run: |
|
||||
ABS_ELB="$(pwd)/dist/bin/elb"
|
||||
ABS_ELC="$(pwd)/dist/platform/elc"
|
||||
ABS_RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
ABS_OUT="$(pwd)/dist/bin"
|
||||
(cd tools/install && "$ABS_ELB" --clean --elc="$ABS_ELC" --runtime="$ABS_RUNTIME" --out="$ABS_OUT")
|
||||
chmod +x dist/bin/el-install
|
||||
echo "el-install built"
|
||||
|
||||
# Publish only after merge (push event), not on PR validation runs
|
||||
- name: Publish El SDK to Artifact Registry (stage)
|
||||
if: github.event_name == 'push'
|
||||
# Publish artifact to GCP Artifact Registry (stage)
|
||||
- name: Publish elc to Artifact Registry (stage)
|
||||
env:
|
||||
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
|
||||
run: |
|
||||
echo "${GCP_SA_KEY}" > /tmp/gcp-key.json
|
||||
apt-get install -y -qq apt-transport-https ca-certificates curl
|
||||
echo "deb [trusted=yes] https://packages.cloud.google.com/apt cloud-sdk main" > /etc/apt/sources.list.d/google-cloud-sdk.list
|
||||
apt-get install -y -qq apt-transport-https ca-certificates gnupg curl
|
||||
curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg
|
||||
echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" > /etc/apt/sources.list.d/google-cloud-sdk.list
|
||||
apt-get update -qq && apt-get install -y google-cloud-cli
|
||||
gcloud auth activate-service-account --key-file=/tmp/gcp-key.json
|
||||
gcloud config set project neuron-785695
|
||||
|
||||
VERSION="${GITHUB_SHA:0:8}"
|
||||
|
||||
VERSION="${GITEA_SHA:0:8}"
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-stage \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-elc \
|
||||
--package=el/elc \
|
||||
--version="${VERSION}" \
|
||||
--source=dist/platform/elc
|
||||
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-stage \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-runtime-c \
|
||||
--version="${VERSION}" \
|
||||
--source=el-compiler/runtime/el_runtime.c
|
||||
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-stage \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-runtime-h \
|
||||
--version="${VERSION}" \
|
||||
--source=el-compiler/runtime/el_runtime.h
|
||||
|
||||
echo "Published El SDK version=${VERSION} to foundation-stage"
|
||||
# Keep key alive for the ci-base rebuild step below
|
||||
# (deleted in that step after docker push)
|
||||
|
||||
- name: Rebuild ci-base with fresh El SDK (stage)
|
||||
# Patches ci-base:stage in-place: pulls the existing image (which has all
|
||||
# system deps — Node, Go, gcloud, Docker CLI, etc.) and overlays the freshly
|
||||
# built El SDK on top. Keeps the full ci-base rebuild fast and incremental.
|
||||
if: github.event_name == 'push'
|
||||
env:
|
||||
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
CI_BASE="us-central1-docker.pkg.dev/neuron-785695/neuron-ci/ci-base"
|
||||
SHA="${GITHUB_SHA:0:8}"
|
||||
|
||||
echo "${GCP_SA_KEY}" > /tmp/gcp-key.json
|
||||
gcloud auth activate-service-account --key-file=/tmp/gcp-key.json
|
||||
gcloud config set project neuron-785695
|
||||
gcloud auth configure-docker us-central1-docker.pkg.dev --quiet
|
||||
|
||||
# Pull existing ci-base:stage (system deps stay cached in the base layer)
|
||||
docker pull "${CI_BASE}:stage" || docker pull "${CI_BASE}:latest"
|
||||
|
||||
# Inline Dockerfile — only replaces the El SDK layer
|
||||
cat > /tmp/Dockerfile.ci-base-patch << 'EOF'
|
||||
ARG BASE
|
||||
FROM ${BASE}
|
||||
COPY dist/platform/elc /opt/el/dist/platform/elc
|
||||
COPY dist/bin/elb /opt/el/dist/bin/elb
|
||||
COPY el-compiler/runtime/el_runtime.c /opt/el/el-compiler/runtime/el_runtime.c
|
||||
COPY el-compiler/runtime/el_runtime.h /opt/el/el-compiler/runtime/el_runtime.h
|
||||
COPY el-compiler/runtime/el_runtime.js /opt/el/el-compiler/runtime/el_runtime.js
|
||||
RUN chmod +x /opt/el/dist/platform/elc /opt/el/dist/bin/elb
|
||||
EOF
|
||||
|
||||
docker build \
|
||||
--build-arg BASE="${CI_BASE}:stage" \
|
||||
--build-arg BUILDKIT_INLINE_CACHE=1 \
|
||||
-f /tmp/Dockerfile.ci-base-patch \
|
||||
-t "${CI_BASE}:stage" \
|
||||
-t "${CI_BASE}:stage-${SHA}" \
|
||||
.
|
||||
|
||||
docker push "${CI_BASE}:stage"
|
||||
docker push "${CI_BASE}:stage-${SHA}"
|
||||
|
||||
echo "ci-base rebuilt: ${CI_BASE}:stage (${SHA})"
|
||||
echo "Published elc version=${VERSION} to foundation-stage/el/elc"
|
||||
rm -f /tmp/gcp-key.json
|
||||
|
||||
@@ -4,234 +4,81 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
build-and-release:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: lang
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Enforce source branch (main <- stage only)
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
SOURCE="${GITHUB_HEAD_REF}"
|
||||
if [ "${SOURCE}" != "stage" ]; then
|
||||
echo "ERROR: Main branch only accepts PRs from 'stage'. Source was: '${SOURCE}'"
|
||||
exit 1
|
||||
fi
|
||||
echo "Source branch check passed: ${SOURCE} -> main"
|
||||
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
apt-get update -qq
|
||||
apt-get install -y gcc libcurl4-openssl-dev
|
||||
|
||||
# Seed: use the committed linux-amd64 binary as the bootstrap
|
||||
- name: Bootstrap from committed linux binary (seed)
|
||||
# Gen2: compile the bootstrap C source into a working elc binary
|
||||
- name: Build elc from bootstrap (gen2)
|
||||
run: |
|
||||
chmod +x dist/platform/elc-linux-amd64
|
||||
echo "seed elc (committed linux-amd64 binary)"
|
||||
dist/platform/elc-linux-amd64 --version || true
|
||||
gcc -O2 \
|
||||
-I el-compiler/runtime \
|
||||
dist/elc-bootstrap.c \
|
||||
el-compiler/runtime/el_runtime.c \
|
||||
-lcurl -lpthread \
|
||||
-o dist/elc-gen2
|
||||
chmod +x dist/elc-gen2
|
||||
echo "gen2 elc built"
|
||||
dist/elc-gen2 --version || true
|
||||
|
||||
# Gen2: use seed to self-host compile the El compiler
|
||||
- name: Self-host compile El compiler (gen2)
|
||||
# Gen3: use gen2 to compile the El compiler from its own El source (self-host)
|
||||
- name: Self-host: compile El compiler with gen2 (gen3)
|
||||
run: |
|
||||
mkdir -p dist/platform
|
||||
dist/platform/elc-linux-amd64 elc-cli.el > dist/elc-gen2.c
|
||||
dist/elc-gen2 el-compiler/src/compiler.el > dist/elc-gen3.c
|
||||
gcc -O2 \
|
||||
-I el-compiler/runtime \
|
||||
dist/elc-gen2.c \
|
||||
dist/elc-gen3.c \
|
||||
el-compiler/runtime/el_runtime.c \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm \
|
||||
-lcurl -lpthread \
|
||||
-o dist/platform/elc
|
||||
chmod +x dist/platform/elc
|
||||
echo "gen2 (self-hosted) elc built"
|
||||
echo "gen3 (self-hosted) elc built"
|
||||
dist/platform/elc --version || true
|
||||
|
||||
# Build elb binary
|
||||
- name: Build elb
|
||||
run: |
|
||||
mkdir -p dist/bin
|
||||
dist/platform/elc elb.el > dist/elb.c
|
||||
gcc -O2 \
|
||||
-I el-compiler/runtime \
|
||||
dist/elb.c \
|
||||
el-compiler/runtime/el_runtime.c \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm \
|
||||
-o dist/bin/elb
|
||||
chmod +x dist/bin/elb
|
||||
echo "elb built"
|
||||
|
||||
# Build epm binary using elb (epm lives at repo root, not inside lang/)
|
||||
- name: Build epm
|
||||
run: |
|
||||
ABS_ELB="$(pwd)/dist/bin/elb"
|
||||
ABS_ELC="$(pwd)/dist/platform/elc"
|
||||
ABS_RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
ABS_OUT="$(pwd)/dist/bin"
|
||||
(cd ../epm && "$ABS_ELB" --clean --elc="$ABS_ELC" --runtime="$ABS_RUNTIME" --out="$ABS_OUT")
|
||||
chmod +x dist/bin/epm
|
||||
echo "epm built"
|
||||
|
||||
# Build el-install binary using elb
|
||||
- name: Build el-install
|
||||
run: |
|
||||
ABS_ELB="$(pwd)/dist/bin/elb"
|
||||
ABS_ELC="$(pwd)/dist/platform/elc"
|
||||
ABS_RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
ABS_OUT="$(pwd)/dist/bin"
|
||||
(cd tools/install && "$ABS_ELB" --clean --elc="$ABS_ELC" --runtime="$ABS_RUNTIME" --out="$ABS_OUT")
|
||||
chmod +x dist/bin/el-install
|
||||
echo "el-install built"
|
||||
|
||||
- name: Run tests - text
|
||||
# Run all four test suites with gen3 elc
|
||||
- name: Run tests — text
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/text/run.sh
|
||||
|
||||
- name: Run tests - calendar
|
||||
- name: Run tests — calendar
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/calendar/run.sh
|
||||
|
||||
- name: Run tests - time
|
||||
- name: Run tests — time
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/time/run.sh
|
||||
|
||||
- name: Run tests - html_sanitizer
|
||||
- name: Run tests — html_sanitizer
|
||||
run: |
|
||||
ELC="$(pwd)/dist/platform/elc" \
|
||||
EL_HOME="$(pwd)" \
|
||||
bash tests/html_sanitizer/run.sh
|
||||
|
||||
# Native El test suites (elc --test, compile-link-run)
|
||||
- name: Run tests - native (core)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_core.el > /tmp/el_native_core.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_core.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_core
|
||||
/tmp/el_native_core
|
||||
|
||||
- name: Run tests - native (text)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_text.el > /tmp/el_native_text.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_text.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_text
|
||||
/tmp/el_native_text
|
||||
|
||||
- name: Run tests - native (string)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_string.el > /tmp/el_native_string.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_string.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_string
|
||||
/tmp/el_native_string
|
||||
|
||||
- name: Run tests - native (math)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_math.el > /tmp/el_native_math.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_math.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_math
|
||||
/tmp/el_native_math
|
||||
|
||||
- name: Run tests - native (state)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_state.el > /tmp/el_native_state.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_state.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_state
|
||||
/tmp/el_native_state
|
||||
|
||||
- name: Run tests - native (time)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_time.el > /tmp/el_native_time.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_time.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_time
|
||||
/tmp/el_native_time
|
||||
|
||||
- name: Run tests - native (json)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_json.el > /tmp/el_native_json.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_json.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_json
|
||||
/tmp/el_native_json
|
||||
|
||||
- name: Run tests - native (env)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_env.el > /tmp/el_native_env.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_env.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_env
|
||||
/tmp/el_native_env
|
||||
|
||||
- name: Run tests - native (fs)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ELC="$(pwd)/dist/platform/elc"
|
||||
RUNTIME="$(pwd)/el-compiler/runtime"
|
||||
"$ELC" --test tests/native/test_fs.el > /tmp/el_native_fs.c
|
||||
gcc -O2 -I "$RUNTIME" /tmp/el_native_fs.c "$RUNTIME/el_runtime.c" \
|
||||
-lcurl -lssl -lcrypto -lpthread -lm -o /tmp/el_native_fs
|
||||
/tmp/el_native_fs
|
||||
|
||||
# Bundle the SDK tarball - runs from the repo root to reference lang/ paths correctly
|
||||
- name: Bundle SDK tarball
|
||||
if: github.event_name == 'push'
|
||||
working-directory: ${{ github.workspace }}
|
||||
run: |
|
||||
mkdir -p dist/sdk/bin dist/sdk/runtime
|
||||
cp lang/dist/platform/elc dist/sdk/bin/elc
|
||||
cp lang/dist/bin/elb dist/sdk/bin/elb
|
||||
cp lang/dist/bin/epm dist/sdk/bin/epm
|
||||
cp lang/el-compiler/runtime/el_runtime.c dist/sdk/runtime/
|
||||
cp lang/el-compiler/runtime/el_runtime.h dist/sdk/runtime/
|
||||
cp lang/runtime/*.el dist/sdk/runtime/
|
||||
tar -czf dist/el-sdk-latest.tar.gz -C dist/sdk .
|
||||
echo "SDK tarball bundled: dist/el-sdk-latest.tar.gz"
|
||||
ls -lh dist/el-sdk-latest.tar.gz
|
||||
|
||||
# Publish / update the `latest` release with all SDK assets
|
||||
# Publish / update the `latest` release with the three SDK assets
|
||||
- name: Publish latest release
|
||||
if: github.event_name == 'push'
|
||||
working-directory: ${{ github.workspace }}
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.GIT_TOKEN }}
|
||||
GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||
GITEA_API: https://git.neuralplatform.ai/api/v1
|
||||
REPO: neuron-technologies/el
|
||||
run: |
|
||||
# Delete existing `latest` release if it exists
|
||||
EXISTING_ID=$(curl -sf \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
"${GITEA_API}/repos/${REPO}/releases/tags/latest" \
|
||||
@@ -244,10 +91,12 @@ jobs:
|
||||
"${GITEA_API}/repos/${REPO}/releases/${EXISTING_ID}"
|
||||
fi
|
||||
|
||||
# Delete and re-create the `latest` tag so it points at HEAD
|
||||
curl -sf -X DELETE \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
"${GITEA_API}/repos/${REPO}/tags/latest" || true
|
||||
|
||||
# Create the release
|
||||
RELEASE_ID=$(curl -sf -X POST \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
@@ -262,6 +111,7 @@ jobs:
|
||||
|
||||
echo "Created release id=${RELEASE_ID}"
|
||||
|
||||
# Upload assets
|
||||
upload_asset() {
|
||||
local filepath="$1"
|
||||
local name="$2"
|
||||
@@ -272,134 +122,70 @@ jobs:
|
||||
"${GITEA_API}/repos/${REPO}/releases/${RELEASE_ID}/assets"
|
||||
}
|
||||
|
||||
# Per-file assets (downstream CI needs these individually)
|
||||
upload_asset lang/dist/platform/elc elc
|
||||
upload_asset lang/el-compiler/runtime/el_runtime.c el_runtime.c
|
||||
upload_asset lang/el-compiler/runtime/el_runtime.h el_runtime.h
|
||||
|
||||
# SDK bundle and installer binary
|
||||
upload_asset dist/el-sdk-latest.tar.gz el-sdk-latest.tar.gz
|
||||
upload_asset lang/dist/bin/el-install el-install
|
||||
upload_asset dist/platform/elc elc
|
||||
upload_asset el-compiler/runtime/el_runtime.c el_runtime.c
|
||||
upload_asset el-compiler/runtime/el_runtime.h el_runtime.h
|
||||
|
||||
echo "Release published successfully"
|
||||
|
||||
- name: Publish El SDK to Artifact Registry (prod)
|
||||
if: github.event_name == 'push'
|
||||
# Dispatch el-sdk-updated event to downstream repos
|
||||
# Publish artifact to GCP Artifact Registry (prod)
|
||||
- name: Publish elc to Artifact Registry (prod)
|
||||
env:
|
||||
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
|
||||
run: |
|
||||
echo "${GCP_SA_KEY}" > /tmp/gcp-key.json
|
||||
apt-get install -y -qq apt-transport-https ca-certificates curl
|
||||
echo "deb [trusted=yes] https://packages.cloud.google.com/apt cloud-sdk main" > /etc/apt/sources.list.d/google-cloud-sdk.list
|
||||
apt-get install -y -qq apt-transport-https ca-certificates gnupg curl
|
||||
curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg
|
||||
echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" > /etc/apt/sources.list.d/google-cloud-sdk.list
|
||||
apt-get update -qq && apt-get install -y google-cloud-cli
|
||||
gcloud auth activate-service-account --key-file=/tmp/gcp-key.json
|
||||
gcloud config set project neuron-785695
|
||||
|
||||
VERSION="${GITHUB_SHA:0:8}"
|
||||
|
||||
VERSION="${GITEA_SHA:0:8}"
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-prod \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-elc \
|
||||
--package=el/elc \
|
||||
--version="${VERSION}" \
|
||||
--source=dist/platform/elc
|
||||
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-prod \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-elb \
|
||||
--version="${VERSION}" \
|
||||
--source=dist/bin/elb
|
||||
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-prod \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-runtime-c \
|
||||
--version="${VERSION}" \
|
||||
--source=el-compiler/runtime/el_runtime.c
|
||||
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-prod \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-runtime-h \
|
||||
--version="${VERSION}" \
|
||||
--source=el-compiler/runtime/el_runtime.h
|
||||
|
||||
gcloud artifacts generic upload \
|
||||
--repository=foundation-prod \
|
||||
--location=us-central1 \
|
||||
--project=neuron-785695 \
|
||||
--package=el-runtime-js \
|
||||
--version="${VERSION}" \
|
||||
--source=el-compiler/runtime/el_runtime.js
|
||||
|
||||
echo "Published El SDK version=${VERSION} to foundation-prod"
|
||||
# Keep key alive for the ci-base rebuild step below
|
||||
# (deleted in that step after docker push)
|
||||
|
||||
- name: Rebuild ci-base with fresh El SDK
|
||||
# Patches ci-base:latest in-place: pulls the existing image (which has all
|
||||
# system deps — Node, Go, gcloud, Docker CLI, etc.) and overlays the freshly
|
||||
# built El SDK on top. Keeps the full ci-base rebuild fast and incremental.
|
||||
if: github.event_name == 'push'
|
||||
env:
|
||||
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
CI_BASE="us-central1-docker.pkg.dev/neuron-785695/neuron-ci/ci-base"
|
||||
SHA="${GITHUB_SHA:0:8}"
|
||||
|
||||
echo "${GCP_SA_KEY}" > /tmp/gcp-key.json
|
||||
gcloud auth activate-service-account --key-file=/tmp/gcp-key.json
|
||||
gcloud config set project neuron-785695
|
||||
gcloud auth configure-docker us-central1-docker.pkg.dev --quiet
|
||||
|
||||
# Pull existing ci-base (system deps stay cached in the base layer)
|
||||
docker pull "${CI_BASE}:latest"
|
||||
|
||||
# Inline Dockerfile — only replaces the El SDK layer
|
||||
cat > /tmp/Dockerfile.ci-base-patch << 'EOF'
|
||||
ARG BASE
|
||||
FROM ${BASE}
|
||||
COPY dist/platform/elc /opt/el/dist/platform/elc
|
||||
COPY dist/bin/elb /opt/el/dist/bin/elb
|
||||
COPY el-compiler/runtime/el_runtime.c /opt/el/el-compiler/runtime/el_runtime.c
|
||||
COPY el-compiler/runtime/el_runtime.h /opt/el/el-compiler/runtime/el_runtime.h
|
||||
COPY el-compiler/runtime/el_runtime.js /opt/el/el-compiler/runtime/el_runtime.js
|
||||
RUN chmod +x /opt/el/dist/platform/elc /opt/el/dist/bin/elb
|
||||
EOF
|
||||
|
||||
docker build \
|
||||
--build-arg BASE="${CI_BASE}:latest" \
|
||||
--build-arg BUILDKIT_INLINE_CACHE=1 \
|
||||
-f /tmp/Dockerfile.ci-base-patch \
|
||||
-t "${CI_BASE}:latest" \
|
||||
-t "${CI_BASE}:${SHA}" \
|
||||
.
|
||||
|
||||
docker push "${CI_BASE}:latest"
|
||||
docker push "${CI_BASE}:${SHA}"
|
||||
|
||||
echo "ci-base rebuilt: ${CI_BASE}:latest (${SHA})"
|
||||
echo "Published elc version=${VERSION} to foundation-prod/el/elc"
|
||||
rm -f /tmp/gcp-key.json
|
||||
|
||||
- name: Dispatch el-sdk-updated to downstream repos
|
||||
if: github.event_name == 'push'
|
||||
- name: Dispatch to foundation/engram
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.GIT_TOKEN }}
|
||||
GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||
GITEA_API: https://git.neuralplatform.ai/api/v1
|
||||
run: |
|
||||
for repo in neuron-technologies/forge neuron-technologies/neuron-web; do
|
||||
curl -sf -X POST \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${GITEA_API}/repos/${repo}/dispatches" \
|
||||
-d "{
|
||||
\"type\": \"el-sdk-updated\",
|
||||
\"inputs\": {\"el_version\": \"latest\", \"commit\": \"${GITHUB_SHA}\"}
|
||||
}" && echo "Dispatched to ${repo}" || echo "Warning: dispatch to ${repo} failed"
|
||||
done
|
||||
curl -sf -X POST \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${GITEA_API}/repos/neuron-technologies/engram/dispatches" \
|
||||
-d "{
|
||||
\"type\": \"el-sdk-updated\",
|
||||
\"inputs\": {
|
||||
\"el_version\": \"latest\",
|
||||
\"commit\": \"${GITHUB_SHA}\"
|
||||
}
|
||||
}"
|
||||
echo "Dispatched el-sdk-updated to foundation/engram"
|
||||
|
||||
- name: Dispatch to neuron-technologies/forge
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.GITEA_TOKEN }}
|
||||
GITEA_API: https://git.neuralplatform.ai/api/v1
|
||||
run: |
|
||||
curl -sf -X POST \
|
||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
"${GITEA_API}/repos/neuron-technologies/forge/dispatches" \
|
||||
-d "{
|
||||
\"type\": \"el-sdk-updated\",
|
||||
\"inputs\": {
|
||||
\"el_version\": \"latest\",
|
||||
\"commit\": \"${GITHUB_SHA}\"
|
||||
}
|
||||
}"
|
||||
echo "Dispatched el-sdk-updated to neuron-technologies/forge"
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
#!/usr/bin/env bash
# El pre-commit hook: compile and run native tests before commit.
# Install once per clone: git config core.hooksPath .githooks
#
# Each lang/tests/native/test_*.el file is translated to C with `elc --test`,
# built against the El runtime with gcc, and executed. A failure in any of the
# three stages marks that test as failed; one or more failed tests abort the
# commit with exit status 1.

set -euo pipefail

ROOT="$(git rev-parse --show-toplevel)"
LANG_DIR="$ROOT/lang"
RUNTIME="$LANG_DIR/el-compiler/runtime"
ELC="$LANG_DIR/dist/platform/elc"

# If elc isn't built yet, skip with a warning rather than blocking
if [ ! -x "$ELC" ]; then
  echo "⚠ elc not found at lang/dist/platform/elc — skipping pre-commit tests"
  echo " Build it first: cd lang && gcc -O2 -I el-compiler/runtime dist/elc-bootstrap.c el-compiler/runtime/el_runtime.c -lcurl -lpthread -o dist/elc-gen2 && ./dist/elc-gen2 el-compiler/src/compiler.el > /tmp/elc.c && gcc -O2 -I el-compiler/runtime /tmp/elc.c el-compiler/runtime/el_runtime.c -lcurl -lpthread -o dist/platform/elc"
  exit 0
fi

# Generated C files and binaries go into a private scratch directory that is
# removed when the hook exits, so two hooks running at once cannot overwrite
# each other's artifacts and nothing is left behind in /tmp.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf "$WORK_DIR"' EXIT

echo "→ Running El native tests..."
PASS=0
FAIL=0
FAILED_TESTS=""

for test_file in "$LANG_DIR"/tests/native/test_*.el; do
  # When nothing matches, bash passes the pattern through unexpanded; skip it
  # so an empty test directory is not reported as a failing test "test_*".
  [ -e "$test_file" ] || continue

  name=$(basename "$test_file" .el)
  tmp_c="$WORK_DIR/${name}.c"
  tmp_bin="$WORK_DIR/${name}"

  # Compile El -> C, C -> binary, then run; all three must succeed.
  if "$ELC" --test "$test_file" > "$tmp_c" 2>/dev/null \
    && gcc -O2 -I "$RUNTIME" "$tmp_c" "$RUNTIME/el_runtime.c" \
      -lcurl -lpthread -lm -o "$tmp_bin" 2>/dev/null \
    && "$tmp_bin" 2>/dev/null; then
    PASS=$((PASS + 1))
  else
    echo " ✗ $name"
    FAIL=$((FAIL + 1))
    FAILED_TESTS="$FAILED_TESTS $name"
  fi
done

echo " $PASS passed, $FAIL failed"

if [ "$FAIL" -gt 0 ]; then
  echo ""
  echo "✗ Pre-commit failed. Fix these tests before committing:$FAILED_TESTS"
  exit 1
fi

echo "✓ All tests passed"
exit 0
|
||||
@@ -1,23 +0,0 @@
|
||||
// arbor-cli — the `arbor` command-line tool.
|
||||
// Inlines its own copies of the parse / layout / render pipeline so that the
|
||||
// resulting binary is self-contained. (El's `import` form today concatenates
|
||||
// source; once a real module loader lands this becomes a thin driver.)
|
||||
|
||||
// Package identity for the CLI vessel.
vessel "arbor-cli" {
    version "0.1.0"
    description "Command-line interface for the Arbor diagram language"
    authors ["Neuron Technologies"]
    edition "2026"
}

// Sibling Arbor vessels this package declares as dependencies.
dependencies {
    arbor-core "0.1"
    arbor-parse "0.1"
    arbor-layout "0.1"
    arbor-render "0.1"
}

// Entry source file and output directory for the build.
build {
    entry "src/main.el"
    output "dist/"
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,18 +0,0 @@
|
||||
// arbor-core — fundamental types for Arbor diagrams.
|
||||
// Node IDs (sanitised), shape vocabulary, edge kinds, and the lightweight
|
||||
// graph value used by every other vessel.
|
||||
|
||||
// Package identity for the core-types vessel.
vessel "arbor-core" {
    version "0.1.0"
    description "Core types for Arbor diagrams: NodeId, ArborShape, ArborEdgeKind, graphs"
    authors ["Neuron Technologies"]
    edition "2026"
}

// No dependencies are declared for this vessel.
dependencies {
}

// Entry source file and output directory for the build.
build {
    entry "src/main.el"
    output "dist/"
}
|
||||
@@ -1,333 +0,0 @@
|
||||
// arbor-core — core types for Arbor diagrams.
|
||||
//
|
||||
// Idiomatic El: everything is a Map. Functions take/return maps; helpers are
|
||||
// pure and small. The downstream vessels (parse, layout, render) consume the
|
||||
// shapes defined here.
|
||||
//
|
||||
// Shape vocabulary:
|
||||
// ArborShape strings — "rect" "rounded" "cylinder" "diamond" "stadium" "primary"
|
||||
//
|
||||
// Edge-kind strings:
|
||||
// "solid" "dashed" "forbidden" "bidirectional"
|
||||
//
|
||||
// Node value: { "id":Str, "label":Str, "shape":Str }
|
||||
// Edge value: { "from":Str, "to":Str, "label":Str, "kind":Str }
|
||||
// Group value: { "id":Str, "label":Str, "node_ids":[Str], "direction":Str }
|
||||
// Graph value: { "title":Str, "direction":Str, "nodes":[Node], "edges":[Edge], "groups":[Group] }
|
||||
//
|
||||
// Diagram-form (lowered) is the same shape but with NodeStyle/EdgeLine/Arrow
|
||||
// resolved into renderer-friendly fields:
|
||||
// Node: + "sublabel":Str, "style_fill":Str, "style_stroke":Str, "style_color":Str
|
||||
// Edge: + "line":Str ("solid"/"dashed"/"dotted"/"thick"), "arrow":Str ("forward"/"backward"/"both"/"none")
|
||||
//
|
||||
// This file is the canonical definition of those shapes. Other vessels rely on
|
||||
// these field names.
|
||||
|
||||
// ── NodeId sanitisation ──────────────────────────────────────────────────────
|
||||
//
|
||||
// Sanitise an arbitrary string into a Mermaid-safe identifier.
|
||||
// - any char not in [a-zA-Z0-9_] becomes '_'
|
||||
// - consecutive underscores collapse
|
||||
// - trailing underscores stripped
|
||||
// - if first char is a digit, prepend 'n'
|
||||
// - if empty, return "node"
|
||||
|
||||
// Report whether the character code read from `ch` at index 0 is an ASCII
// letter, an ASCII digit, or '_' (code 95).
fn is_alnum_underscore(ch: String) -> Bool {
    let c: Int = str_char_code(ch, 0)
    // '_'
    if c == 95 { return true }
    // '0'..'9'
    if c <= 57 {
        if c >= 48 { return true }
    }
    // 'A'..'Z'
    if c <= 90 {
        if c >= 65 { return true }
    }
    // 'a'..'z'
    if c <= 122 {
        if c >= 97 { return true }
    }
    false
}
|
||||
|
||||
// Report whether the character code read from `ch` at index 0 lies in the
// ASCII digit range 48..57 ('0'..'9').
fn is_ascii_digit(ch: String) -> Bool {
    let c: Int = str_char_code(ch, 0)
    if c <= 57 {
        if c >= 48 { return true }
    }
    false
}
|
||||
|
||||
// Turn an arbitrary string into an identifier made only of [a-zA-Z0-9_].
//
//   1. Every run of characters outside [a-zA-Z0-9] becomes a single '_'.
//      A literal '_' in the input counts as part of such a run, so "a__b"
//      and "a-_b" both yield "a_b".
//   2. Trailing underscores are removed.
//   3. A result that starts with a digit gets an 'n' prefix.
//
// Returns "node" when the input is empty or nothing is left after step 2.
fn sanitize_id(s: String) -> String {
    let n: Int = str_len(s)
    if n == 0 { return "node" }

    // Pass 1: replace and collapse.
    let out = ""
    let prev_underscore = false
    let i = 0
    while i < n {
        let ch: String = str_char_at(s, i)
        // Underscores already present in the input are routed through the
        // separator branch so they collapse together with replaced characters.
        let keep: Bool = is_alnum_underscore(ch)
        if ch == "_" {
            let keep = false
        }
        if keep {
            let out = out + ch
            let prev_underscore = false
        } else {
            if !prev_underscore {
                let out = out + "_"
            }
            let prev_underscore = true
        }
        let i = i + 1
    }

    // Pass 2: strip trailing underscores by walking `end` back from the tail.
    let m: Int = str_len(out)
    let end = m
    let stripping = true
    while stripping {
        if end <= 0 {
            let stripping = false
        } else {
            let last: String = str_char_at(out, end - 1)
            if last == "_" {
                let end = end - 1
            } else {
                let stripping = false
            }
        }
    }
    let out = str_slice(out, 0, end)

    if str_len(out) == 0 { return "node" }

    // Pass 3: leading-digit guard.
    let first: String = str_char_at(out, 0)
    if is_ascii_digit(first) {
        let out = "n" + out
    }
    out
}
|
||||
|
||||
// ── Constructors ──────────────────────────────────────────────────────────────
|
||||
|
||||
// Build a raw Arbor node map with the keys "id", "label" and "shape".
fn make_node(id: String, label: String, shape: String) -> Map<String, Any> {
    {
        "id": id,
        "label": label,
        "shape": shape
    }
}
|
||||
|
||||
// Build a raw Arbor edge map from `src` to `dst` with an empty label.
fn make_edge(src: String, dst: String, kind: String) -> Map<String, Any> {
    {
        "from": src,
        "to": dst,
        "label": "",
        "kind": kind
    }
}
|
||||
|
||||
// Build a raw Arbor edge map from `src` to `dst` carrying `label`.
fn make_edge_with_label(src: String, dst: String, kind: String, label: String) -> Map<String, Any> {
    {
        "from": src,
        "to": dst,
        "label": label,
        "kind": kind
    }
}
|
||||
|
||||
// Build a group map with no member node ids and an empty direction.
fn make_group(id: String, label: String) -> Map<String, Any> {
    let no_members: [String] = el_list_empty()
    {
        "id": id,
        "label": label,
        "node_ids": no_members,
        "direction": ""
    }
}
|
||||
|
||||
// Build an empty graph map: blank title, "top-down" direction, and empty
// "nodes", "edges" and "groups" lists.
fn make_graph() -> Map<String, Any> {
    let no_nodes: [Map<String, Any>] = el_list_empty()
    let no_edges: [Map<String, Any>] = el_list_empty()
    let no_groups: [Map<String, Any>] = el_list_empty()
    {
        "title": "",
        "direction": "top-down",
        "nodes": no_nodes,
        "edges": no_edges,
        "groups": no_groups
    }
}
|
||||
|
||||
// ── Shape vocabulary ──────────────────────────────────────────────────────────
|
||||
// Returns the canonical shape string for a token, or "" if unknown.
|
||||
|
||||
// Map a token (trimmed with str_trim) to its canonical Arbor shape string.
// Returns "" when the token is not one of the six known shapes.
fn shape_from_token(tok: String) -> String {
    let name: String = str_trim(tok)
    if name == "primary" {
        return "primary"
    }
    if name == "stadium" {
        return "stadium"
    }
    if name == "diamond" {
        return "diamond"
    }
    if name == "cylinder" {
        return "cylinder"
    }
    if name == "rounded" {
        return "rounded"
    }
    if name == "rect" {
        return "rect"
    }
    ""
}
|
||||
|
||||
// Lower an Arbor shape into the renderer's NodeShape vocabulary.
|
||||
// Translate an Arbor shape string into the renderer's NodeShape string.
fn shape_to_node_shape(shape: String) -> String {
    if shape == "rounded" {
        return "rounded_rect"
    }
    if shape == "cylinder" {
        return "cylinder"
    }
    if shape == "diamond" {
        return "diamond"
    }
    if shape == "stadium" {
        return "stadium"
    }
    // "rect", "primary" and every unrecognised shape map to a rectangle.
    "rectangle"
}
|
||||
|
||||
// ── Lowering: ArborGraph → DiagramGraph ──────────────────────────────────────
|
||||
//
|
||||
// Replaces every node with a diagram-form node carrying explicit style fields,
|
||||
// and every edge with a diagram-form edge carrying line/arrow strings.
|
||||
|
||||
// Lower a raw node into diagram form: the shape is translated with
// shape_to_node_shape, "sublabel" is set to "", and the three style fields
// are filled in only for the "primary" shape (empty strings otherwise).
fn lower_node(n: Map<String, Any>) -> Map<String, Any> {
    let arbor_shape: String = n["shape"]
    let fill_hex = ""
    let stroke_hex = ""
    let text_hex = ""
    if arbor_shape == "primary" {
        let fill_hex = "#0052A0"
        let stroke_hex = "#0052A0"
        let text_hex = "#ffffff"
    }
    let rendered_shape: String = shape_to_node_shape(arbor_shape)
    {
        "id": n["id"],
        "label": n["label"],
        "sublabel": "",
        "shape": rendered_shape,
        "style_fill": fill_hex,
        "style_stroke": stroke_hex,
        "style_color": text_hex
    }
}
|
||||
|
||||
// Lower a raw edge into diagram form by replacing "kind" with "line" and
// "arrow". Only "dashed" changes the line and only "bidirectional" changes
// the arrow; every other kind (including "forbidden") stays solid/forward.
fn lower_edge(e: Map<String, Any>) -> Map<String, Any> {
    let edge_kind: String = e["kind"]
    let arrow_style = "forward"
    if edge_kind == "bidirectional" {
        let arrow_style = "both"
    }
    let line_style = "solid"
    if edge_kind == "dashed" {
        let line_style = "dashed"
    }
    {
        "from": e["from"],
        "to": e["to"],
        "label": e["label"],
        "line": line_style,
        "arrow": arrow_style
    }
}
|
||||
|
||||
// Lower a whole graph: every node goes through lower_node and every edge
// through lower_edge, in their original order. "title", "direction" and
// "groups" are carried over unchanged.
fn lower_graph(g: Map<String, Any>) -> Map<String, Any> {
    let src_nodes: [Map<String, Any>] = g["nodes"]
    let src_edges: [Map<String, Any>] = g["edges"]

    let out_nodes: [Map<String, Any>] = el_list_empty()
    let node_count: Int = el_list_len(src_nodes)
    let ni = 0
    while ni < node_count {
        let out_nodes = native_list_append(out_nodes, lower_node(get(src_nodes, ni)))
        let ni = ni + 1
    }

    let out_edges: [Map<String, Any>] = el_list_empty()
    let edge_count: Int = el_list_len(src_edges)
    let ei = 0
    while ei < edge_count {
        let out_edges = native_list_append(out_edges, lower_edge(get(src_edges, ei)))
        let ei = ei + 1
    }

    {
        "title": g["title"],
        "direction": g["direction"],
        "nodes": out_nodes,
        "edges": out_edges,
        "groups": g["groups"]
    }
}
|
||||
|
||||
// Find a node by id within a (lowered or raw) graph. Returns an empty map
|
||||
// when not found — callers check map_get(result, "id") for presence.
|
||||
// Linear search of graph["nodes"] for the first node whose "id" equals `id`.
// Returns that node, or a map created with el_map_new(0) when none matches.
fn graph_find_node(graph: Map<String, Any>, id: String) -> Map<String, Any> {
    let candidates: [Map<String, Any>] = graph["nodes"]
    let total: Int = el_list_len(candidates)
    let idx = 0
    while idx < total {
        let candidate: Map<String, Any> = get(candidates, idx)
        let candidate_id: String = candidate["id"]
        if candidate_id == id {
            return candidate
        }
        let idx = idx + 1
    }
    let not_found: Map<String, Any> = el_map_new(0)
    not_found
}
|
||||
|
||||
// ── Forbidden-edge set helpers ────────────────────────────────────────────────
|
||||
// The lowered graph drops the "forbidden" kind (line/arrow have no slot for
|
||||
// it). Callers preserve the set as a list of "from->to" strings.
|
||||
|
||||
// Compose the "from->to" string that identifies one forbidden edge.
fn forbidden_key(from: String, to: String) -> String {
    return from + "->" + to
}
|
||||
|
||||
// Gather the forbidden_key of every edge in graph["edges"] whose "kind" is
// "forbidden", preserving edge order.
fn collect_forbidden(graph: Map<String, Any>) -> [String] {
    let all_edges: [Map<String, Any>] = graph["edges"]
    let keys: [String] = el_list_empty()
    let total: Int = el_list_len(all_edges)
    let idx = 0
    while idx < total {
        let edge: Map<String, Any> = get(all_edges, idx)
        let edge_kind: String = edge["kind"]
        if edge_kind == "forbidden" {
            let src: String = edge["from"]
            let dst: String = edge["to"]
            let keys = native_list_append(keys, forbidden_key(src, dst))
        }
        let idx = idx + 1
    }
    keys
}
|
||||
|
||||
// Report whether `set` holds the forbidden_key for the edge src -> dst.
fn forbidden_contains(set: [String], src: String, dst: String) -> Bool {
    let wanted: String = forbidden_key(src, dst)
    let total: Int = el_list_len(set)
    let idx = 0
    while idx < total {
        let entry: String = get(set, idx)
        if entry == wanted {
            return true
        }
        let idx = idx + 1
    }
    false
}
|
||||
|
||||
// ── Smoke test ────────────────────────────────────────────────────────────────
|
||||
//
|
||||
// State is kept in process-local k/v storage so we never mix Int + Call or
|
||||
// Int + Ident in `+` (which the codegen heuristic emits as string concat
|
||||
// on tagged-pointer values, segfaulting on Int operands).
|
||||
|
||||
// Print a FAIL line for `label` and record the failure by setting the
// process-local state key "failures" to "1". Always returns 0.
fn fail(label: String, got: String, want: String) -> Int {
    println("FAIL " + label + " got=[" + got + "] want=[" + want + "]")
    state_set("failures", "1")
    return 0
}
|
||||
|
||||
// Compare `got` with `want`. On a match print an "ok" line and return 1;
// otherwise delegate to fail, which logs the mismatch and returns 0.
fn check_eq(label: String, got: String, want: String) -> Int {
    if got == want {
        println("ok " + label + " = " + got)
        return 1
    }
    return fail(label, got, want)
}
|
||||
|
||||
// Identifier sanitisation.
check_eq("sanitize crates/nc-core", sanitize_id("crates/nc-core"), "crates_nc_core")
check_eq("sanitize package.json", sanitize_id("package.json"), "package_json")
check_eq("sanitize 42-module", sanitize_id("42-module"), "n42_module")
check_eq("sanitize empty", sanitize_id(""), "node")
check_eq("sanitize !!--@@", sanitize_id("!!--@@"), "node")

// Shape vocabulary.
check_eq("shape_from_token rounded", shape_from_token("rounded"), "rounded")
check_eq("shape_to_node_shape primary", shape_to_node_shape("primary"), "rectangle")

// Lowering preserves a node id and adds style.
let svc_node: Map<String, Any> = make_node("svc", "Service", "primary")
let svc_lowered: Map<String, Any> = lower_node(svc_node)
check_eq("lower preserves id", svc_lowered["id"], "svc")
check_eq("lower applies primary fill", svc_lowered["style_fill"], "#0052A0")

// Edge lowering
let dashed_edge: Map<String, Any> = make_edge("a", "b", "dashed")
let dashed_lowered: Map<String, Any> = lower_edge(dashed_edge)
check_eq("lower edge dashed line", dashed_lowered["line"], "dashed")

let both_edge: Map<String, Any> = make_edge("a", "b", "bidirectional")
let both_lowered: Map<String, Any> = lower_edge(both_edge)
check_eq("lower edge bidirectional arrow", both_lowered["arrow"], "both")

// Summary: fail() sets the "failures" state key to "1" on any mismatch.
println("")
let failures: String = state_get("failures")
if str_eq(failures, "1") {
    println("arbor-core: FAILED")
    exit_program(1)
} else {
    println("arbor-core: ok")
}
|
||||
@@ -1,19 +0,0 @@
|
||||
// arbor-diagram — diagram intermediate representation + Mermaid serializer
|
||||
// + dependency-graph builders. Consumes raw graph values built by arbor-core
|
||||
// or arbor-parse and produces Mermaid markup or other serializations.
|
||||
|
||||
// Package identity for the diagram-IR vessel.
vessel "arbor-diagram" {
    version "0.1.0"
    description "Diagram IR + Mermaid serializer + architecture diagram builders"
    authors ["Neuron Technologies"]
    edition "2026"
}

// Only arbor-core is declared as a dependency.
dependencies {
    arbor-core "0.1"
}

// Entry source file and output directory for the build.
build {
    entry "src/main.el"
    output "dist/"
}
|
||||
@@ -1,433 +0,0 @@
|
||||
// arbor-diagram — diagram intermediate representation (AST + IR).
|
||||
//
|
||||
// Where arbor-core supplies the *.arbor source-language model — Mermaid-safe
|
||||
// IDs, ArborShape strings, ArborEdgeKind strings, and the lowered "diagram-
|
||||
// form" map — arbor-diagram exposes the same lowered model as the canonical
|
||||
// IR for downstream serializers (arbor-render and any future Mermaid-style
|
||||
// emitter). The two vessels overlap by design: arbor-core is responsible for
|
||||
// *naming* the schema; arbor-diagram is responsible for *building* values
|
||||
// against it.
|
||||
//
|
||||
// The Rust crate ships small AST builder structs (`DiagramNode::new`,
|
||||
// `DiagramEdge::with_label`, `DiagramGraph::add_node`). El has no method
|
||||
// chaining, no Default::default(), no enum types. The El idiom is a stack
|
||||
// of immutable maps with explicit constructor + with_* helpers that take
|
||||
// the value and return a freshly-allocated map.
|
||||
//
|
||||
// Public surface:
|
||||
// make_node(id, label) → DiagramNode
|
||||
// with_shape(node, shape) → DiagramNode
|
||||
// with_sublabel(node, sublabel) → DiagramNode
|
||||
// with_style(node, fill, stroke, color) → DiagramNode
|
||||
//
|
||||
// make_edge(from, to) → DiagramEdge
|
||||
// with_label(edge, label)
|
||||
// with_line(edge, line) // "solid"/"dashed"/"dotted"/"thick"
|
||||
// with_arrow(edge, arrow) // "forward"/"backward"/"both"/"none"
|
||||
//
|
||||
// make_group(id, label) → DiagramGroup
|
||||
// with_node(group, node_id)
|
||||
// with_nodes(group, [node_id])
|
||||
// with_group_direction(group, dir)
|
||||
//
|
||||
// make_graph(title) → DiagramGraph
|
||||
// with_direction(graph, dir)
|
||||
// graph_add_node(graph, node) → DiagramGraph
|
||||
// graph_add_edge(graph, edge) → DiagramGraph
|
||||
// graph_add_group(graph, group) → DiagramGraph
|
||||
// graph_node(graph, id) → DiagramNode | empty map
|
||||
//
|
||||
// Shape vocabulary (lowered): see arbor-core. The local copy here mirrors
|
||||
// the table in arbor-core/src/main.el so this vessel is hermetic.
|
||||
|
||||
// ── NodeShape vocabulary ────────────────────────────────────────────────────
|
||||
|
||||
// One accessor per NodeShape vocabulary string.
fn node_shape_rectangle() -> String {
    return "rectangle"
}

fn node_shape_rounded_rect() -> String {
    return "rounded_rect"
}

fn node_shape_stadium() -> String {
    return "stadium"
}

fn node_shape_cylinder() -> String {
    return "cylinder"
}

fn node_shape_diamond() -> String {
    return "diamond"
}

fn node_shape_parallelogram() -> String {
    return "parallelogram"
}

fn node_shape_database() -> String {
    return "database"
}

fn node_shape_subroutine() -> String {
    return "subroutine"
}
|
||||
|
||||
// Report whether `s` is one of the eight NodeShape vocabulary strings.
fn node_shape_valid(s: String) -> Bool {
    if str_eq(s, "subroutine") {
        return true
    }
    if str_eq(s, "database") {
        return true
    }
    if str_eq(s, "parallelogram") {
        return true
    }
    if str_eq(s, "diamond") {
        return true
    }
    if str_eq(s, "cylinder") {
        return true
    }
    if str_eq(s, "stadium") {
        return true
    }
    if str_eq(s, "rounded_rect") {
        return true
    }
    if str_eq(s, "rectangle") {
        return true
    }
    false
}
|
||||
|
||||
// ── EdgeLine vocabulary ─────────────────────────────────────────────────────
|
||||
|
||||
// One accessor per EdgeLine vocabulary string.
fn edge_line_solid() -> String {
    return "solid"
}

fn edge_line_dashed() -> String {
    return "dashed"
}

fn edge_line_dotted() -> String {
    return "dotted"
}

fn edge_line_thick() -> String {
    return "thick"
}
|
||||
|
||||
// Report whether `s` is one of the four EdgeLine vocabulary strings.
fn edge_line_valid(s: String) -> Bool {
    if str_eq(s, "thick") {
        return true
    }
    if str_eq(s, "dotted") {
        return true
    }
    if str_eq(s, "dashed") {
        return true
    }
    if str_eq(s, "solid") {
        return true
    }
    false
}
|
||||
|
||||
// ── EdgeArrow vocabulary ────────────────────────────────────────────────────
|
||||
|
||||
// One accessor per EdgeArrow vocabulary string.
fn edge_arrow_forward() -> String {
    return "forward"
}

fn edge_arrow_backward() -> String {
    return "backward"
}

fn edge_arrow_both() -> String {
    return "both"
}

fn edge_arrow_none() -> String {
    return "none"
}
|
||||
|
||||
// Report whether `s` is one of the four EdgeArrow vocabulary strings.
fn edge_arrow_valid(s: String) -> Bool {
    if str_eq(s, "none") {
        return true
    }
    if str_eq(s, "both") {
        return true
    }
    if str_eq(s, "backward") {
        return true
    }
    if str_eq(s, "forward") {
        return true
    }
    false
}
|
||||
|
||||
// ── Direction vocabulary ────────────────────────────────────────────────────
|
||||
|
||||
// One accessor per layout-direction vocabulary string.
fn direction_top_down() -> String {
    return "top-down"
}

fn direction_left_right() -> String {
    return "left-right"
}

fn direction_right_left() -> String {
    return "right-left"
}

fn direction_bottom_up() -> String {
    return "bottom-up"
}
|
||||
|
||||
// Report whether `s` is one of the four direction vocabulary strings.
fn direction_valid(s: String) -> Bool {
    if str_eq(s, "bottom-up") {
        return true
    }
    if str_eq(s, "right-left") {
        return true
    }
    if str_eq(s, "left-right") {
        return true
    }
    if str_eq(s, "top-down") {
        return true
    }
    false
}
|
||||
|
||||
// ── DiagramNode ─────────────────────────────────────────────────────────────
|
||||
|
||||
// Build a DiagramNode with the default "rectangle" shape, an empty sublabel
// and empty style fields.
fn make_node(id: String, label: String) -> Map<String, Any> {
    { "id": id, "label": label, "sublabel": "",
      "shape": "rectangle",
      "style_fill": "", "style_stroke": "", "style_color": "" }
}
|
||||
|
||||
// Return a new node map equal to `node` except that "shape" is `shape`.
fn with_shape(node: Map<String, Any>, shape: String) -> Map<String, Any> {
    { "id": node["id"], "label": node["label"], "sublabel": node["sublabel"],
      "shape": shape,
      "style_fill": node["style_fill"], "style_stroke": node["style_stroke"],
      "style_color": node["style_color"] }
}
|
||||
|
||||
// Return a new node map equal to `node` except that "sublabel" is `sublabel`.
fn with_sublabel(node: Map<String, Any>, sublabel: String) -> Map<String, Any> {
    { "id": node["id"], "label": node["label"],
      "sublabel": sublabel,
      "shape": node["shape"],
      "style_fill": node["style_fill"], "style_stroke": node["style_stroke"],
      "style_color": node["style_color"] }
}
|
||||
|
||||
// Return a new node map equal to `node` except for its three style fields,
// which are replaced by `fill`, `stroke` and `color`.
fn with_style(node: Map<String, Any>, fill: String, stroke: String, color: String) -> Map<String, Any> {
    { "id": node["id"], "label": node["label"], "sublabel": node["sublabel"],
      "shape": node["shape"],
      "style_fill": fill, "style_stroke": stroke, "style_color": color }
}
|
||||
|
||||
// ── DiagramEdge ─────────────────────────────────────────────────────────────
|
||||
|
||||
// Build a DiagramEdge with an empty label, a "solid" line and a "forward"
// arrow.
fn make_edge(from: String, to: String) -> Map<String, Any> {
    { "from": from, "to": to, "label": "",
      "line": "solid", "arrow": "forward" }
}
|
||||
|
||||
// Return a new edge map equal to `edge` except that "label" is `label`.
fn with_label(edge: Map<String, Any>, label: String) -> Map<String, Any> {
    { "from": edge["from"], "to": edge["to"],
      "label": label,
      "line": edge["line"], "arrow": edge["arrow"] }
}
|
||||
|
||||
// Return a new edge map equal to `edge` except that "line" is `line`.
fn with_line(edge: Map<String, Any>, line: String) -> Map<String, Any> {
    { "from": edge["from"], "to": edge["to"], "label": edge["label"],
      "line": line,
      "arrow": edge["arrow"] }
}
|
||||
|
||||
// Return a new edge map equal to `edge` except that "arrow" is `arrow`.
fn with_arrow(edge: Map<String, Any>, arrow: String) -> Map<String, Any> {
    { "from": edge["from"], "to": edge["to"], "label": edge["label"],
      "line": edge["line"],
      "arrow": arrow }
}
|
||||
|
||||
// ── DiagramGroup ────────────────────────────────────────────────────────────
|
||||
|
||||
// Build a DiagramGroup with no member node ids and an empty direction.
fn make_group(id: String, label: String) -> Map<String, Any> {
    let no_members: [String] = native_list_empty()
    { "id": id, "label": label,
      "node_ids": no_members,
      "direction": "" }
}
|
||||
|
||||
// Return a group map whose "node_ids" is the group's current list with
// `node_id` appended via native_list_append; other fields are copied.
fn with_node(group: Map<String, Any>, node_id: String) -> Map<String, Any> {
    let members: [String] = group["node_ids"]
    let extended: [String] = native_list_append(members, node_id)
    { "id": group["id"], "label": group["label"],
      "node_ids": extended,
      "direction": group["direction"] }
}
|
||||
|
||||
// Returns a new group map whose "node_ids" is the input group's list
// followed by every element of `ids`, in order. Other fields are carried
// over unchanged.
fn with_nodes(group: Map<String, Any>, ids: [String]) -> Map<String, Any> {
    let members: [String] = group["node_ids"]
    let total: Int = el_list_len(ids)
    let k = 0
    while k < total {
        let members = native_list_append(members, get(ids, k))
        let k = k + 1
    }
    let grown: Map<String, Any> = {
        "id": group["id"],
        "label": group["label"],
        "node_ids": members,
        "direction": group["direction"]
    }
    grown
}
|
||||
|
||||
// Returns a new group map equal to `group` except that "direction" is `dir`.
fn with_group_direction(group: Map<String, Any>, dir: String) -> Map<String, Any> {
    let redirected: Map<String, Any> = {
        "id": group["id"],
        "label": group["label"],
        "node_ids": group["node_ids"],
        "direction": dir
    }
    redirected
}
|
||||
|
||||
// ── DiagramGraph ────────────────────────────────────────────────────────────
|
||||
|
||||
// Builds an empty graph map: the given title, direction "top-down", and
// three separate empty lists for "nodes", "edges" and "groups".
fn make_graph(title: String) -> Map<String, Any> {
    let no_nodes: [Map<String, Any>] = native_list_empty()
    let no_edges: [Map<String, Any>] = native_list_empty()
    let no_groups: [Map<String, Any>] = native_list_empty()
    let graph: Map<String, Any> = {
        "title": title,
        "direction": "top-down",
        "nodes": no_nodes,
        "edges": no_edges,
        "groups": no_groups
    }
    graph
}
|
||||
|
||||
// Returns a new graph map equal to `graph` except that "direction" is `dir`.
fn with_direction(graph: Map<String, Any>, dir: String) -> Map<String, Any> {
    let redirected: Map<String, Any> = {
        "title": graph["title"],
        "direction": dir,
        "nodes": graph["nodes"],
        "edges": graph["edges"],
        "groups": graph["groups"]
    }
    redirected
}
|
||||
|
||||
// Returns a new graph map whose "nodes" list is the input graph's list with
// `node` appended; title, direction, edges and groups are carried over.
fn graph_add_node(graph: Map<String, Any>, node: Map<String, Any>) -> Map<String, Any> {
    let before: [Map<String, Any>] = graph["nodes"]
    let after: [Map<String, Any>] = native_list_append(before, node)
    let grown: Map<String, Any> = {
        "title": graph["title"],
        "direction": graph["direction"],
        "nodes": after,
        "edges": graph["edges"],
        "groups": graph["groups"]
    }
    grown
}
|
||||
|
||||
// Returns a new graph map whose "edges" list is the input graph's list with
// `edge` appended; title, direction, nodes and groups are carried over.
fn graph_add_edge(graph: Map<String, Any>, edge: Map<String, Any>) -> Map<String, Any> {
    let before: [Map<String, Any>] = graph["edges"]
    let after: [Map<String, Any>] = native_list_append(before, edge)
    let grown: Map<String, Any> = {
        "title": graph["title"],
        "direction": graph["direction"],
        "nodes": graph["nodes"],
        "edges": after,
        "groups": graph["groups"]
    }
    grown
}
|
||||
|
||||
// Returns a new graph map whose "groups" list is the input graph's list with
// `group` appended; title, direction, nodes and edges are carried over.
fn graph_add_group(graph: Map<String, Any>, group: Map<String, Any>) -> Map<String, Any> {
    let before: [Map<String, Any>] = graph["groups"]
    let after: [Map<String, Any>] = native_list_append(before, group)
    let grown: Map<String, Any> = {
        "title": graph["title"],
        "direction": graph["direction"],
        "nodes": graph["nodes"],
        "edges": graph["edges"],
        "groups": after
    }
    grown
}
|
||||
|
||||
// Looks up a node by id with a linear scan over graph["nodes"] and returns
// the first node whose "id" equals `id`. When no node matches, returns a
// fresh empty map (so the result has no "id" field).
fn graph_node(graph: Map<String, Any>, id: String) -> Map<String, Any> {
    let candidates: [Map<String, Any>] = graph["nodes"]
    let total: Int = el_list_len(candidates)
    let idx = 0
    while idx < total {
        let candidate: Map<String, Any> = get(candidates, idx)
        let candidate_id: String = candidate["id"]
        if str_eq(candidate_id, id) {
            return candidate
        }
        let idx = idx + 1
    }
    let not_found: Map<String, Any> = el_map_new(0)
    not_found
}
|
||||
|
||||
// ── Smoke test ──────────────────────────────────────────────────────────────
|
||||
|
||||
// Reports a failed smoke check: prints a "FAIL" line with the label, the
// value obtained and the value expected, sets the "smoke_failures" state key
// to "1" (read back at the end of the script), and returns 0.
fn fail(label: String, got: String, want: String) -> Int {
    let report: String = "FAIL " + label + " got=[" + got + "] want=[" + want + "]"
    println(report)
    state_set("smoke_failures", "1")
    return 0
}
|
||||
|
||||
// Smoke-test assertion: when `got` equals `want`, prints an "ok" line and
// returns 1; otherwise delegates to fail(), which prints the mismatch,
// flags the run as failed, and supplies the return value.
fn check_eq(label: String, got: String, want: String) -> Int {
    if got == want {
        let report: String = "ok " + label + " = " + got
        println(report)
        return 1
    }
    let outcome: Int = fail(label, got, want)
    outcome
}
|
||||
|
||||
// ---- Vocabulary validators ------------------------------------------------
check_eq("shape rectangle valid", bool_to_str(node_shape_valid("rectangle")), "true")
check_eq("shape hexagon invalid", bool_to_str(node_shape_valid("hexagon")), "false")
check_eq("line dashed valid", bool_to_str(edge_line_valid("dashed")), "true")
check_eq("arrow both valid", bool_to_str(edge_arrow_valid("both")), "true")
check_eq("dir top-down valid", bool_to_str(direction_valid("top-down")), "true")

// ---- Node builder: each with_* call yields a new map ----------------------
let node_plain: Map<String, Any> = make_node("svc", "Service")
check_eq("node default shape", node_plain["shape"], "rectangle")
check_eq("node default sublabel empty", node_plain["sublabel"], "")

let node_shaped: Map<String, Any> = with_shape(node_plain, "cylinder")
check_eq("node with_shape", node_shaped["shape"], "cylinder")
check_eq("node id preserved", node_shaped["id"], "svc")

let node_sub: Map<String, Any> = with_sublabel(node_shaped, "v0.1.0")
check_eq("node with_sublabel", node_sub["sublabel"], "v0.1.0")

let node_styled: Map<String, Any> = with_style(node_sub, "#0052A0", "#0052A0", "#ffffff")
check_eq("node style fill", node_styled["style_fill"], "#0052A0")
check_eq("node style color", node_styled["style_color"], "#ffffff")

// ---- Edge builder ----------------------------------------------------------
let edge_plain: Map<String, Any> = make_edge("a", "b")
check_eq("edge default line", edge_plain["line"], "solid")
check_eq("edge default arrow", edge_plain["arrow"], "forward")
let edge_dashed: Map<String, Any> = with_line(edge_plain, "dashed")
let edge_both: Map<String, Any> = with_arrow(edge_dashed, "both")
let edge_full: Map<String, Any> = with_label(edge_both, "calls")
check_eq("edge line", edge_full["line"], "dashed")
check_eq("edge arrow", edge_full["arrow"], "both")
check_eq("edge label", edge_full["label"], "calls")

// ---- Group builder: one-at-a-time and bulk membership ---------------------
let core_empty: Map<String, Any> = make_group("core", "Application Core")
let core_one: Map<String, Any> = with_node(core_empty, "api")
let core_two: Map<String, Any> = with_node(core_one, "svc")
let core_members: [String] = core_two["node_ids"]
check_eq("group with two nodes", int_to_str(el_list_len(core_members)), "2")

let infra_empty: Map<String, Any> = make_group("infra", "Infrastructure")
let infra_ids: [String] = native_list_empty()
let infra_ids = native_list_append(infra_ids, "db")
let infra_ids = native_list_append(infra_ids, "cache")
let infra_full: Map<String, Any> = with_nodes(infra_empty, infra_ids)
let infra_members: [String] = infra_full["node_ids"]
check_eq("group with_nodes appends", int_to_str(el_list_len(infra_members)), "2")

// ---- Graph builder and node lookup ----------------------------------------
let graph_new: Map<String, Any> = make_graph("System")
let graph_lr: Map<String, Any> = with_direction(graph_new, "left-right")
let graph_n1: Map<String, Any> = graph_add_node(graph_lr, node_styled)
let node_backend: Map<String, Any> = make_node("b", "Backend")
let graph_n2: Map<String, Any> = graph_add_node(graph_n1, node_backend)
let graph_e1: Map<String, Any> = graph_add_edge(graph_n2, edge_full)
let graph_done: Map<String, Any> = graph_add_group(graph_e1, infra_full)

check_eq("graph title", graph_done["title"], "System")
check_eq("graph direction", graph_done["direction"], "left-right")
let done_nodes: [Map<String, Any>] = graph_done["nodes"]
let done_edges: [Map<String, Any>] = graph_done["edges"]
let done_groups: [Map<String, Any>] = graph_done["groups"]
check_eq("graph nodes count", int_to_str(el_list_len(done_nodes)), "2")
check_eq("graph edges count", int_to_str(el_list_len(done_edges)), "1")
check_eq("graph groups count", int_to_str(el_list_len(done_groups)), "1")

let lookup_hit: Map<String, Any> = graph_node(graph_done, "svc")
check_eq("graph_node found", lookup_hit["id"], "svc")

// A miss yields a map with no "id"; checked via str_len rather than
// check_eq, exactly as before.
let lookup_miss: Map<String, Any> = graph_node(graph_done, "nonexistent")
let lookup_miss_id: String = lookup_miss["id"]
if str_len(lookup_miss_id) == 0 {
    println("ok graph_node missing returns empty")
} else {
    println("FAIL graph_node missing returned: " + lookup_miss_id)
    state_set("smoke_failures", "1")
}

// ---- Verdict: exit non-zero when any check flagged a failure --------------
println("")
let smoke_flag: String = state_get("smoke_failures")
if str_eq(smoke_flag, "1") {
    println("arbor-diagram: FAILED")
    exit_program(1)
} else {
    println("arbor-diagram: ok")
}
|
||||
@@ -1,19 +0,0 @@
|
||||
// arbor-layout — hierarchical layout engine. Assigns (x, y) positions to
|
||||
// every node, computes group bounding boxes, and the canvas size. Consumes
|
||||
// a diagram graph; produces a layout-result value.
|
||||
|
||||
vessel "arbor-layout" {
|
||||
version "0.1.0"
|
||||
description "Hierarchical layout engine — rank assignment, positioning, group bounds"
|
||||
authors ["Neuron Technologies"]
|
||||
edition "2026"
|
||||
}
|
||||
|
||||
dependencies {
|
||||
arbor-core "0.1"
|
||||
}
|
||||
|
||||
build {
|
||||
entry "src/main.el"
|
||||
output "dist/"
|
||||
}
|
||||
@@ -1,591 +0,0 @@
|
||||
// arbor-layout — hierarchical layout for diagram graphs.
|
||||
//
|
||||
// Public entry point:
|
||||
// fn arbor_layout(graph: Map<String, Any>) -> Map<String, Any>
|
||||
//
|
||||
// The graph is the lowered (diagram-form) shape. The result map has:
|
||||
// "node_pos_<id>" → { "x":Float, "y":Float } centre point
|
||||
// "node_size_<id>" → { "w":Float, "h":Float }
|
||||
// "group_bounds_<id>" → { "x":Float, "y":Float, "w":Float, "h":Float }
|
||||
// "node_ids" → [String] iteration order
|
||||
// "group_ids" → [String] iteration order
|
||||
// "canvas" → { "w":Float, "h":Float }
|
||||
//
|
||||
// Floats are El-encoded — store via the runtime's bit-cast convention.
|
||||
// All arithmetic on positions/sizes is done in Float; integers (rank index)
|
||||
// stay as Int.
|
||||
//
|
||||
// Algorithm (simplified Sugiyama):
|
||||
// 1. Assign ranks via topological propagation (longest path from sources).
|
||||
// 2. Group nodes by rank, preserving declaration order.
|
||||
// 3. Position each rank as a row (top-down/bottom-up) or column (LR/RL).
|
||||
// 4. Compute group bounding boxes from member positions.
|
||||
// 5. Compute canvas size to enclose everything.
|
||||
//
|
||||
// The current implementation is the same simplified Sugiyama as the Rust
|
||||
// version; perfectly identical numerical output is not promised but the
|
||||
// relative ordering and bounding-box semantics match.
|
||||
|
||||
// ── Spacing constants (declared as float-bit-cast helpers) ──────────────────
|
||||
|
||||
// Each constant is returned as an El float built with int_to_float.

// 120 — same value node_size_for uses as the base node width; also the
// column advance for an empty rank in horizontal layouts.
fn k_node_base_w() -> el_val_t {
    return int_to_float(120)
}

// 40 — same value node_size_for uses as the node height.
fn k_node_base_h() -> el_val_t {
    return int_to_float(40)
}

// 8 — same value node_size_for adds per label character beyond the tenth.
fn k_node_char_extra() -> el_val_t {
    return int_to_float(8)
}

// 60 — horizontal gap: between nodes in a row, or between rank columns.
fn k_h_gap() -> el_val_t {
    return int_to_float(60)
}

// 80 — vertical gap: between rank rows, or between nodes in a column.
fn k_v_gap() -> el_val_t {
    return int_to_float(80)
}

// 20 — padding added around a group's members when computing its bounds.
fn k_group_pad() -> el_val_t {
    return int_to_float(20)
}

// 40 — outer margin: starting cursor offset and canvas padding.
fn k_margin() -> el_val_t {
    return int_to_float(40)
}
|
||||
|
||||
// Maximum of two bit-cast floats.
//
// The runtime exposes no float-aware max, and El's comparison operators
// work on the raw 64-bit integer representation of the operands. For
// non-negative IEEE 754 doubles the bit pattern is ordered the same way as
// the numeric value, so a raw comparison gives the right answer for the
// layout values used here (small non-negative numbers).
// NOTE(review): not valid for negative inputs — their raw ordering is
// reversed; confirm no caller passes one.
fn fmax(a: el_val_t, b: el_val_t) -> el_val_t {
    if b >= a {
        return b
    }
    a
}
|
||||
|
||||
// Adds two bit-cast floats that hold integer values.
//
// El's `+` on bit-cast doubles is a plain 64-bit integer add, so the sum is
// computed in Int space: decode both operands with float_to_int (truncation
// toward zero, exact for the integer-valued layout numbers), add, and
// re-encode with int_to_float. This is the same scheme fsub and fmul use.
//
// The previous version first round-tripped each operand through
// format_float/str_to_float before truncating. For integer-valued inputs
// that produced the same result, at the price of two string formats and two
// parses on every addition.
fn fadd(a: el_val_t, b: el_val_t) -> el_val_t {
    let ai: Int = float_to_int(a)
    let bi: Int = float_to_int(b)
    int_to_float(ai + bi)
}
|
||||
|
||||
// Subtracts two bit-cast floats: decodes both with float_to_int, subtracts
// in Int space, and re-encodes the difference with int_to_float.
fn fsub(a: el_val_t, b: el_val_t) -> el_val_t {
    let diff: Int = float_to_int(a) - float_to_int(b)
    return int_to_float(diff)
}
|
||||
|
||||
// Multiplies two bit-cast floats: decodes both with float_to_int, multiplies
// in Int space, and re-encodes the product with int_to_float.
fn fmul(a: el_val_t, b: el_val_t) -> el_val_t {
    let product: Int = float_to_int(a) * float_to_int(b)
    return int_to_float(product)
}
|
||||
|
||||
// Halves a bit-cast float: decodes it with float_to_int, divides by 2 in
// Int space (so the result is computed with Int division), and re-encodes.
fn fdiv2(a: el_val_t) -> el_val_t {
    let half: Int = float_to_int(a) / 2
    return int_to_float(half)
}
|
||||
|
||||
// ── Node size based on label width ──────────────────────────────────────────
|
||||
|
||||
// Computes the box size for a node from its label.
//
// Returns { "w": Float, "h": Float }. Width is the base width plus a fixed
// increment for every label character beyond the tenth; height is the base
// height. The base width, per-character increment and base height now come
// from k_node_base_w / k_node_char_extra / k_node_base_h instead of being
// repeated here as the literals 120, 8 and 40, so the spacing constants have
// a single source of truth. The resulting values are unchanged.
fn node_size_for(label: String) -> Map<String, Any> {
    let base_w: Int = float_to_int(k_node_base_w())
    let per_char: Int = float_to_int(k_node_char_extra())
    let len: Int = str_len(label)
    // Characters beyond the first ten widen the node.
    let extra: Int = 0
    if len > 10 {
        let extra = len - 10
    }
    let w: el_val_t = int_to_float(base_w + per_char * extra)
    let h: el_val_t = k_node_base_h()
    { "w": w, "h": h }
}
|
||||
|
||||
// ── Adjacency-list construction ─────────────────────────────────────────────
|
||||
//
|
||||
// Builds the adjacency data for rank assignment.
//
// Returns { "succ": Map, "indeg": Map }, both keyed by node id:
//   succ[id]  → list of ids reached by an edge leaving `id`
//   indeg[id] → number of edges arriving at `id`
// Every node in graph["nodes"] is seeded with an empty successor list and an
// in-degree of 0 before graph["edges"] is folded in.
fn build_succ_indeg(graph: Map<String, Any>) -> Map<String, Any> {
    let node_list: [Map<String, Any>] = graph["nodes"]
    let edge_list: [Map<String, Any>] = graph["edges"]
    let node_count: Int = el_list_len(node_list)
    let edge_count: Int = el_list_len(edge_list)

    let succ: Map<String, Any> = el_map_new(0)
    let indeg: Map<String, Any> = el_map_new(0)

    // Seed one entry per declared node.
    let ni = 0
    while ni < node_count {
        let node: Map<String, Any> = get(node_list, ni)
        let node_id: String = node["id"]
        let no_succ: [String] = el_list_empty()
        let succ = el_map_set(succ, node_id, no_succ)
        let indeg = el_map_set(indeg, node_id, 0)
        let ni = ni + 1
    }

    // Fold each edge into its source's successor list and its target's
    // in-degree.
    let ei = 0
    while ei < edge_count {
        let edge: Map<String, Any> = get(edge_list, ei)
        let src: String = edge["from"]
        let dst: String = edge["to"]
        let old_succ: [String] = el_map_get(succ, src)
        let grown_succ: [String] = native_list_append(old_succ, dst)
        let succ = el_map_set(succ, src, grown_succ)
        let old_deg: Int = el_map_get(indeg, dst)
        let indeg = el_map_set(indeg, dst, old_deg + 1)
        let ei = ei + 1
    }

    let adjacency: Map<String, Any> = { "succ": succ, "indeg": indeg }
    adjacency
}
|
||||
|
||||
// ── Topological rank assignment ─────────────────────────────────────────────
|
||||
//
|
||||
// Assigns a rank to every node and returns a map node_id → rank.
//
// All nodes start at rank 0. Nodes with in-degree 0 are queued in
// declaration order; each dequeued node raises every successor's rank to at
// least its own rank + 1, decrements that successor's remaining in-degree,
// and queues the successor once the in-degree has dropped to 0 or below.
// Nodes that are never queued keep rank 0.
fn assign_ranks(graph: Map<String, Any>) -> Map<String, Any> {
    let node_list: [Map<String, Any>] = graph["nodes"]
    let node_count: Int = el_list_len(node_list)
    let adjacency: Map<String, Any> = build_succ_indeg(graph)
    let succ: Map<String, Any> = adjacency["succ"]
    let indeg: Map<String, Any> = adjacency["indeg"]

    // Single pass: default every rank to 0 and seed the queue with the
    // sources (in-degree 0), preserving declaration order.
    let ranks: Map<String, Any> = el_map_new(0)
    let queue: [String] = el_list_empty()
    let k = 0
    while k < node_count {
        let node: Map<String, Any> = get(node_list, k)
        let node_id: String = node["id"]
        let ranks = el_map_set(ranks, node_id, 0)
        let pending: Int = el_map_get(indeg, node_id)
        if pending == 0 {
            let queue = native_list_append(queue, node_id)
        }
        let k = k + 1
    }

    // `head` indexes the next unprocessed queue entry; the queue only grows.
    let head = 0
    let running = true
    while running {
        if head < el_list_len(queue) {
            let current: String = get(queue, head)
            let head = head + 1
            let current_rank: Int = el_map_get(ranks, current)
            let targets: [String] = el_map_get(succ, current)
            let target_count: Int = el_list_len(targets)
            let t = 0
            while t < target_count {
                let target: String = get(targets, t)
                let target_rank: Int = el_map_get(ranks, target)
                let proposed: Int = current_rank + 1
                if proposed > target_rank {
                    let ranks = el_map_set(ranks, target, proposed)
                }
                let remaining: Int = el_map_get(indeg, target)
                let remaining_after: Int = remaining - 1
                let indeg = el_map_set(indeg, target, remaining_after)
                if remaining_after <= 0 {
                    let queue = native_list_append(queue, target)
                }
                let t = t + 1
            }
        } else {
            let running = false
        }
    }
    ranks
}
|
||||
|
||||
// ── Layout pass ─────────────────────────────────────────────────────────────
|
||||
|
||||
// Lays out `graph` (diagram form: "direction", "nodes", "edges", "groups")
// and returns the layout-result map:
//   "node_pos_<id>"     → { "x", "y" }            centre point
//   "node_size_<id>"    → { "w", "h" }
//   "group_bounds_<id>" → { "x", "y", "w", "h" }
//   "node_ids"          → [String]                iteration order
//   "group_ids"         → [String]                groups that received bounds
//   "canvas"            → { "w", "h" }
// An empty graph yields only the id lists and a fixed 200x100 canvas.
//
// Changes from the previous version:
//   * Nodes are bucketed per rank by scanning the id list instead of being
//     joined into a separator-delimited string held in process state. The
//     join/split separator was an empty string, from which the ids cannot be
//     recovered; this version needs no separator and no global state.
//   * Group members are matched against the laid-out node ids. The old
//     check (`str_len` of the x coordinate `>= 0`) was always true and read
//     a float as a string; members not present in the graph are now skipped,
//     and a group with no laid-out member gets no bounds.
//   * Min/max tracking compares decoded integer values (float_to_int), the
//     same convention the arithmetic helpers rely on, so the negative
//     sentinel is handled without relying on raw bit-pattern ordering.
fn arbor_layout(graph: Map<String, Any>) -> Map<String, Any> {
    let nodes: [Map<String, Any>] = graph["nodes"]
    let n: Int = el_list_len(nodes)
    let direction: String = graph["direction"]

    let result: Map<String, Any> = el_map_new(0)
    let result = el_map_set(result, "node_ids", el_list_empty())
    let result = el_map_set(result, "group_ids", el_list_empty())

    if n == 0 {
        let canvas: Map<String, Any> = { "w": int_to_float(200), "h": int_to_float(100) }
        let result = el_map_set(result, "canvas", canvas)
        return result
    }

    let ranks: Map<String, Any> = assign_ranks(graph)

    // One pass over the nodes: record iteration order, cache each node's
    // size under "node_size_<id>" (keyed by node id), and find the deepest
    // rank.
    let id_list: [String] = el_list_empty()
    let max_rank = 0
    let i = 0
    while i < n {
        let nd: Map<String, Any> = get(nodes, i)
        let nid: String = nd["id"]
        let lbl: String = nd["label"]
        let sz: Map<String, Any> = node_size_for(lbl)
        let result = el_map_set(result, "node_size_" + nid, sz)
        let id_list = native_list_append(id_list, nid)
        let node_rank: Int = el_map_get(ranks, nid)
        if node_rank > max_rank { let max_rank = node_rank }
        let i = i + 1
    }
    let result = el_map_set(result, "node_ids", id_list)

    // Position pass. Vertical layouts stack ranks as rows; horizontal
    // layouts ("left-right" / "right-left") stack them as columns.
    let is_vertical = true
    if str_eq(direction, "left-right") { let is_vertical = false }
    if str_eq(direction, "right-left") { let is_vertical = false }

    // `cursor` is the leading edge of the current rank along the main axis.
    let cursor: el_val_t = k_margin()

    let rank = 0
    while rank <= max_rank {
        // Collect this rank's node ids in declaration order.
        let ids: [String] = el_list_empty()
        let k = 0
        while k < n {
            let cand: String = get(id_list, k)
            let cand_rank: Int = el_map_get(ranks, cand)
            if cand_rank == rank {
                let ids = native_list_append(ids, cand)
            }
            let k = k + 1
        }
        let ids_n: Int = el_list_len(ids)

        if ids_n > 0 {
            // Cross-axis extent of the rank: row height (vertical) or
            // column width (horizontal), never below the base node height.
            let cross_max: el_val_t = k_node_base_h()
            let j = 0
            while j < ids_n {
                let nid: String = get(ids, j)
                let sz: Map<String, Any> = el_map_get(result, "node_size_" + nid)
                if is_vertical {
                    let h: el_val_t = sz["h"]
                    let cross_max = fmax(cross_max, h)
                } else {
                    let w: el_val_t = sz["w"]
                    let cross_max = fmax(cross_max, w)
                }
                let j = j + 1
            }

            if is_vertical {
                let row_h: el_val_t = cross_max
                let y_center: el_val_t = fadd(cursor, fdiv2(row_h))
                let x_cursor: el_val_t = k_margin()
                let j = 0
                while j < ids_n {
                    let nid: String = get(ids, j)
                    let sz: Map<String, Any> = el_map_get(result, "node_size_" + nid)
                    let w: el_val_t = sz["w"]
                    let cx: el_val_t = fadd(x_cursor, fdiv2(w))
                    let pos: Map<String, Any> = { "x": cx, "y": y_center }
                    let result = el_map_set(result, "node_pos_" + nid, pos)
                    let x_cursor = fadd(fadd(x_cursor, w), k_h_gap())
                    let j = j + 1
                }
                let cursor = fadd(fadd(cursor, row_h), k_v_gap())
            } else {
                let col_w: el_val_t = cross_max
                let x_center: el_val_t = fadd(cursor, fdiv2(col_w))
                let y_cursor: el_val_t = k_margin()
                let j = 0
                while j < ids_n {
                    let nid: String = get(ids, j)
                    let sz: Map<String, Any> = el_map_get(result, "node_size_" + nid)
                    let h: el_val_t = sz["h"]
                    let cy: el_val_t = fadd(y_cursor, fdiv2(h))
                    let pos: Map<String, Any> = { "x": x_center, "y": cy }
                    let result = el_map_set(result, "node_pos_" + nid, pos)
                    let y_cursor = fadd(fadd(y_cursor, h), k_v_gap())
                    let j = j + 1
                }
                let cursor = fadd(fadd(cursor, col_w), k_h_gap())
            }
        } else {
            // Empty rank — advance the cursor by a default node size.
            if is_vertical {
                let cursor = fadd(cursor, fadd(k_node_base_h(), k_v_gap()))
            } else {
                let cursor = fadd(cursor, fadd(k_node_base_w(), k_h_gap()))
            }
        }
        let rank = rank + 1
    }

    // Direction inversions for bottom-up / right-left: mirror every node
    // along the main axis.
    // NOTE(review): the mirrored coordinate is (extent - v) + margin, which
    // leaves twice the margin before the first rank — kept as-is; confirm
    // this matches the reference implementation.
    let need_flip_y = false
    let need_flip_x = false
    if str_eq(direction, "bottom-up") { let need_flip_y = true }
    if str_eq(direction, "right-left") { let need_flip_x = true }

    if need_flip_y {
        let extent_y: el_val_t = fadd(fsub(cursor, k_v_gap()), k_margin())
        let i = 0
        while i < n {
            let nid: String = get(id_list, i)
            let pos: Map<String, Any> = el_map_get(result, "node_pos_" + nid)
            let y: el_val_t = pos["y"]
            let new_y: el_val_t = fadd(fsub(extent_y, y), k_margin())
            let new_pos: Map<String, Any> = { "x": pos["x"], "y": new_y }
            let result = el_map_set(result, "node_pos_" + nid, new_pos)
            let i = i + 1
        }
    }
    if need_flip_x {
        let extent_x: el_val_t = fadd(fsub(cursor, k_h_gap()), k_margin())
        let i = 0
        while i < n {
            let nid: String = get(id_list, i)
            let pos: Map<String, Any> = el_map_get(result, "node_pos_" + nid)
            let x: el_val_t = pos["x"]
            let new_x: el_val_t = fadd(fsub(extent_x, x), k_margin())
            let new_pos: Map<String, Any> = { "x": new_x, "y": pos["y"] }
            let result = el_map_set(result, "node_pos_" + nid, new_pos)
            let i = i + 1
        }
    }

    // Group bounds: the padded bounding box of each group's laid-out members.
    let groups: [Map<String, Any>] = graph["groups"]
    let gn: Int = el_list_len(groups)
    let gid_list: [String] = el_list_empty()
    let g = 0
    while g < gn {
        let grp: Map<String, Any> = get(groups, g)
        let gid: String = grp["id"]
        let member_ids: [String] = grp["node_ids"]
        let mn: Int = el_list_len(member_ids)

        // Sentinels far outside any layout coordinate.
        let big: Int = 1000000000
        let neg: Int = 0 - 1000000000
        let min_x: el_val_t = int_to_float(big)
        let min_y: el_val_t = int_to_float(big)
        let max_x: el_val_t = int_to_float(neg)
        let max_y: el_val_t = int_to_float(neg)
        let placed = 0

        let mi = 0
        while mi < mn {
            let mid: String = get(member_ids, mi)

            // Only members that are actual graph nodes have a position.
            let known = false
            let k = 0
            while k < n {
                if str_eq(get(id_list, k), mid) { let known = true }
                let k = k + 1
            }

            if known {
                let mpos: Map<String, Any> = el_map_get(result, "node_pos_" + mid)
                let msz: Map<String, Any> = el_map_get(result, "node_size_" + mid)
                let cx: el_val_t = mpos["x"]
                let cy: el_val_t = mpos["y"]
                let mw: el_val_t = msz["w"]
                let mh: el_val_t = msz["h"]
                let left: el_val_t = fsub(cx, fdiv2(mw))
                let right: el_val_t = fadd(cx, fdiv2(mw))
                let top: el_val_t = fsub(cy, fdiv2(mh))
                let bot: el_val_t = fadd(cy, fdiv2(mh))
                if float_to_int(left) < float_to_int(min_x) { let min_x = left }
                if float_to_int(top) < float_to_int(min_y) { let min_y = top }
                if float_to_int(right) > float_to_int(max_x) { let max_x = right }
                if float_to_int(bot) > float_to_int(max_y) { let max_y = bot }
                let placed = placed + 1
            }
            let mi = mi + 1
        }

        // Groups with no laid-out member get no bounds and are left out of
        // "group_ids".
        if placed > 0 {
            let bx: el_val_t = fsub(min_x, k_group_pad())
            let by: el_val_t = fsub(min_y, k_group_pad())
            let bw: el_val_t = fadd(fsub(max_x, min_x), fmul(k_group_pad(), int_to_float(2)))
            let bh: el_val_t = fadd(fsub(max_y, min_y), fmul(k_group_pad(), int_to_float(2)))
            let bounds: Map<String, Any> = { "x": bx, "y": by, "w": bw, "h": bh }
            let result = el_map_set(result, "group_bounds_" + gid, bounds)
            let gid_list = native_list_append(gid_list, gid)
        }
        let g = g + 1
    }
    let result = el_map_set(result, "group_ids", gid_list)

    // Canvas: the furthest right/bottom edge over all nodes and group
    // boxes, plus the outer margin.
    let canvas_w: el_val_t = int_to_float(0)
    let canvas_h: el_val_t = int_to_float(0)
    let i = 0
    while i < n {
        let nid: String = get(id_list, i)
        let pos: Map<String, Any> = el_map_get(result, "node_pos_" + nid)
        let sz: Map<String, Any> = el_map_get(result, "node_size_" + nid)
        let node_right: el_val_t = fadd(pos["x"], fdiv2(sz["w"]))
        let node_bottom: el_val_t = fadd(pos["y"], fdiv2(sz["h"]))
        if float_to_int(node_right) > float_to_int(canvas_w) { let canvas_w = node_right }
        if float_to_int(node_bottom) > float_to_int(canvas_h) { let canvas_h = node_bottom }
        let i = i + 1
    }
    let gi = 0
    let placed_groups: Int = el_list_len(gid_list)
    while gi < placed_groups {
        let gid: String = get(gid_list, gi)
        let box: Map<String, Any> = el_map_get(result, "group_bounds_" + gid)
        let box_right: el_val_t = fadd(box["x"], box["w"])
        let box_bottom: el_val_t = fadd(box["y"], box["h"])
        if float_to_int(box_right) > float_to_int(canvas_w) { let canvas_w = box_right }
        if float_to_int(box_bottom) > float_to_int(canvas_h) { let canvas_h = box_bottom }
        let gi = gi + 1
    }
    let canvas: Map<String, Any> = {
        "w": fadd(canvas_w, k_margin()),
        "h": fadd(canvas_h, k_margin())
    }
    let result = el_map_set(result, "canvas", canvas)
    result
}
|
||||
|
||||
// ── Smoke test ──────────────────────────────────────────────────────────────
|
||||
|
||||
// Formats a bit-cast float for smoke-test output: converts it to an Int
// with float_to_int and renders that Int as a decimal string.
fn fl_to_str(v: el_val_t) -> String {
    let whole: Int = float_to_int(v)
    return int_to_str(whole)
}
|
||||
|
||||
// Reports a failed layout smoke check: prints "FAIL <label>: <msg>", sets
// the "smoke_failures" state key to "1" (read back at the end of the
// script), and returns 0.
fn smoke_fail(label: String, msg: String) -> Int {
    let report: String = "FAIL " + label + ": " + msg
    println(report)
    state_set("smoke_failures", "1")
    return 0
}
|
||||
|
||||
// Test fixture: a node map with the given id and label, shape "rectangle",
// and empty sublabel and style fields.
fn make_test_node(id: String, label: String) -> Map<String, Any> {
    let node: Map<String, Any> = {
        "id": id,
        "label": label,
        "sublabel": "",
        "shape": "rectangle",
        "style_fill": "",
        "style_stroke": "",
        "style_color": ""
    }
    node
}
|
||||
|
||||
// Test fixture: an edge map from `src` to `dst` with an empty label, a
// "solid" line and a "forward" arrow.
fn make_test_edge(src: String, dst: String) -> Map<String, Any> {
    let edge: Map<String, Any> = {
        "from": src,
        "to": dst,
        "label": "",
        "line": "solid",
        "arrow": "forward"
    }
    edge
}
|
||||
|
||||
// Test fixture: builds a graph titled "T" with the given direction.
//   ids     — one node per entry; the id doubles as the label.
//   src_dst — flat list of endpoint pairs [from0, to0, from1, to1, ...];
//             a trailing unpaired entry is ignored.
// The graph has no groups.
fn make_test_graph(direction: String, ids: [String], src_dst: [String]) -> Map<String, Any> {
    let node_list: [Map<String, Any>] = el_list_empty()
    let ni = 0
    while ni < el_list_len(ids) {
        let node_id: String = get(ids, ni)
        let node_list = native_list_append(node_list, make_test_node(node_id, node_id))
        let ni = ni + 1
    }

    // Walk the endpoint list two entries at a time.
    let edge_list: [Map<String, Any>] = el_list_empty()
    let pi = 0
    while pi + 1 < el_list_len(src_dst) {
        let from_id: String = get(src_dst, pi)
        let to_id: String = get(src_dst, pi + 1)
        let edge_list = native_list_append(edge_list, make_test_edge(from_id, to_id))
        let pi = pi + 2
    }

    let graph: Map<String, Any> = {
        "title": "T",
        "direction": direction,
        "nodes": node_list,
        "edges": edge_list,
        "groups": el_list_empty()
    }
    graph
}
|
||||
|
||||
// Empty graph: arbor_layout returns a fixed 200x100 canvas. The values are
// printed and also asserted (previously they were only printed, so a
// regression in the empty-graph path could not fail the run).
let g_empty: Map<String, Any> = {
    "title": "e", "direction": "top-down",
    "nodes": el_list_empty(), "edges": el_list_empty(), "groups": el_list_empty()
}
let r_empty: Map<String, Any> = arbor_layout(g_empty)
let canvas_empty: Map<String, Any> = r_empty["canvas"]
let empty_w: el_val_t = canvas_empty["w"]
let empty_h: el_val_t = canvas_empty["h"]
println("empty canvas w=" + fl_to_str(empty_w))
if !str_eq(fl_to_str(empty_w), "200") { smoke_fail("empty canvas w", "expected 200") }
if !str_eq(fl_to_str(empty_h), "100") { smoke_fail("empty canvas h", "expected 100") }

// Single node: must land at a strictly positive position.
let g_one: Map<String, Any> = make_test_graph("top-down",
    ["solo"], el_list_empty())
let r_one: Map<String, Any> = arbor_layout(g_one)
let pos_solo: Map<String, Any> = el_map_get(r_one, "node_pos_solo")
let x_solo: el_val_t = pos_solo["x"]
let y_solo: el_val_t = pos_solo["y"]
println("solo at x=" + fl_to_str(x_solo) + " y=" + fl_to_str(y_solo))
if float_to_int(x_solo) <= 0 { smoke_fail("solo x", "expected > 0") }
if float_to_int(y_solo) <= 0 { smoke_fail("solo y", "expected > 0") }

// Linear chain a→b→c top-down: ya < yb < yc.
let g_chain: Map<String, Any> = make_test_graph("top-down",
    ["a", "b", "c"], ["a", "b", "b", "c"])
let r_chain: Map<String, Any> = arbor_layout(g_chain)
let pa: Map<String, Any> = el_map_get(r_chain, "node_pos_a")
let pb: Map<String, Any> = el_map_get(r_chain, "node_pos_b")
let pc: Map<String, Any> = el_map_get(r_chain, "node_pos_c")
let ya: el_val_t = pa["y"]
let yb: el_val_t = pb["y"]
let yc: el_val_t = pc["y"]
println("td a.y=" + fl_to_str(ya) + " b.y=" + fl_to_str(yb) + " c.y=" + fl_to_str(yc))
if float_to_int(ya) >= float_to_int(yb) { smoke_fail("td order", "a.y >= b.y") }
if float_to_int(yb) >= float_to_int(yc) { smoke_fail("td order", "b.y >= c.y") }

// Left-right: the chain advances along x instead.
let g_lr: Map<String, Any> = make_test_graph("left-right",
    ["a", "b", "c"], ["a", "b", "b", "c"])
let r_lr: Map<String, Any> = arbor_layout(g_lr)
let pa2: Map<String, Any> = el_map_get(r_lr, "node_pos_a")
let pc2: Map<String, Any> = el_map_get(r_lr, "node_pos_c")
let xa: el_val_t = pa2["x"]
let xc: el_val_t = pc2["x"]
println("lr a.x=" + fl_to_str(xa) + " c.x=" + fl_to_str(xc))
if float_to_int(xa) >= float_to_int(xc) { smoke_fail("lr order", "a.x >= c.x") }

// Bottom-up: a is below c.
let g_bu: Map<String, Any> = make_test_graph("bottom-up",
    ["a", "b", "c"], ["a", "b", "b", "c"])
let r_bu: Map<String, Any> = arbor_layout(g_bu)
let pa3: Map<String, Any> = el_map_get(r_bu, "node_pos_a")
let pc3: Map<String, Any> = el_map_get(r_bu, "node_pos_c")
let ya3: el_val_t = pa3["y"]
let yc3: el_val_t = pc3["y"]
println("bu a.y=" + fl_to_str(ya3) + " c.y=" + fl_to_str(yc3))
if float_to_int(ya3) <= float_to_int(yc3) { smoke_fail("bu order", "a.y <= c.y") }

// Canvas of the top-down chain must have positive dimensions.
let canvas_chain: Map<String, Any> = r_chain["canvas"]
let cw: el_val_t = canvas_chain["w"]
let ch: el_val_t = canvas_chain["h"]
println("chain canvas w=" + fl_to_str(cw) + " h=" + fl_to_str(ch))
if float_to_int(cw) <= 0 { smoke_fail("canvas w", "non-positive") }
if float_to_int(ch) <= 0 { smoke_fail("canvas h", "non-positive") }

// Verdict: exit non-zero when any check flagged a failure.
println("")
let f: String = state_get("smoke_failures")
if str_eq(f, "1") {
    println("arbor-layout: FAILED")
    exit_program(1)
} else {
    println("arbor-layout: ok")
}
|
||||
@@ -1,19 +0,0 @@
|
||||
// arbor-parse — hand-written recursive-descent parser for the .arbor source
|
||||
// language. Produces an Arbor graph value consumable by arbor-layout and
|
||||
// arbor-render.
|
||||
|
||||
vessel "arbor-parse" {
|
||||
version "0.1.0"
|
||||
description "Recursive-descent parser for the .arbor diagram language"
|
||||
authors ["Neuron Technologies"]
|
||||
edition "2026"
|
||||
}
|
||||
|
||||
dependencies {
|
||||
arbor-core "0.1"
|
||||
}
|
||||
|
||||
build {
|
||||
entry "src/main.el"
|
||||
output "dist/"
|
||||
}
|
||||
@@ -1,763 +0,0 @@
|
||||
// arbor-parse — recursive-descent parser for the .arbor source language.
|
||||
//
|
||||
// This vessel inlines a private copy of the small set of arbor-core helpers
|
||||
// it needs (sanitize_id and constructors). El's import form today is purely
|
||||
// syntactic concatenation, so each vessel that wants to be its own buildable
|
||||
// unit carries its own copy of these helpers. They're tiny (well under 100
|
||||
// lines) and the duplication keeps each vessel hermetic.
|
||||
//
|
||||
// Public entry point: fn arbor_parse(source: String) -> Map<String, Any>
|
||||
//
|
||||
// Returns either a graph value or a parse-error map. Callers test for the
|
||||
// "error" field:
|
||||
// { "error": "..." , "line": Int, "text": "...source line..." } on failure
|
||||
// { "title", "direction", "nodes", "edges", "groups" } on success
|
||||
|
||||
// ── Sanitisation (copy of arbor-core's sanitize_id) ──────────────────────────
|
||||
|
||||
// Reports whether the first character of `ch` is an ASCII digit, an ASCII
// letter, or an underscore. Works on the character code of position 0 only.
fn is_alnum_underscore(ch: String) -> Bool {
    let c: Int = str_char_code(ch, 0)
    if c == 95 { return true }      // '_'
    if c < 48 { return false }
    if c <= 57 { return true }      // '0'..'9'
    if c < 65 { return false }
    if c <= 90 { return true }      // 'A'..'Z'
    if c < 97 { return false }
    if c <= 122 { return true }     // 'a'..'z'
    false
}
|
||||
|
||||
// Reports whether the first character of `ch` is one of '0'..'9'
// (character codes 48 through 57).
fn is_ascii_digit(ch: String) -> Bool {
    let c: Int = str_char_code(ch, 0)
    if c < 48 { return false }
    if c > 57 { return false }
    true
}
|
||||
|
||||
// Turns an arbitrary string into an identifier made only of ASCII letters,
// digits and underscores:
//   - each run of other characters becomes a single "_"
//   - trailing underscores are dropped
//   - an empty input or empty result becomes "node"
//   - a result that starts with a digit gets an "n" prefix
fn sanitize_id(s: String) -> String {
    let total: Int = str_len(s)
    if total == 0 { return "node" }

    let cleaned = ""
    let in_gap = false
    let pos = 0
    while pos < total {
        let c: String = str_char_at(s, pos)
        if !is_alnum_underscore(c) {
            // Only the first rejected character of a run emits "_".
            if !in_gap {
                let cleaned = cleaned + "_"
            }
            let in_gap = true
        } else {
            let cleaned = cleaned + c
            let in_gap = false
        }
        let pos = pos + 1
    }

    // Walk back from the end over trailing underscores.
    let keep: Int = str_len(cleaned)
    let trimming = true
    while trimming {
        let trimming = false
        if keep > 0 {
            if str_char_at(cleaned, keep - 1) == "_" {
                let keep = keep - 1
                let trimming = true
            }
        }
    }
    let cleaned = str_slice(cleaned, 0, keep)

    if str_len(cleaned) == 0 { return "node" }
    if is_ascii_digit(str_char_at(cleaned, 0)) {
        return "n" + cleaned
    }
    cleaned
}
|
||||
|
||||
// Maps a shape token (surrounding whitespace ignored) to its canonical shape
// name. Returns "" when the token is not one of the known shapes.
fn shape_from_token(tok: String) -> String {
    let name: String = str_trim(tok)
    let known: [String] = ["rect", "rounded", "cylinder", "diamond", "stadium", "primary"]
    let count: Int = el_list_len(known)
    let k = 0
    while k < count {
        let candidate: String = get(known, k)
        if name == candidate { return candidate }
        let k = k + 1
    }
    ""
}
|
||||
|
||||
// ── Line preprocessing ──────────────────────────────────────────────────────
|
||||
//
|
||||
// Strip inline `// ...` comments, trim, drop empties. Returns a list of maps
|
||||
// { "no": Int, "text": String }.
|
||||
|
||||
// Returns the index of the first `//` in `raw` that is not inside a
// double-quoted string, or -1 when there is none. A `"` toggles the
// in-quote state; no escape sequences are recognised inside quotes.
fn comment_start_outside_quotes(raw: String) -> Int {
    let n: Int = str_len(raw)
    let in_quote = false
    let i = 0
    while i < n {
        let ch: String = str_char_at(raw, i)
        if ch == "\"" {
            if in_quote {
                let in_quote = false
            } else {
                let in_quote = true
            }
        } else {
            if !in_quote {
                if ch == "/" {
                    if i + 1 < n {
                        if str_char_at(raw, i + 1) == "/" { return i }
                    }
                }
            }
        }
        let i = i + 1
    }
    0 - 1
}

// Splits `source` into lines, strips a trailing `// ...` comment from each,
// trims whitespace and drops lines that end up empty.
//
// Returns a list of maps { "no": Int, "text": String } where "no" is the
// 1-based line number in the original source.
//
// A `//` inside a quoted string (e.g. a URL in a label) is kept: only a
// marker outside quotes starts a comment.
fn preprocess(source: String) -> [Map<String, Any>] {
    let lines: [String] = str_split(source, "\n")
    let n: Int = el_list_len(lines)
    let out: [Map<String, Any>] = el_list_empty()
    let i = 0
    while i < n {
        let raw: String = get(lines, i)
        let cidx: Int = comment_start_outside_quotes(raw)
        let stripped = raw
        if cidx >= 0 {
            let stripped = str_slice(raw, 0, cidx)
        }
        let trimmed: String = str_trim(stripped)
        if str_len(trimmed) > 0 {
            let row: Map<String, Any> = { "no": i + 1, "text": trimmed }
            let out = native_list_append(out, row)
        }
        let i = i + 1
    }
    out
}
|
||||
|
||||
// ── Quoted-string extraction ────────────────────────────────────────────────
|
||||
//
|
||||
// Parses `"text"`-prefix from a string. Returns `{ "ok": Bool, "value": Str,
|
||||
// "rest": Str }`. The `rest` field carries everything after the closing quote
|
||||
// (so the caller can continue tokenising).
|
||||
|
||||
// Reads a leading `"text"` from `s` (after trimming).
//
// On success returns { "ok": true, "value": <text between the quotes>,
// "rest": <everything after the closing quote> }.
// On failure (too short, no opening quote, or no closing quote) returns
// { "ok": false, "value": "", "rest": s } with `s` untouched.
fn parse_quoted(s: String) -> Map<String, Any> {
    let not_quoted: Map<String, Any> = { "ok": false, "value": "", "rest": s }

    let text: String = str_trim(s)
    let text_len: Int = str_len(text)
    if text_len < 2 { return not_quoted }
    if str_char_at(text, 0) != "\"" { return not_quoted }

    // Everything after the opening quote; the closing quote is searched here.
    let tail: String = str_slice(text, 1, text_len)
    let end_quote: Int = str_index_of(tail, "\"")
    if end_quote < 0 { return not_quoted }

    {
        "ok": true,
        "value": str_slice(tail, 0, end_quote),
        "rest": str_slice(tail, end_quote + 1, str_len(tail))
    }
}
|
||||
|
||||
// ── Identifier prefix split ─────────────────────────────────────────────────
|
||||
//
|
||||
// `split_identifier("foo bar")` → { "id": "foo", "rest": " bar" }.
|
||||
// `split_identifier("a-b")` → { "id": "a", "rest": "-b" }.
|
||||
|
||||
// Splits `s` at the first character that is not a letter, digit or
// underscore. Returns { "id": <leading identifier>, "rest": <remainder> };
// when the whole string is an identifier, "id" is `s` and "rest" is "".
fn split_identifier(s: String) -> Map<String, Any> {
    let total: Int = str_len(s)
    let cut = 0
    let scanning = true
    while scanning {
        if cut >= total {
            let scanning = false
        } else {
            if is_alnum_underscore(str_char_at(s, cut)) {
                let cut = cut + 1
            } else {
                let scanning = false
            }
        }
    }
    if cut >= total {
        return { "id": s, "rest": "" }
    }
    { "id": str_slice(s, 0, cut), "rest": str_slice(s, cut, total) }
}
|
||||
|
||||
// ── Direction parsing ───────────────────────────────────────────────────────
|
||||
|
||||
// Normalises a direction token (trimmed) to its long form. Accepts both the
// long names and the two-letter abbreviations TD / LR / RL / BU. Returns ""
// for anything else.
fn parse_direction(s: String) -> String {
    let d: String = str_trim(s)
    let names: [String] = ["top-down", "left-right", "right-left", "bottom-up"]
    let abbrevs: [String] = ["TD", "LR", "RL", "BU"]
    let k = 0
    while k < 4 {
        let long_name: String = get(names, k)
        if d == long_name { return long_name }
        if d == get(abbrevs, k) { return long_name }
        let k = k + 1
    }
    ""
}
|
||||
|
||||
// ── Edge-arrow detection ────────────────────────────────────────────────────
|
||||
//
|
||||
// Detects the longest matching arrow token in a line, returning
|
||||
// { "ok": Bool, "from_str": Str, "kind": Str, "rest": Str }
|
||||
|
||||
// Splits an edge line around its arrow token.
//
// Returns { "ok": true, "from_str": <text before the arrow>,
//           "kind": "forbidden" | "bidirectional" | "dashed" | "solid",
//           "rest": <text after the arrow> }
// or { "ok": false, "from_str": "", "kind": "", "rest": "" } when the line
// contains no arrow.
//
// The arrow that starts EARLIEST in the line is used, so arrow-like text in a
// trailing label (`a -> b "x --> y"`) no longer overrides the real arrow.
// The longer tokens still win over the `->` they contain, because `-/->`,
// `<->` and `-->` each start before their embedded `->`.
fn extract_edge_parts(line: String) -> Map<String, Any> {
    let tokens: [String] = ["-/->", "<->", "-->", "->"]
    let kinds: [String] = ["forbidden", "bidirectional", "dashed", "solid"]

    let found = false
    let at = 0
    let width = 0
    let kind = ""
    let k = 0
    while k < 4 {
        let tok: String = get(tokens, k)
        let pos: Int = str_index_of(line, tok)
        if pos >= 0 {
            // Take this token if it is the first hit or starts earlier than
            // the best one so far.
            let better = true
            if found {
                if pos > at { let better = false }
            }
            if better {
                let found = true
                let at = pos
                let width = str_len(tok)
                let kind = get(kinds, k)
            }
        }
        let k = k + 1
    }

    if !found {
        return { "ok": false, "from_str": "", "kind": "", "rest": "" }
    }
    {
        "ok": true,
        "from_str": str_slice(line, 0, at),
        "kind": kind,
        "rest": str_slice(line, at + width, str_len(line))
    }
}
|
||||
|
||||
// Reports whether `line` contains an edge arrow. All four arrow tokens
// (`->`, `-->`, `<->`, `-/->`) end in `->`, so one substring check covers
// them; the former separate `<->` check could never be reached.
fn is_edge_line(line: String) -> Bool {
    str_contains(line, "->")
}
|
||||
|
||||
// ── Error helpers ───────────────────────────────────────────────────────────
|
||||
|
||||
// Builds a parse-error map: the message under "error", the 1-based source
// line under "line", and the offending source text under "text".
fn make_error(line_no: Int, line_text: String, message: String) -> Map<String, Any> {
    let failure: Map<String, Any> = {
        "error": message,
        "line": line_no,
        "text": line_text
    }
    return failure
}
|
||||
|
||||
// ── Parse driver ────────────────────────────────────────────────────────────
|
||||
//
|
||||
// State is held in process-local k/v rather than threaded through every
// function. Keys actually used by this file:
//   "graph_title", "graph_direction"             — graph header
//   "node_count",  "node_id_<n>", "node_label_<n>", "node_shape_<n>"
//   "edge_count",  "edge_from_<n>", "edge_to_<n>", "edge_label_<n>", "edge_kind_<n>"
//   "group_count", "group_id_<n>", "group_label_<n>", "group_ids_<n>"
//   "group_stack_depth"                          — number of open groups
//   "gs_<i>_id" / "_label" / "_line" / "_ids"    — data for open-group frame <i>
//   "parse_error"                                — non-empty if parse failed
//   "parse_error_line", "parse_error_text"       — error context
|
||||
|
||||
// Stores an integer in process state as its decimal string. Always returns 0.
fn st_set_int(key: String, v: Int) -> Int {
    let text: String = int_to_str(v)
    state_set(key, text)
    0
}
|
||||
// Reads an integer from process state. A value that compares equal to ""
// yields 0; anything else is converted with str_to_int.
fn st_get_int(key: String) -> Int {
    let raw: String = state_get(key)
    if !str_eq(raw, "") { return str_to_int(raw) }
    0
}
|
||||
|
||||
// Encode/decode small string lists as a single state value, joined with the
// delimiter returned by list_delim().
//
// The delimiter literal used to be written as "" in list_encode, list_decode
// and register_node_in_group, so encoded lists had no separator at all and
// could not be split back into their entries. It is now "," in one place.
// Entries must not contain ",": the ids registered by parse_node_stmt have
// been through sanitize_id and so contain only letters, digits and "_".
fn list_delim() -> String { "," }

// Joins `xs` into one string with list_delim() between entries.
fn list_encode(xs: [String]) -> String {
    let n: Int = el_list_len(xs)
    let out = ""
    let i = 0
    while i < n {
        if i > 0 { let out = out + list_delim() }
        let out = out + get(xs, i)
        let i = i + 1
    }
    out
}

// Splits a string produced by list_encode back into its entries.
// An empty string decodes to an empty list.
fn list_decode(s: String) -> [String] {
    if str_eq(s, "") { return el_list_empty() }
    str_split(s, list_delim())
}

// Index of the innermost open group frame: "group_stack_depth" minus one,
// so -1 when that counter is 0.
fn current_group_index() -> Int {
    st_get_int("group_stack_depth") - 1
}

// State key for one field of group-stack frame `idx`: "gs_<idx>_<suffix>".
fn group_frame_key(idx: Int, suffix: String) -> String {
    "gs_" + int_to_str(idx) + "_" + suffix
}

// Pushes a new group frame (id, label, opening line number, empty member
// list) and bumps "group_stack_depth". Always returns 0.
fn open_group(id: String, label: String, line_no: Int) -> Int {
    let depth: Int = st_get_int("group_stack_depth")
    state_set(group_frame_key(depth, "id"), id)
    state_set(group_frame_key(depth, "label"), label)
    state_set(group_frame_key(depth, "line"), int_to_str(line_no))
    state_set(group_frame_key(depth, "ids"), "")
    st_set_int("group_stack_depth", depth + 1)
    0
}

// Pops the innermost group frame, deletes its state keys and lowers
// "group_stack_depth".
//
// Returns { "ok": true, "id", "label", "ids" } for the popped frame, where
// "ids" is the encoded member list, or { "ok": false, ... } with empty
// strings when no group is open.
fn close_group_frame() -> Map<String, Any> {
    let depth: Int = st_get_int("group_stack_depth")
    if depth <= 0 {
        return { "ok": false, "id": "", "label": "", "ids": "" }
    }
    let idx: Int = depth - 1
    // Read everything before deleting the frame's keys.
    let id: String = state_get(group_frame_key(idx, "id"))
    let label: String = state_get(group_frame_key(idx, "label"))
    let ids: String = state_get(group_frame_key(idx, "ids"))
    state_del(group_frame_key(idx, "id"))
    state_del(group_frame_key(idx, "label"))
    state_del(group_frame_key(idx, "line"))
    state_del(group_frame_key(idx, "ids"))
    st_set_int("group_stack_depth", idx)
    { "ok": true, "id": id, "label": label, "ids": ids }
}

// Appends `node_id` to the member list of the innermost open group, using
// the same delimiter as list_encode. Does nothing when no group is open.
// Always returns 0.
fn register_node_in_group(node_id: String) -> Int {
    let depth: Int = st_get_int("group_stack_depth")
    if depth <= 0 { return 0 }
    let idx: Int = depth - 1
    let key: String = group_frame_key(idx, "ids")
    let prev: String = state_get(key)
    if str_eq(prev, "") {
        state_set(key, node_id)
    } else {
        state_set(key, prev + list_delim() + node_id)
    }
    0
}
|
||||
|
||||
// Accumulator JSON-ish encoding for nodes/edges/groups.
|
||||
// We render each entry as a small string and stash in state under a counter.
|
||||
|
||||
// Records one node in process state under the next free slot
// ("node_id_<n>", "node_label_<n>", "node_shape_<n>") and increments
// "node_count". Always returns 0.
fn store_node(id: String, label: String, shape: String) -> Int {
    let slot: Int = st_get_int("node_count")
    let suffix: String = int_to_str(slot)
    state_set("node_id_" + suffix, id)
    state_set("node_label_" + suffix, label)
    state_set("node_shape_" + suffix, shape)
    st_set_int("node_count", slot + 1)
    return 0
}
|
||||
|
||||
// Records one edge in process state under the next free slot
// ("edge_from_<n>", "edge_to_<n>", "edge_label_<n>", "edge_kind_<n>") and
// increments "edge_count". Always returns 0.
fn store_edge(src: String, dst: String, label: String, kind: String) -> Int {
    let slot: Int = st_get_int("edge_count")
    let suffix: String = int_to_str(slot)
    state_set("edge_from_" + suffix, src)
    state_set("edge_to_" + suffix, dst)
    state_set("edge_label_" + suffix, label)
    state_set("edge_kind_" + suffix, kind)
    st_set_int("edge_count", slot + 1)
    return 0
}
|
||||
|
||||
// Records one closed group in process state under the next free slot
// ("group_id_<n>", "group_label_<n>", "group_ids_<n>") and increments
// "group_count". `ids` is stored exactly as passed. Always returns 0.
fn store_group(id: String, label: String, ids: String) -> Int {
    let slot: Int = st_get_int("group_count")
    let suffix: String = int_to_str(slot)
    state_set("group_id_" + suffix, id)
    state_set("group_label_" + suffix, label)
    state_set("group_ids_" + suffix, ids)
    st_set_int("group_count", slot + 1)
    return 0
}
|
||||
|
||||
// Records a parse error in process state: the message, the source line
// number and the source text it refers to. Always returns 0.
fn set_error(msg: String, line_no: Int, line_text: String) -> Int {
    st_set_int("parse_error_line", line_no)
    state_set("parse_error_text", line_text)
    state_set("parse_error", msg)
    return 0
}
|
||||
|
||||
// True when the "parse_error" state value does not compare equal to "".
fn has_error() -> Bool {
    !str_eq(state_get("parse_error"), "")
}
|
||||
|
||||
// Reset state at the start of each parse pass.
|
||||
// Puts every piece of parser state back to its starting value: no error,
// empty title, direction "top-down", all counters and the group-stack depth
// at 0. Always returns 0.
fn reset_state() -> Int {
    // Error slots.
    state_set("parse_error", "")
    state_set("parse_error_text", "")
    st_set_int("parse_error_line", 0)

    // Graph header.
    state_set("graph_title", "")
    state_set("graph_direction", "top-down")

    // Accumulator counters and the open-group stack.
    st_set_int("node_count", 0)
    st_set_int("edge_count", 0)
    st_set_int("group_count", 0)
    st_set_int("group_stack_depth", 0)
    return 0
}
|
||||
|
||||
// ── Statement-level parsing ─────────────────────────────────────────────────
|
||||
|
||||
// Parses a node statement: `<id> [<shape>] "<label>"`, where the shape and
// the label are both optional.
//
// On success stores the node (shape defaults to "rect", label defaults to
// the raw id), registers it with the innermost open group, and returns 1.
// On failure records a parse error and returns 0.
fn parse_node_stmt(line_no: Int, line: String) -> Int {
    let head: Map<String, Any> = split_identifier(line)
    let name: String = head["id"]
    if str_eq(name, "") {
        set_error("expected node id, edge, or keyword", line_no, line)
        return 0
    }
    let tail: String = str_trim(head["rest"])

    // Optional `[shape]` directly after the id.
    let shape = "rect"
    let label_src = tail
    if str_starts_with(tail, "[") {
        let rb: Int = str_index_of(tail, "]")
        if rb < 0 {
            set_error("unclosed `[` in shape token", line_no, line)
            return 0
        }
        let shape_word: String = str_slice(tail, 1, rb)
        let shape = shape_from_token(shape_word)
        if str_eq(shape, "") {
            set_error("unknown shape `" + shape_word + "`", line_no, line)
            return 0
        }
        let label_src = str_trim(str_slice(tail, rb + 1, str_len(tail)))
    }

    // Optional quoted label; without one the raw id is the label.
    let label = name
    let q: Map<String, Any> = parse_quoted(label_src)
    let has_label: Bool = q["ok"]
    if has_label {
        let label = q["value"]
    }

    let id: String = sanitize_id(name)
    store_node(id, label, shape)
    register_node_in_group(id)
    1
}
|
||||
|
||||
// Parses an edge statement: `<from> <arrow> <to> "<label>"` with an optional
// label.
//
// On success stores the edge (ids passed through sanitize_id, label "" when
// absent) and returns 1. On failure records a parse error and returns 0.
//
// A missing source (`-> b`) is now reported as an error, mirroring the
// existing check for a missing target; previously sanitize_id turned the
// empty source into the id "node".
fn parse_edge_stmt(line_no: Int, line: String) -> Int {
    let parts: Map<String, Any> = extract_edge_parts(line)
    let ok: Bool = parts["ok"]
    if !ok {
        set_error("malformed edge — expected `->` `-->` `<->` or `-/->`", line_no, line)
        return 0
    }
    let from_str: String = parts["from_str"]
    let rest_str: String = parts["rest"]
    let kind: String = parts["kind"]

    let from_t: String = str_trim(from_str)
    if str_eq(from_t, "") {
        set_error("edge missing source node id", line_no, line)
        return 0
    }
    let src: String = sanitize_id(from_t)
    let rest_t: String = str_trim(rest_str)

    let id_split: Map<String, Any> = split_identifier(rest_t)
    let to_raw: String = id_split["id"]
    if str_eq(to_raw, "") {
        set_error("edge missing target node id", line_no, line)
        return 0
    }
    let dst: String = sanitize_id(to_raw)

    // Whatever follows the target id may be a quoted label.
    let label_rest: String = str_trim(id_split["rest"])
    let quoted: Map<String, Any> = parse_quoted(label_rest)
    let label = ""
    let qok: Bool = quoted["ok"]
    if qok {
        let label = quoted["value"]
    }
    store_edge(src, dst, label, kind)
    1
}
|
||||
|
||||
// Parses the part of a `group <id> "<label>" {` line that follows the
// `group ` keyword (`rest`). A trailing `{` is removed if present; the label
// is optional and defaults to the id.
//
// Pushes a group frame and returns 1, or records a parse error and returns 0
// when no id is found.
fn parse_group_open(line_no: Int, line: String, rest: String) -> Int {
    let decl: String = str_trim(rest)
    let decl_len: Int = str_len(decl)

    // Drop the opening brace, if the declaration ends with one.
    let header = decl
    if decl_len > 0 {
        if str_char_at(decl, decl_len - 1) == "{" {
            let header = str_trim(str_slice(decl, 0, decl_len - 1))
        }
    }

    let head: Map<String, Any> = split_identifier(header)
    let gid: String = head["id"]
    if str_eq(gid, "") {
        set_error("group declaration missing id", line_no, line)
        return 0
    }

    let caption = gid
    let q: Map<String, Any> = parse_quoted(str_trim(head["rest"]))
    let has_caption: Bool = q["ok"]
    if has_caption {
        let caption = q["value"]
    }
    open_group(gid, caption, line_no)
    1
}
|
||||
|
||||
// Handles a lone `}`: pops the innermost group frame and stores it as a
// finished group (returns 1). With no open group, records a parse error and
// returns 0.
fn parse_close_brace(line_no: Int) -> Int {
    let closed: Map<String, Any> = close_group_frame()
    let popped: Bool = closed["ok"]
    if popped {
        store_group(closed["id"], closed["label"], closed["ids"])
        return 1
    }
    set_error("unexpected `}` — no open group", line_no, "}")
    0
}
|
||||
|
||||
// Routes one preprocessed line to the matching statement parser, checked in
// this order: `}`, `title:`, `direction:`, `group `, edge, node.
// Returns 1 when the line was accepted and 0 when a parse error was recorded.
fn parse_line_dispatch(line_no: Int, line: String) -> Int {
    let total: Int = str_len(line)

    if line == "}" { return parse_close_brace(line_no) }

    if str_starts_with(line, "title:") {
        // 6 == length of "title:"
        let title: Map<String, Any> = parse_quoted(str_trim(str_slice(line, 6, total)))
        let has_title: Bool = title["ok"]
        if has_title {
            state_set("graph_title", title["value"])
            return 1
        }
        set_error("expected quoted string after `title:`", line_no, line)
        return 0
    }

    if str_starts_with(line, "direction:") {
        // 10 == length of "direction:"
        let chosen: String = parse_direction(str_trim(str_slice(line, 10, total)))
        if str_eq(chosen, "") {
            set_error("unknown direction — expected top-down, left-right, right-left, or bottom-up",
                      line_no, line)
            return 0
        }
        state_set("graph_direction", chosen)
        return 1
    }

    if str_starts_with(line, "group ") {
        // 6 == length of "group "
        return parse_group_open(line_no, line, str_slice(line, 6, total))
    }

    if !is_edge_line(line) {
        return parse_node_stmt(line_no, line)
    }
    parse_edge_stmt(line_no, line)
}
|
||||
|
||||
// ── Materialise accumulators into the final graph map ───────────────────────
|
||||
|
||||
// Assembles the final graph map from the state-backed accumulators.
//
// Returns { "title", "direction", "nodes", "edges", "groups" } where
//   nodes  : list of { "id", "label", "shape" }
//   edges  : list of { "from", "to", "label", "kind" }
//   groups : list of { "id", "label", "node_ids", "direction" }
// A group's "direction" is always "" here; "node_ids" is the decoded member
// list.
fn build_graph_value() -> Map<String, Any> {
    // Nodes.
    let node_total: Int = st_get_int("node_count")
    let node_list: [Map<String, Any>] = el_list_empty()
    let ni = 0
    while ni < node_total {
        let tag: String = int_to_str(ni)
        let one_node: Map<String, Any> = {
            "id": state_get("node_id_" + tag),
            "label": state_get("node_label_" + tag),
            "shape": state_get("node_shape_" + tag)
        }
        let node_list = native_list_append(node_list, one_node)
        let ni = ni + 1
    }

    // Edges.
    let edge_total: Int = st_get_int("edge_count")
    let edge_list: [Map<String, Any>] = el_list_empty()
    let ei = 0
    while ei < edge_total {
        let tag: String = int_to_str(ei)
        let one_edge: Map<String, Any> = {
            "from": state_get("edge_from_" + tag),
            "to": state_get("edge_to_" + tag),
            "label": state_get("edge_label_" + tag),
            "kind": state_get("edge_kind_" + tag)
        }
        let edge_list = native_list_append(edge_list, one_edge)
        let ei = ei + 1
    }

    // Groups.
    let group_total: Int = st_get_int("group_count")
    let group_list: [Map<String, Any>] = el_list_empty()
    let gi = 0
    while gi < group_total {
        let tag: String = int_to_str(gi)
        let members: [String] = list_decode(state_get("group_ids_" + tag))
        let one_group: Map<String, Any> = {
            "id": state_get("group_id_" + tag),
            "label": state_get("group_label_" + tag),
            "node_ids": members,
            "direction": ""
        }
        let group_list = native_list_append(group_list, one_group)
        let gi = gi + 1
    }

    {
        "title": state_get("graph_title"),
        "direction": state_get("graph_direction"),
        "nodes": node_list,
        "edges": edge_list,
        "groups": group_list
    }
}
|
||||
|
||||
// ── Public entry point ──────────────────────────────────────────────────────
|
||||
|
||||
// Public entry point: parses .arbor source text.
//
// Returns the graph map from build_graph_value() on success, or
// { "error": String, "line": Int, "text": String } on failure. Parsing stops
// acting on lines after the first error; a group still open at end of input
// is reported as an error at the line where it was opened.
fn arbor_parse(source: String) -> Map<String, Any> {
    reset_state()
    let rows: [Map<String, Any>] = preprocess(source)
    let total: Int = el_list_len(rows)

    let failed = false
    let k = 0
    while k < total {
        // Once an error is recorded the remaining rows are walked but ignored.
        if !failed {
            let entry: Map<String, Any> = get(rows, k)
            let entry_no: Int = entry["no"]
            let entry_text: String = entry["text"]
            parse_line_dispatch(entry_no, entry_text)
            let failed = has_error()
        }
        let k = k + 1
    }

    if !has_error() {
        let open_groups: Int = st_get_int("group_stack_depth")
        if open_groups > 0 {
            let top: Int = open_groups - 1
            let gid: String = state_get(group_frame_key(top, "id"))
            let opened_at: Int = st_get_int(group_frame_key(top, "line"))
            set_error("unclosed group '" + gid + "' — missing closing `}`",
                      opened_at, "group " + gid)
        }
    }

    if !has_error() {
        return build_graph_value()
    }
    {
        "error": state_get("parse_error"),
        "line": st_get_int("parse_error_line"),
        "text": state_get("parse_error_text")
    }
}
|
||||
|
||||
// ── Smoke test ──────────────────────────────────────────────────────────────
|
||||
|
||||
// Prints a FAIL line showing the value obtained and the value wanted, and
// marks the smoke run as failed via the "smoke_failures" state key.
// Always returns 0.
fn fail_msg(label: String, got: String, want: String) -> Int {
    let report: String = "FAIL " + label + " got=[" + got + "] want=[" + want + "]"
    println(report)
    state_set("smoke_failures", "1")
    return 0
}
|
||||
|
||||
// Smoke-test assertion on two strings: prints "ok <label>" and returns 1
// when they match, otherwise delegates to fail_msg and returns its result.
fn check_eq(label: String, got: String, want: String) -> Int {
    if got != want {
        return fail_msg(label, got, want)
    }
    println("ok " + label)
    1
}
|
||||
|
||||
// Helper: a graph map is in the error state iff it has a non-empty "error".
|
||||
// True when map `g` carries a non-empty "error" entry.
fn parse_failed(g: Map<String, Any>) -> Bool {
    let msg: String = g["error"]
    if str_eq(msg, "") { return false }
    // Per the original note here: a missing key yields NULL, and str_eq does
    // not treat NULL and "" as equal — so the length is checked as well.
    if str_len(msg) > 0 { return true }
    false
}
|
||||
|
||||
// Smoke 1: header, two shaped nodes and one labelled solid edge.
let src1 = "title: \"Test\"\ndirection: left-right\n\napi [rounded] \"REST API\"\ndb [cylinder] \"Postgres\"\n\napi -> db \"reads\""
let g1: Map<String, Any> = arbor_parse(src1)
if parse_failed(g1) {
    println("FAIL parse 1: " + g1["error"])
    state_set("smoke_failures", "1")
}

// Header fields.
check_eq("title parsed", g1["title"], "Test")
check_eq("direction parsed", g1["direction"], "left-right")

// Element counts.
let nodes1: [Map<String, Any>] = g1["nodes"]
let edges1: [Map<String, Any>] = g1["edges"]
let nn1: Int = el_list_len(nodes1)
let ne1: Int = el_list_len(edges1)
check_eq("two nodes", int_to_str(nn1), "2")
check_eq("one edge", int_to_str(ne1), "1")

// The single edge.
let e0: Map<String, Any> = get(edges1, 0)
check_eq("edge from", e0["from"], "api")
check_eq("edge to", e0["to"], "db")
check_eq("edge label", e0["label"], "reads")
check_eq("edge kind", e0["kind"], "solid")

// The first node.
let n0: Map<String, Any> = get(nodes1, 0)
check_eq("node 0 shape", n0["shape"], "rounded")
check_eq("node 0 label", n0["label"], "REST API")
|
||||
|
||||
// Test edge varieties
|
||||
// Smoke 2: one edge of each arrow kind, checked in source order.
let src2 = "a \"A\"\nb \"B\"\na -> b\na --> b\na -/-> b\na <-> b"
let g2: Map<String, Any> = arbor_parse(src2)
let edges2: [Map<String, Any>] = g2["edges"]
check_eq("4 edges parsed", int_to_str(el_list_len(edges2)), "4")

// Join the kinds as "k0,k1,...," so a single comparison covers all of them.
let kinds = ""
let i = 0
while i < el_list_len(edges2) {
    let e: Map<String, Any> = get(edges2, i)
    let k: String = e["kind"]
    let kinds = kinds + k + ","
    let i = i + 1
}
check_eq("edge kinds", kinds, "solid,dashed,forbidden,bidirectional,")
|
||||
|
||||
// Groups
|
||||
// Smoke 3: one group with two members plus one node outside it.
let src3 = "group core \"Application Core\" {\n api [rounded] \"REST API\"\n svc \"Business Logic\"\n}\nstandalone \"Out\""
let g3: Map<String, Any> = arbor_parse(src3)

let groups3: [Map<String, Any>] = g3["groups"]
check_eq("one group", int_to_str(el_list_len(groups3)), "1")

let grp0: Map<String, Any> = get(groups3, 0)
check_eq("group label", grp0["label"], "Application Core")

let gnids: [String] = grp0["node_ids"]
check_eq("group has 2 members", int_to_str(el_list_len(gnids)), "2")

let nodes3: [Map<String, Any>] = g3["nodes"]
check_eq("3 total nodes (incl standalone)", int_to_str(el_list_len(nodes3)), "3")
|
||||
|
||||
// Error: unknown shape
|
||||
// Smoke 4: an unknown shape must produce an error that names the shape.
// Failure is detected with parse_failed(), which also handles a missing
// "error" key; the previous `str_eq(err4, "")` test does not (see the note
// inside parse_failed), so an unexpectedly successful parse went down the
// wrong branch.
let src4 = "node [hexagon] \"X\""
let g4: Map<String, Any> = arbor_parse(src4)
let err4: String = g4["error"]
if parse_failed(g4) {
    if str_contains(err4, "hexagon") {
        println("ok error mentions hexagon: " + err4)
    } else {
        println("FAIL error wording: " + err4)
        state_set("smoke_failures", "1")
    }
} else {
    println("FAIL expected error for unknown shape")
    state_set("smoke_failures", "1")
}
|
||||
|
||||
// Error: unclosed group
|
||||
// Smoke 5: a group that is never closed must produce an "unclosed" error.
// Failure is detected with parse_failed(), which also handles a missing
// "error" key; the previous `str_eq(err5, "")` test does not (see the note
// inside parse_failed).
let src5 = "group g \"G\" {\n a \"A\"\n"
let g5: Map<String, Any> = arbor_parse(src5)
let err5: String = g5["error"]
if parse_failed(g5) {
    if str_contains(err5, "unclosed") {
        println("ok unclosed group detected")
    } else {
        println("FAIL unclosed error wording: " + err5)
        state_set("smoke_failures", "1")
    }
} else {
    println("FAIL expected unclosed-group error")
    state_set("smoke_failures", "1")
}
|
||||
|
||||
// Comments and inline comments
|
||||
// Smoke 6: a full-line comment and a trailing comment are both ignored.
let src6 = "// header\na \"A\" // trailing\nb \"B\""
let g6: Map<String, Any> = arbor_parse(src6)
let count6: String = int_to_str(el_list_len(g6["nodes"]))
check_eq("comments stripped", count6, "2")

// Smoke 7: empty input gives an empty graph with the default direction.
let g7: Map<String, Any> = arbor_parse("")
let count7: String = int_to_str(el_list_len(g7["nodes"]))
check_eq("empty graph nodes", count7, "0")
check_eq("empty graph default direction", g7["direction"], "top-down")

// Summary: exit non-zero if any check recorded a failure.
println("")
let f: String = state_get("smoke_failures")
if !str_eq(f, "1") {
    println("arbor-parse: ok")
} else {
    println("arbor-parse: FAILED")
    exit_program(1)
}
|
||||
@@ -1,21 +0,0 @@
|
||||
// arbor-render — SVG renderer. Consumes a diagram graph + layout result and
|
||||
// emits an SVG document. PNG rasterization is not provided in this vessel
|
||||
// because the El runtime does not expose a vector-to-raster primitive yet
|
||||
// (see report).
|
||||
|
||||
vessel "arbor-render" {
|
||||
version "0.1.0"
|
||||
description "SVG renderer for Arbor diagrams"
|
||||
authors ["Neuron Technologies"]
|
||||
edition "2026"
|
||||
}
|
||||
|
||||
dependencies {
|
||||
arbor-core "0.1"
|
||||
arbor-layout "0.1"
|
||||
}
|
||||
|
||||
build {
|
||||
entry "src/main.el"
|
||||
output "dist/"
|
||||
}
|
||||
@@ -1,575 +0,0 @@
|
||||
// arbor-render — SVG emission from a laid-out diagram.
|
||||
//
|
||||
// Entry point:
|
||||
// fn arbor_render_svg(graph: Map, layout: Map, forbidden: [String]) -> String
|
||||
//
|
||||
// The graph is the lowered (diagram-form) shape produced by arbor-core /
|
||||
// arbor-diagram (`title`, `direction`, `nodes`, `edges`, `groups`). The
|
||||
// layout is whatever arbor-layout returned: `node_pos_<id>`, `node_size_<id>`,
|
||||
// `group_bounds_<id>`, `node_ids`, `group_ids`, `canvas`.
|
||||
//
|
||||
// `forbidden` is a list of "from->to" key strings — same format as
|
||||
// arbor-core's collect_forbidden(). The Rust crate threaded a HashSet
|
||||
// through; El threads a list and we linear-scan.
|
||||
//
|
||||
// SVG is text emission — straightforward El. Every float coordinate is
|
||||
// passed through format_float(_, 1) for stable output.
|
||||
//
|
||||
// ── PNG render is intentionally out of scope ────────────────────────────────
|
||||
// The Rust crate rasterises via resvg → tiny_skia → png. The El runtime
|
||||
// today exposes no equivalent: there is no resvg, no usvg, no font rasterer,
|
||||
// no PNG encoder, no path-fill code. fs_write writes text only — there is
|
||||
// no binary write primitive. arbor_render_png() returns an error map in El
|
||||
// until the runtime grows a rasterer (see "runtime gaps" in the report).
|
||||
|
||||
// ── Colour palette (matches the Rust constants exactly) ────────────────────
|
||||
|
||||
// Node colours: default fill, outline and label text.
fn col_node_fill() -> String {
    return "#ffffff"
}
fn col_node_stroke() -> String {
    return "#334155"
}
fn col_node_text() -> String {
    return "#0D0D14"
}

// "primary" colours: fill and label text.
fn col_primary_fill() -> String {
    return "#0052A0"
}
fn col_primary_text() -> String {
    return "#ffffff"
}

// Edge colours: regular stroke, forbidden-edge stroke, and edge label text.
fn col_edge() -> String {
    return "#64748B"
}
fn col_edge_forbidden() -> String {
    return "#DC2626"
}
fn col_edge_label() -> String {
    return "#64748B"
}

// Group box colours: translucent fill, outline and caption text.
fn col_group_fill() -> String {
    return "rgba(0,0,0,0.03)"
}
fn col_group_stroke() -> String {
    return "#CBD5E1"
}
fn col_group_text() -> String {
    return "#64748B"
}
|
||||
|
||||
// ── XML escape ─────────────────────────────────────────────────────────────
|
||||
|
||||
// Escapes text for use in SVG/XML content and double-quoted attribute
// values: & < > " become &amp; &lt; &gt; &quot;.
//
// The replacement strings had been reduced to the bare characters (a no-op
// for & < >, and an unterminated literal for the quote case); the entity
// names are restored here.
fn esc(s: String) -> String {
    // "&" must be handled first so the ampersands introduced by the later
    // replacements are not escaped a second time.
    let r1: String = str_replace(s, "&", "&amp;")
    let r2: String = str_replace(r1, "<", "&lt;")
    let r3: String = str_replace(r2, ">", "&gt;")
    let r4: String = str_replace(r3, "\"", "&quot;")
    r4
}
|
||||
|
||||
// Float to "%.1f" — the Rust pt() helper.
|
||||
// Formats a coordinate by passing it to format_float with a precision
// argument of 1.
fn pt(v: el_val_t) -> String {
    let text: String = format_float(v, 1)
    return text
}
|
||||
|
||||
// Float arithmetic helpers — float_to_int / int_to_float trip through Int,
|
||||
// which is exact for the integer-valued floats used by the layout pass.
|
||||
// Adds two float values by converting each to Int with float_to_int,
// summing, and converting the sum back with int_to_float.
fn fadd(a: el_val_t, b: el_val_t) -> el_val_t {
    let sum: Int = float_to_int(a) + float_to_int(b)
    return int_to_float(sum)
}
|
||||
|
||||
// Subtracts `b` from `a` by converting each to Int with float_to_int,
// subtracting, and converting the difference back with int_to_float.
fn fsub(a: el_val_t, b: el_val_t) -> el_val_t {
    let diff: Int = float_to_int(a) - float_to_int(b)
    return int_to_float(diff)
}
|
||||
|
||||
// Halves a float value: converts to Int with float_to_int, divides by 2
// using Int division, and converts back with int_to_float.
fn fdiv2(a: el_val_t) -> el_val_t {
    let half: Int = float_to_int(a) / 2
    return int_to_float(half)
}
|
||||
|
||||
// Midpoint of two float values: their fadd sum passed through fdiv2.
fn fmid(a: el_val_t, b: el_val_t) -> el_val_t {
    let total: el_val_t = fadd(a, b)
    return fdiv2(total)
}
|
||||
|
||||
// ── forbidden-edge linear lookup ───────────────────────────────────────────
|
||||
|
||||
// Builds the lookup key for a forbidden edge: "<from>-><to>".
fn forbidden_key(from: String, to: String) -> String {
    let key: String = from + "->" + to
    return key
}
|
||||
|
||||
// Linear search of `set` for the forbidden-edge key of (src, dst).
// Returns true on the first entry that str_eq reports equal to the key.
fn forbidden_contains(set: [String], src: String, dst: String) -> Bool {
    let wanted: String = forbidden_key(src, dst)
    let total: Int = el_list_len(set)
    let k = 0
    while k < total {
        if str_eq(get(set, k), wanted) { return true }
        let k = k + 1
    }
    false
}
|
||||
|
||||
// ── Arrow marker defs ──────────────────────────────────────────────────────
|
||||
|
||||
// Build the three <marker> definitions placed inside <defs>:
//   "ah"     — arrow head in the normal edge colour,
//   "ah-bi"  — start-of-line arrow head (orient auto-start-reverse),
//   "ah-red" — arrow head in the forbidden-edge colour.
// The result starts with a newline and has no trailing newline.
fn arrow_defs() -> String {
    // Pieces shared by all three markers.
    let poly_open: String = " <polygon points=\"0 0, 10 3.5, 0 7\" fill=\""
    let poly_close: String = "\"/>\n"
    let normal: String = col_edge()

    let out = "\n <marker id=\"ah\" markerWidth=\"10\" markerHeight=\"7\" refX=\"9\" refY=\"3.5\" orient=\"auto\">\n"
    let out = out + poly_open + normal + poly_close + " </marker>\n"

    let out = out + " <marker id=\"ah-bi\" markerWidth=\"10\" markerHeight=\"7\" refX=\"1\" refY=\"3.5\" orient=\"auto-start-reverse\">\n"
    let out = out + poly_open + normal + poly_close + " </marker>\n"

    let out = out + " <marker id=\"ah-red\" markerWidth=\"10\" markerHeight=\"7\" refX=\"9\" refY=\"3.5\" orient=\"auto\">\n"
    let out = out + poly_open + col_edge_forbidden() + poly_close + " </marker>"
    return out
}
|
||||
|
||||
// ── Node rendering ─────────────────────────────────────────────────────────
|
||||
|
||||
// Append the SVG for one node — outline shape, label and optional sublabel —
// to `buf` and return the extended buffer.
//
// Geometry is read from `layout`: "node_pos_<id>" holds the centre (x, y) and
// "node_size_<id>" holds (w, h). The per-node overrides style_fill,
// style_stroke and style_color replace the palette defaults when non-empty;
// they are passed through esc() because they are written inside
// double-quoted attributes. A shape other than rectangle, rounded_rect,
// stadium, cylinder or diamond gets no outline, only its text.
fn render_node(buf: String, node: Map<String, Any>, layout: Map<String, Any>) -> String {
    let nid: String = node["id"]
    let pos: Map<String, Any> = el_map_get(layout, "node_pos_" + nid)
    let sz: Map<String, Any> = el_map_get(layout, "node_size_" + nid)

    let cx: el_val_t = pos["x"]
    let cy: el_val_t = pos["y"]
    let w: el_val_t = sz["w"]
    let h: el_val_t = sz["h"]

    // Top-left corner of the node's bounding box.
    let x: el_val_t = fsub(cx, fdiv2(w))
    let y: el_val_t = fsub(cy, fdiv2(h))

    let fill_in: String = node["style_fill"]
    let stroke_in: String = node["style_stroke"]
    let color_in: String = node["style_color"]
    let fill = col_node_fill()
    if str_len(fill_in) > 0 { let fill = esc(fill_in) }
    let stroke = col_node_stroke()
    if str_len(stroke_in) > 0 { let stroke = esc(stroke_in) }
    let text_col = col_node_text()
    if str_len(color_in) > 0 { let text_col = esc(color_in) }

    // Common tail of every shape element: closes the preceding attribute,
    // then fill, stroke and stroke width.
    let paint: String = "\" fill=\"" + fill + "\" stroke=\"" + stroke + "\" stroke-width=\"1.5\"/>\n"

    let shape: String = node["shape"]
    // NOTE(review): kept from the original — presumably needed so the
    // `let buf = ...` statements inside the blocks below update this
    // binding; confirm against the compiler's scoping rules.
    let buf = buf

    // rectangle / rounded_rect / stadium are the same <rect>, differing only
    // in corner radius. `rx` stays empty for every other shape.
    let rx = ""
    if str_eq(shape, "rectangle") { let rx = "4" }
    if str_eq(shape, "rounded_rect") { let rx = "20" }
    if str_eq(shape, "stadium") { let rx = pt(fdiv2(h)) }
    if str_len(rx) > 0 {
        let buf = buf + " <rect x=\"" + pt(x) + "\" y=\"" + pt(y)
        let buf = buf + "\" width=\"" + pt(w) + "\" height=\"" + pt(h)
        let buf = buf + "\" rx=\"" + rx + paint
    }
    if str_eq(shape, "cylinder") {
        // body: rect from y+ry to bottom; ry ≈ h/6 (Rust uses h*0.18, we use
        // h/6 to stay in integer arithmetic).
        let hi: Int = float_to_int(h)
        let ry: el_val_t = int_to_float(hi / 6)
        let body_y: el_val_t = fadd(y, ry)
        let body_h: el_val_t = fsub(h, ry)
        let buf = buf + " <rect x=\"" + pt(x) + "\" y=\"" + pt(body_y)
        let buf = buf + "\" width=\"" + pt(w) + "\" height=\"" + pt(body_h) + paint
        // top ellipse
        let buf = buf + " <ellipse cx=\"" + pt(cx) + "\" cy=\"" + pt(body_y)
        let buf = buf + "\" rx=\"" + pt(fdiv2(w)) + "\" ry=\"" + pt(ry) + paint
        // bottom ellipse
        let bot_y: el_val_t = fadd(y, h)
        let buf = buf + " <ellipse cx=\"" + pt(cx) + "\" cy=\"" + pt(bot_y)
        let buf = buf + "\" rx=\"" + pt(fdiv2(w)) + "\" ry=\"" + pt(ry) + paint
    }
    if str_eq(shape, "diamond") {
        // Four corners: top, right, bottom, left.
        let hw: el_val_t = fdiv2(w)
        let hh: el_val_t = fdiv2(h)
        let buf = buf + " <polygon points=\""
        let buf = buf + pt(cx) + "," + pt(fsub(cy, hh)) + " "
        let buf = buf + pt(fadd(cx, hw)) + "," + pt(cy) + " "
        let buf = buf + pt(cx) + "," + pt(fadd(cy, hh)) + " "
        let buf = buf + pt(fsub(cx, hw)) + "," + pt(cy)
        let buf = buf + paint
    }

    // Label, centred on the node.
    let label: String = node["label"]
    let buf = buf + " <text x=\"" + pt(cx) + "\" y=\"" + pt(cy)
    let buf = buf + "\" text-anchor=\"middle\" dominant-baseline=\"middle\""
    let buf = buf + " class=\"arbor-node-label\" fill=\"" + text_col + "\">"
    let buf = buf + esc(label) + "</text>\n"

    // Sublabel — Rust's DiagramNode stores Option<String>; El uses "" sentinel.
    // Drawn 14 units below the node centre.
    let sub: String = node["sublabel"]
    if str_len(sub) > 0 {
        let sub_y: el_val_t = fadd(cy, int_to_float(14))
        let buf = buf + " <text x=\"" + pt(cx) + "\" y=\"" + pt(sub_y)
        let buf = buf + "\" text-anchor=\"middle\" dominant-baseline=\"middle\""
        let buf = buf + " class=\"arbor-node-label\" fill=\"" + text_col + "\" font-size=\"10\">"
        let buf = buf + esc(sub) + "</text>\n"
    }
    buf
}
|
||||
|
||||
// ── Edge rendering ─────────────────────────────────────────────────────────
|
||||
//
|
||||
// We emit a straight line from one node centre to the other and let the
|
||||
// browser draw it; the Rust crate renders cubic bezier paths but the runtime
|
||||
// has no robust math layer, and the rectangles are large enough that
|
||||
// straight edges read clearly. (See "runtime gaps".)
|
||||
|
||||
// Append the SVG for one edge to `buf` and return the extended buffer.
//
// The edge is a straight <line> between the centres of its two nodes
// ("node_pos_<id>" in `layout`). edge["line"] selects a dash pattern
// (dashed / dotted), edge["arrow"] selects the markers (both / backward /
// none; anything else gets only the end marker). An edge whose key is in
// `forbidden` is drawn in the forbidden colour with a circle-X at its
// midpoint. A non-empty edge["label"] is drawn at the midpoint on a white
// backing rectangle.
fn render_edge(buf: String, edge: Map<String, Any>, layout: Map<String, Any>, forbidden: [String]) -> String {
    let src_id: String = edge["from"]
    let dst_id: String = edge["to"]
    let src_pos: Map<String, Any> = el_map_get(layout, "node_pos_" + src_id)
    let dst_pos: Map<String, Any> = el_map_get(layout, "node_pos_" + dst_id)

    let x1: el_val_t = src_pos["x"]
    let y1: el_val_t = src_pos["y"]
    let x2: el_val_t = dst_pos["x"]
    let y2: el_val_t = dst_pos["y"]

    let blocked: Bool = forbidden_contains(forbidden, src_id, dst_id)
    let line_col = col_edge()
    if blocked { let line_col = col_edge_forbidden() }

    // Dash pattern.
    let line_kind: String = edge["line"]
    let arrow_kind: String = edge["arrow"]
    let dashes = ""
    if str_eq(line_kind, "dashed") { let dashes = " stroke-dasharray=\"5,3\"" }
    if str_eq(line_kind, "dotted") { let dashes = " stroke-dasharray=\"2,2\"" }

    // Start marker: only for "both" and "backward".
    let start_marker = ""
    if str_eq(arrow_kind, "both") { let start_marker = " marker-start=\"url(#ah-bi)\"" }
    if str_eq(arrow_kind, "backward") { let start_marker = " marker-start=\"url(#ah-bi)\"" }

    // End marker: present by default, red when forbidden, dropped for
    // "none" and "backward".
    let end_marker = " marker-end=\"url(#ah)\""
    if blocked { let end_marker = " marker-end=\"url(#ah-red)\"" }
    if str_eq(arrow_kind, "none") { let end_marker = "" }
    if str_eq(arrow_kind, "backward") { let end_marker = "" }

    let buf = buf + " <line x1=\"" + pt(x1) + "\" y1=\"" + pt(y1) + "\" x2=\"" + pt(x2) + "\" y2=\"" + pt(y2)
    let buf = buf + "\" stroke=\"" + line_col + "\" stroke-width=\"1.5\"" + dashes + start_marker + end_marker + "/>\n"

    // Forbidden marker — a white circle with an X through it, at the midpoint.
    if blocked {
        let mid_x: el_val_t = fmid(x1, x2)
        let mid_y: el_val_t = fmid(y1, y2)
        let radius: el_val_t = int_to_float(7)
        let buf = buf + " <circle cx=\"" + pt(mid_x) + "\" cy=\"" + pt(mid_y) + "\" r=\"" + pt(radius)
        let buf = buf + "\" fill=\"white\" stroke=\"" + col_edge_forbidden() + "\" stroke-width=\"1.5\"/>\n"

        // The two diagonals of the X reach 4 units from the midpoint.
        let reach: el_val_t = int_to_float(4)
        let lo_x: String = pt(fsub(mid_x, reach))
        let hi_x: String = pt(fadd(mid_x, reach))
        let lo_y: String = pt(fsub(mid_y, reach))
        let hi_y: String = pt(fadd(mid_y, reach))
        let x_tail: String = "\" stroke=\"" + col_edge_forbidden() + "\" stroke-width=\"1.5\"/>\n"
        let buf = buf + " <line x1=\"" + lo_x + "\" y1=\"" + lo_y + "\" x2=\"" + hi_x + "\" y2=\"" + hi_y + x_tail
        let buf = buf + " <line x1=\"" + hi_x + "\" y1=\"" + lo_y + "\" x2=\"" + lo_x + "\" y2=\"" + hi_y + x_tail
    }

    // Edge label: backing box sized from the label length (7 per character
    // plus 8 of padding, 16 high), then the escaped text on top.
    let text: String = edge["label"]
    if str_len(text) > 0 {
        let mid_x: el_val_t = fmid(x1, x2)
        let mid_y: el_val_t = fmid(y1, y2)
        let box_w: el_val_t = int_to_float(str_len(text) * 7 + 8)
        let box_h: el_val_t = int_to_float(16)
        let box_x: String = pt(fsub(mid_x, fdiv2(box_w)))
        let box_y: String = pt(fsub(mid_y, fdiv2(box_h)))
        let buf = buf + " <rect x=\"" + box_x + "\" y=\"" + box_y + "\" width=\"" + pt(box_w) + "\" height=\"" + pt(box_h)
        let buf = buf + "\" rx=\"3\" fill=\"white\" opacity=\"0.85\"/>\n"
        let buf = buf + " <text x=\"" + pt(mid_x) + "\" y=\"" + pt(mid_y) + "\" text-anchor=\"middle\" dominant-baseline=\"middle\""
        let buf = buf + " class=\"arbor-edge-label\">" + esc(text) + "</text>\n"
    }
    return buf
}
|
||||
|
||||
// ── Group rendering ────────────────────────────────────────────────────────
|
||||
|
||||
// Append the SVG for one group — a dashed rounded rectangle plus its label
// in the top-left corner — to `buf` and return the extended buffer.
//
// Bounds (x, y, w, h) are read from `layout` under "group_bounds_<id>".
// No presence check is done here: arbor_render_svg only calls this for
// groups whose id is listed in layout["group_ids"].
fn render_group(buf: String, group: Map<String, Any>, layout: Map<String, Any>) -> String {
    let gid: String = group["id"]
    let bounds: Map<String, Any> = el_map_get(layout, "group_bounds_" + gid)
    let bx: el_val_t = bounds["x"]
    let by: el_val_t = bounds["y"]
    let bw: el_val_t = bounds["w"]
    let bh: el_val_t = bounds["h"]
    let buf = buf + " <rect x=\"" + pt(bx) + "\" y=\"" + pt(by)
    let buf = buf + "\" width=\"" + pt(bw) + "\" height=\"" + pt(bh)
    let buf = buf + "\" rx=\"8\" fill=\"" + col_group_fill() + "\" stroke=\""
    let buf = buf + col_group_stroke() + "\" stroke-width=\"1\" stroke-dasharray=\"4,3\"/>\n"

    // Group label in the top-left corner, inset 8 across and 14 down.
    let lx: el_val_t = fadd(bx, int_to_float(8))
    let ly: el_val_t = fadd(by, int_to_float(14))
    let label: String = group["label"]
    let buf = buf + " <text x=\"" + pt(lx) + "\" y=\"" + pt(ly)
    let buf = buf + "\" class=\"arbor-group-label\">" + esc(label) + "</text>\n"
    buf
}
|
||||
|
||||
// ── Public entry point ─────────────────────────────────────────────────────
|
||||
|
||||
// Render `graph` as a complete SVG document string.
//
//   graph     — map with "groups", "edges", "nodes" (lists of maps) and "title".
//   layout    — map with "canvas" (w, h), "group_ids", and the per-node /
//               per-group geometry entries read by render_node, render_edge
//               and render_group.
//   forbidden — list of forbidden_key() strings; matching edges are drawn red.
//
// Paint order is groups, then edges, then nodes, then the title, so later
// layers sit on top of earlier ones.
fn arbor_render_svg(graph: Map<String, Any>, layout: Map<String, Any>, forbidden: [String]) -> String {
    let canvas: Map<String, Any> = el_map_get(layout, "canvas")
    let cw: el_val_t = canvas["w"]
    let ch: el_val_t = canvas["h"]

    let buf = "<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"" + pt(cw)
    let buf = buf + "\" height=\"" + pt(ch) + "\" viewBox=\"0 0 " + pt(cw) + " " + pt(ch) + "\">\n"
    let buf = buf + " <defs>"
    let buf = buf + arrow_defs()
    let buf = buf + "\n <style>\n"
    let buf = buf + " .arbor-node-label { font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 13px; }\n"
    let buf = buf + " .arbor-group-label { font-family: 'Helvetica Neue', Helvetica, Arial, monospace; font-size: 10px; fill: " + col_group_text() + "; letter-spacing: 0.08em; }\n"
    let buf = buf + " .arbor-edge-label { font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; font-size: 11px; fill: " + col_edge_label() + "; }\n"
    let buf = buf + " </style>\n"
    let buf = buf + " </defs>\n"

    // Groups first (behind everything).
    let buf = buf + " <!-- Groups -->\n"
    let groups: [Map<String, Any>] = graph["groups"]
    let gn: Int = el_list_len(groups)
    // The list of placed group ids does not depend on the group being
    // visited, so it is looked up once rather than once per group.
    let gids: [String] = el_map_get(layout, "group_ids")
    let i = 0
    while i < gn {
        let g: Map<String, Any> = get(groups, i)
        let gid: String = g["id"]
        // Only render groups the layout actually placed.
        let placed = false
        let placed_n: Int = el_list_len(gids)
        let j = 0
        while j < placed_n {
            if str_eq(get(gids, j), gid) { let placed = true }
            let j = j + 1
        }
        if placed {
            let buf = render_group(buf, g, layout)
        }
        let i = i + 1
    }

    // Edges
    let buf = buf + " <!-- Edges -->\n"
    let edges: [Map<String, Any>] = graph["edges"]
    let en: Int = el_list_len(edges)
    let i = 0
    while i < en {
        let e: Map<String, Any> = get(edges, i)
        let buf = render_edge(buf, e, layout, forbidden)
        let i = i + 1
    }

    // Nodes
    let buf = buf + " <!-- Nodes -->\n"
    let nodes: [Map<String, Any>] = graph["nodes"]
    let nn: Int = el_list_len(nodes)
    let i = 0
    while i < nn {
        let n: Map<String, Any> = get(nodes, i)
        let buf = render_node(buf, n, layout)
        let i = i + 1
    }

    // Title — horizontally centred on the canvas, fixed at y=22; omitted
    // when empty.
    let title: String = graph["title"]
    if str_len(title) > 0 {
        let title_x: el_val_t = fdiv2(cw)
        let buf = buf + " <text x=\"" + pt(title_x) + "\" y=\"22\" text-anchor=\"middle\""
        let buf = buf + " font-family=\"'Helvetica Neue', Helvetica, Arial, sans-serif\""
        let buf = buf + " font-size=\"15\" font-weight=\"600\" fill=\"" + col_node_text() + "\">"
        let buf = buf + esc(title) + "</text>\n"
    }

    let buf = buf + "</svg>\n"
    buf
}
|
||||
|
||||
// PNG output is not implemented: the runtime has neither an SVG rasterizer
// nor a PNG encoder. The arguments are ignored and the result is always a
// map whose "error" entry explains this; callers inspect map["error"].
fn arbor_render_png(graph: Map<String, Any>, layout: Map<String, Any>, forbidden: [String]) -> Map<String, Any> {
    let result: Map<String, Any> = {
        "error": "PNG rasterization not available in El runtime — install a runtime image library or use the Rust binary"
    }
    return result
}
|
||||
|
||||
// ── Smoke test ─────────────────────────────────────────────────────────────
|
||||
|
||||
// Report a failed check: print "FAIL <label>: <msg>", record the failure in
// the "smoke_failures" state key, and return 0.
fn fail(label: String, msg: String) -> Int {
    let report: String = "FAIL " + label + ": " + msg
    println(report)
    state_set("smoke_failures", "1")
    return 0
}
|
||||
|
||||
// Pass (print "ok <label>", return 1) when `haystack` contains `needle`;
// otherwise report through fail() and return its result.
fn check_contains(label: String, haystack: String, needle: String) -> Int {
    if !str_contains(haystack, needle) {
        return fail(label, "missing [" + needle + "]")
    }
    println("ok " + label)
    return 1
}
|
||||
|
||||
// Pass (print "ok <label>", return 1) when `haystack` does NOT contain
// `needle`; otherwise report through fail() and return its result.
fn check_not_contains(label: String, haystack: String, needle: String) -> Int {
    let found: Bool = str_contains(haystack, needle)
    if found {
        return fail(label, "should not contain [" + needle + "]")
    }
    println("ok " + label)
    return 1
}
|
||||
|
||||
// Test fixture: a node map with the given id, label and shape, an empty
// sublabel and no style overrides.
fn make_test_node(id: String, label: String, shape: String) -> Map<String, Any> {
    let node: Map<String, Any> = {
        "id": id,
        "label": label,
        "sublabel": "",
        "shape": shape,
        "style_fill": "",
        "style_stroke": "",
        "style_color": ""
    }
    return node
}
|
||||
|
||||
// Test fixture: an edge map from `src` to `dst` with the given line style,
// arrow style and label.
fn make_test_edge(src: String, dst: String, line: String, arrow: String, label: String) -> Map<String, Any> {
    let edge: Map<String, Any> = {
        "from": src,
        "to": dst,
        "label": label,
        "line": line,
        "arrow": arrow
    }
    return edge
}
|
||||
|
||||
// Test fixture: a position map {"x", "y"} with both values converted to float.
fn make_test_pos(x: Int, y: Int) -> Map<String, Any> {
    let pos: Map<String, Any> = { "x": int_to_float(x), "y": int_to_float(y) }
    return pos
}
|
||||
|
||||
// Test fixture: a size map {"w", "h"} with both values converted to float.
fn make_test_size(w: Int, h: Int) -> Map<String, Any> {
    let size: Map<String, Any> = { "w": int_to_float(w), "h": int_to_float(h) }
    return size
}
|
||||
|
||||
// Test fixture: build a minimal layout map by hand. It carries "node_ids",
// "group_ids" and a "canvas" of cw x ch; per-node positions and sizes are
// added by the caller afterwards.
fn build_layout(node_ids: [String], group_ids: [String], cw: Int, ch: Int) -> Map<String, Any> {
    let canvas: Map<String, Any> = { "w": int_to_float(cw), "h": int_to_float(ch) }
    let out: Map<String, Any> = el_map_new(0)
    let out = el_map_set(out, "node_ids", node_ids)
    let out = el_map_set(out, "group_ids", group_ids)
    let out = el_map_set(out, "canvas", canvas)
    return out
}
|
||||
|
||||
// Basic fixture: two rectangle nodes "a" and "b" joined by one solid forward
// edge, no groups, titled "Test", on a 400 x 300 canvas.
let n_a: Map<String, Any> = make_test_node("a", "Node A", "rectangle")
let n_b: Map<String, Any> = make_test_node("b", "Node B", "rectangle")
let e_ab: Map<String, Any> = make_test_edge("a", "b", "solid", "forward", "")

let nodes: [Map<String, Any>] = native_list_append(native_list_append(native_list_empty(), n_a), n_b)
let edges: [Map<String, Any>] = native_list_append(native_list_empty(), e_ab)
let groups: [Map<String, Any>] = native_list_empty()

let g: Map<String, Any> = {
    "title": "Test",
    "direction": "top-down",
    "nodes": nodes,
    "edges": edges,
    "groups": groups
}

let nid_list: [String] = native_list_append(native_list_append(native_list_empty(), "a"), "b")
let gid_list: [String] = native_list_empty()
let layout: Map<String, Any> = build_layout(nid_list, gid_list, 400, 300)
let layout = el_map_set(layout, "node_pos_a", make_test_pos(100, 60))
let layout = el_map_set(layout, "node_pos_b", make_test_pos(100, 200))
let layout = el_map_set(layout, "node_size_a", make_test_size(120, 40))
let layout = el_map_set(layout, "node_size_b", make_test_size(120, 40))

let forbidden: [String] = native_list_empty()
let svg: String = arbor_render_svg(g, layout, forbidden)

// Structural checks on the rendered document.
check_contains("svg starts with <svg", svg, "<svg xmlns=")
check_contains("svg ends with </svg>", svg, "</svg>")
check_contains("svg contains node label", svg, "Node A")
check_contains("svg contains title", svg, ">Test</text>")
check_contains("svg has rect for rectangle node", svg, "<rect")
check_contains("svg has line for edge", svg, "<line")
check_contains("svg has arrow marker def", svg, "id=\"ah\"")
|
||||
|
||||
// Escape test — the node label and the title contain "&", "<" and ">".
// The rendered SVG must carry the entity forms ("&amp;", "&lt;") and must
// not contain the raw "<C>" tag.
let n_esc: Map<String, Any> = make_test_node("x", "A & B <C>", "rectangle")
let nodes2: [Map<String, Any>] = native_list_empty()
let nodes2 = native_list_append(nodes2, n_esc)
let g2: Map<String, Any> = {
    "title": "Test <Title>", "direction": "top-down",
    "nodes": nodes2, "edges": native_list_empty(), "groups": native_list_empty()
}
let nid2: [String] = native_list_empty()
let nid2 = native_list_append(nid2, "x")
let layout2: Map<String, Any> = build_layout(nid2, native_list_empty(), 200, 100)
let layout2 = el_map_set(layout2, "node_pos_x", make_test_pos(80, 40))
let layout2 = el_map_set(layout2, "node_size_x", make_test_size(120, 40))
let svg2: String = arbor_render_svg(g2, layout2, native_list_empty())
check_contains("escapes ampersand", svg2, "&amp;")
check_contains("escapes <", svg2, "&lt;")
check_not_contains("no raw <C>", svg2, "<C>")
|
||||
|
||||
// Forbidden edge — the same a -> b edge, this time listed in the forbidden
// set; the output must use the red marker and the forbidden colour.
let e_fb: Map<String, Any> = make_test_edge("a", "b", "solid", "forward", "")
let edges3: [Map<String, Any>] = native_list_append(native_list_empty(), e_fb)
let groups3: [Map<String, Any>] = native_list_empty()
let g3: Map<String, Any> = {
    "title": "F",
    "direction": "top-down",
    "nodes": nodes,
    "edges": edges3,
    "groups": groups3
}
let fb: [String] = native_list_append(native_list_empty(), forbidden_key("a", "b"))
let svg3: String = arbor_render_svg(g3, layout, fb)
check_contains("forbidden uses red marker", svg3, "ah-red")
let forbidden_colour: String = col_edge_forbidden()
check_contains("forbidden colour present", svg3, forbidden_colour)
|
||||
|
||||
// Diamond shape → rendered as a <polygon>.
let n_d: Map<String, Any> = make_test_node("d", "Decide", "diamond")
let nodes4: [Map<String, Any>] = native_list_append(native_list_empty(), n_d)
let g4: Map<String, Any> = {
    "title": "",
    "direction": "top-down",
    "nodes": nodes4,
    "edges": native_list_empty(),
    "groups": native_list_empty()
}
let nid4: [String] = native_list_append(native_list_empty(), "d")
let layout4: Map<String, Any> = build_layout(nid4, native_list_empty(), 200, 100)
let layout4 = el_map_set(layout4, "node_pos_d", make_test_pos(80, 50))
let layout4 = el_map_set(layout4, "node_size_d", make_test_size(120, 40))
let no_forbidden4: [String] = native_list_empty()
let svg4: String = arbor_render_svg(g4, layout4, no_forbidden4)
check_contains("diamond uses polygon", svg4, "<polygon")
|
||||
|
||||
// Cylinder shape → rendered with <ellipse> caps.
let n_cy: Map<String, Any> = make_test_node("cy", "DB", "cylinder")
let nodes5: [Map<String, Any>] = native_list_append(native_list_empty(), n_cy)
let g5: Map<String, Any> = {
    "title": "",
    "direction": "top-down",
    "nodes": nodes5,
    "edges": native_list_empty(),
    "groups": native_list_empty()
}
let nid5: [String] = native_list_append(native_list_empty(), "cy")
let layout5: Map<String, Any> = build_layout(nid5, native_list_empty(), 200, 100)
let layout5 = el_map_set(layout5, "node_pos_cy", make_test_pos(80, 50))
let layout5 = el_map_set(layout5, "node_size_cy", make_test_size(120, 40))
let no_forbidden5: [String] = native_list_empty()
let svg5: String = arbor_render_svg(g5, layout5, no_forbidden5)
check_contains("cylinder uses ellipse", svg5, "<ellipse")
|
||||
|
||||
// Dashed edge — line style "dashed" must produce the 5,3 dash pattern.
// Reuses the nodes and layout of the basic fixture.
let e_dash: Map<String, Any> = make_test_edge("a", "b", "dashed", "forward", "")
let edges6: [Map<String, Any>] = native_list_append(native_list_empty(), e_dash)
let g6: Map<String, Any> = {
    "title": "",
    "direction": "top-down",
    "nodes": nodes,
    "edges": edges6,
    "groups": native_list_empty()
}
let no_forbidden6: [String] = native_list_empty()
let svg6: String = arbor_render_svg(g6, layout, no_forbidden6)
check_contains("dashed line dasharray", svg6, "stroke-dasharray=\"5,3\"")
|
||||
|
||||
// PNG rendering is expected to return a map with a non-empty "error" entry.
let png: Map<String, Any> = arbor_render_png(g, layout, native_list_empty())
let err: String = png["error"]
if str_len(err) == 0 {
    println("FAIL PNG should have returned error")
    state_set("smoke_failures", "1")
} else {
    println("ok PNG returns error map")
}

// Summary: any recorded failure makes the program exit with status 1.
println("")
let f: String = state_get("smoke_failures")
if !str_eq(f, "1") {
    println("arbor-render: ok")
} else {
    println("arbor-render: FAILED")
    exit_program(1)
}
|
||||
BIN
Binary file not shown.
+827
-176
File diff suppressed because it is too large
Load Diff
BIN
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -79,6 +79,8 @@ extern "C" {
|
||||
void println(el_val_t s);
|
||||
void print(el_val_t s);
|
||||
el_val_t readline(void);
|
||||
el_val_t stdout_to_file(el_val_t path); /* redirect println to a file */
|
||||
el_val_t stdout_restore(void); /* restore stdout after capture */
|
||||
|
||||
/* ── String builtins ─────────────────────────────────────────────────────── */
|
||||
|
||||
@@ -176,11 +178,6 @@ void http_set_handler_v2(el_val_t name);
|
||||
* auto-content-type contract for legacy handlers that return plain bodies. */
|
||||
el_val_t http_response(el_val_t status, el_val_t headers_json, el_val_t body);
|
||||
|
||||
/* SSE connection fd — set by http_worker_v2 before calling the El handler,
|
||||
* cleared afterwards. Defined in el_seed.c; called from el_runtime.c.
|
||||
* The getter is exposed as __http_conn_fd() to El programs. */
|
||||
void el_seed_set_http_conn_fd(int fd);
|
||||
|
||||
/* HTTP timeout — every libcurl request honors EL_HTTP_TIMEOUT_MS (default
|
||||
* 60000ms). Read lazily on first use, so setting the env var any time before
|
||||
* the first http_* call is sufficient. */
|
||||
+9
-21
@@ -1673,7 +1673,6 @@ static void* http_worker_v2(void* arg) {
|
||||
HttpWorkerArg* a = (HttpWorkerArg*)arg;
|
||||
int fd = a->fd;
|
||||
free(a);
|
||||
int is_sse = 0;
|
||||
char *method = NULL, *path = NULL, *body = NULL, *hdr_block = NULL;
|
||||
if (http_read_request(fd, &method, &path, &body, &hdr_block) == 0) {
|
||||
http_handler4_fn h = http_lookup_active_v2();
|
||||
@@ -1681,39 +1680,28 @@ static void* http_worker_v2(void* arg) {
|
||||
int head_only = (method && strcmp(method, "HEAD") == 0);
|
||||
const char* dispatch_method = head_only ? "GET" : method;
|
||||
el_request_start(); /* begin per-request arena */
|
||||
/* Expose the raw fd to El SSE builtins (__http_conn_fd etc.). */
|
||||
el_seed_set_http_conn_fd(fd);
|
||||
if (h) {
|
||||
el_val_t hmap = http_build_headers_map(hdr_block ? hdr_block : "");
|
||||
el_val_t r = h(EL_STR(dispatch_method), EL_STR(path), hmap, EL_STR(body));
|
||||
const char* rs = EL_CSTR(r);
|
||||
/* Detect SSE sentinel — handler took ownership of the fd. */
|
||||
if (rs && strcmp(rs, "__sse__") == 0) {
|
||||
is_sse = 1;
|
||||
} else {
|
||||
size_t rlen = _tl_fs_read_len > 0 ? _tl_fs_read_len : (rs ? strlen(rs) : 0);
|
||||
response = malloc(rlen + 1);
|
||||
if (response && rs) { memcpy(response, rs, rlen); response[rlen] = '\0'; }
|
||||
else if (response) { response[0] = '\0'; }
|
||||
}
|
||||
size_t rlen = _tl_fs_read_len > 0 ? _tl_fs_read_len : (rs ? strlen(rs) : 0);
|
||||
response = malloc(rlen + 1);
|
||||
if (response && rs) { memcpy(response, rs, rlen); response[rlen] = '\0'; }
|
||||
else if (response) { response[0] = '\0'; }
|
||||
el_release(hmap);
|
||||
} else {
|
||||
response = el_strdup_persist(
|
||||
"el-runtime: no v2 http handler registered "
|
||||
"(call http_set_handler_v2)");
|
||||
}
|
||||
el_seed_set_http_conn_fd(-1); /* clear before arena teardown */
|
||||
el_request_end(); /* free all intermediate strings */
|
||||
if (!is_sse) {
|
||||
_tl_http_head_only = head_only;
|
||||
http_send_response(fd, response);
|
||||
_tl_http_head_only = 0;
|
||||
free(response);
|
||||
}
|
||||
_tl_http_head_only = head_only;
|
||||
http_send_response(fd, response);
|
||||
_tl_http_head_only = 0;
|
||||
free(response);
|
||||
}
|
||||
free(method); free(path); free(body); free(hdr_block);
|
||||
/* SSE handlers close the fd themselves via __http_sse_close. */
|
||||
if (!is_sse) close(fd);
|
||||
close(fd);
|
||||
pthread_mutex_lock(&_http_conn_mu);
|
||||
_http_conn_active--;
|
||||
pthread_cond_signal(&_http_conn_cv);
|
||||
@@ -191,138 +191,6 @@ fn js_is_int_call(call_expr: Map<String, Any>) -> Bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// ── HTML template codegen (JS) ────────────────────────────────────────────────
|
||||
//
|
||||
// HTML template expressions compile to a JS IIFE that builds the HTML string
|
||||
// using string concatenation. Interpolated values go through html_escape();
|
||||
// raw() bypasses escaping. {#each} blocks compile to Array.forEach or a
|
||||
// for-loop that pushes fragments into a parts array.
|
||||
//
|
||||
// Entry point: js_cg_html_template(expr) → JS expression string.
|
||||
|
||||
// Return the next id for generated HTML-template variables, as a decimal
// string. The counter lives in the "__js_html_counter" state key; an empty
// value counts as 0, so the first id handed out is "1".
fn js_next_html_id() -> String {
    let stored: String = state_get("__js_html_counter")
    let current = 0
    if !str_eq(stored, "") {
        let current = str_to_int(stored)
    }
    let next_id: String = native_int_to_str(current + 1)
    state_set("__js_html_counter", next_id)
    return next_id
}
|
||||
|
||||
// Emit JS statements that append each child of an HTML template to the
// accumulator variable `acc_var`. Dispatch is on child["html"]:
//   Text    — appended as a string literal,
//   Doctype — appends "<!doctype html>",
//   Interp  — expression wrapped in html_escape(),
//   Raw     — expression wrapped in html_raw(),
//   Element — delegated to js_cg_html_element_str,
//   Each    — delegated to js_cg_html_each.
// Any other kind contributes nothing.
fn js_cg_html_parts(children: [Map<String, Any>], acc_var: String) -> String {
    let total: Int = native_list_len(children)
    let code = ""
    let idx = 0
    while idx < total {
        let node: Map<String, Any> = native_list_get(children, idx)
        let node_kind: String = node["html"]
        // Statement(s) generated for this child; stays empty for unknown kinds.
        let piece = ""
        if str_eq(node_kind, "Text") {
            let text: String = node["text"]
            let piece = acc_var + " += " + js_str_lit(text) + "; "
        }
        if str_eq(node_kind, "Doctype") {
            let piece = acc_var + " += \"<!doctype html>\"; "
        }
        if str_eq(node_kind, "Interp") {
            let val_node = node["value"]
            let val_c: String = js_cg_expr(val_node)
            let piece = acc_var + " += html_escape(" + val_c + "); "
        }
        if str_eq(node_kind, "Raw") {
            let val_node = node["value"]
            let val_c: String = js_cg_expr(val_node)
            let piece = acc_var + " += html_raw(" + val_c + "); "
        }
        if str_eq(node_kind, "Element") {
            let piece = js_cg_html_element_str(node, acc_var)
        }
        if str_eq(node_kind, "Each") {
            let piece = js_cg_html_each(node, acc_var)
        }
        let code = code + piece
        let idx = idx + 1
    }
    return code
}
|
||||
|
||||
// Emit JS statements that append an element's attributes to `acc_var`.
// attr["kind"] selects the form:
//   "static"  — name="value" with the value as a string literal,
//   "dynamic" — name="…" with the expression wrapped in html_escape(),
//   otherwise — a bare boolean attribute (name only).
fn js_cg_html_attrs_str(attrs: [Map<String, Any>], acc_var: String) -> String {
    let total: Int = native_list_len(attrs)
    let code = ""
    let idx = 0
    while idx < total {
        let attr: Map<String, Any> = native_list_get(attrs, idx)
        let attr_name: String = attr["name"]
        let kind: String = attr["kind"]
        // Statements that emit the opening ` name="` and the closing `"`.
        let open_stmt: String = acc_var + " += " + js_str_lit(" " + attr_name + "=\"") + "; "
        let close_stmt: String = acc_var + " += " + js_str_lit("\"") + "; "
        if str_eq(kind, "static") {
            let sv: String = attr["value"]
            let code = code + open_stmt + acc_var + " += " + js_str_lit(sv) + "; " + close_stmt
        } else {
            if str_eq(kind, "dynamic") {
                let val_node = attr["value"]
                let val_c: String = js_cg_expr(val_node)
                let code = code + open_stmt + acc_var + " += html_escape(" + val_c + "); " + close_stmt
            } else {
                // Boolean attribute
                let code = code + acc_var + " += " + js_str_lit(" " + attr_name) + "; "
            }
        }
        let idx = idx + 1
    }
    return code
}
|
||||
|
||||
// Emit JS statements that append one element to `acc_var`: the opening tag
// with its attributes, then either "/>" for a self-closing element or ">",
// the children, and the closing tag.
fn js_cg_html_element_str(elem: Map<String, Any>, acc_var: String) -> String {
    let tag: String = elem["tag"]
    let attrs: [Map<String, Any>] = elem["attrs"]
    let children: [Map<String, Any>] = elem["children"]
    let self_closing: Bool = elem["self_closing"]

    // "<tag" followed by the attribute statements.
    let code = acc_var + " += " + js_str_lit("<" + tag) + "; "
    let code = code + js_cg_html_attrs_str(attrs, acc_var)
    if self_closing {
        return code + acc_var + " += \"/>\"" + "; "
    }
    let inner: String = js_cg_html_parts(children, acc_var)
    let closing: String = acc_var + " += " + js_str_lit("</" + tag + ">") + "; "
    return code + acc_var + " += \">\"; " + inner + closing
}
|
||||
|
||||
// Emit a JS block for an {#each}: it binds the list and its length to
// uniquely numbered constants, then runs a counted for-loop that binds
// node["item"] to each element and executes the body statements.
fn js_cg_html_each(node: Map<String, Any>, acc_var: String) -> String {
    let list_expr = node["list"]
    let item_name: String = node["item"]
    let body_children: [Map<String, Any>] = node["body"]

    // The id is taken before the list and body are generated, so nested
    // templates receive later ids.
    let id: String = js_next_html_id()
    let list_var: String = "_html_list_" + id
    let len_var: String = "_html_len_" + id
    let idx_var: String = "_html_i_" + id
    let list_c: String = js_cg_expr(list_expr)
    let inner_c: String = js_cg_html_parts(body_children, acc_var)

    let header: String = "{ const " + list_var + " = " + list_c + "; const " + len_var + " = el_list_len(" + list_var + "); "
    let loop_open: String = "for (let " + idx_var + " = 0; " + idx_var + " < " + len_var + "; " + idx_var + "++) { "
    let bind_item: String = "const " + item_name + " = el_list_get(" + list_var + ", " + idx_var + "); "
    return header + loop_open + bind_item + inner_c + "} } "
}
|
||||
|
||||
// Compile an HTML template expression to a JS expression: an immediately
// invoked arrow function that builds the markup in a uniquely named
// accumulator and returns it. When root["doctype"] is set, "<!doctype html>"
// is emitted before the root element.
fn js_cg_html_template(expr: Map<String, Any>) -> String {
    let root = expr["root"]
    let id: String = js_next_html_id()
    let acc: String = "_html_" + id
    let doctype_flag: Bool = root["doctype"]

    let prelude = "(() => { let " + acc + " = \"\"; "
    if doctype_flag {
        let prelude = prelude + acc + " += \"<!doctype html>\"; "
    }
    let body: String = js_cg_html_element_str(root, acc)
    return prelude + body + "return " + acc + "; })()"
}
|
||||
|
||||
// ── Expression codegen ────────────────────────────────────────────────────────
|
||||
//
|
||||
// js_cg_expr returns a JS expression string (not a statement).
|
||||
@@ -701,10 +569,6 @@ fn js_cg_expr(expr: Map<String, Any>) -> String {
|
||||
return js_cg_lambda(expr)
|
||||
}
|
||||
|
||||
if kind == "HtmlTemplate" {
|
||||
return js_cg_html_template(expr)
|
||||
}
|
||||
|
||||
"null"
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -20,44 +20,18 @@ import "codegen.el"
|
||||
import "codegen-js.el"
|
||||
|
||||
// compile — full pipeline (C target): source string -> C source string
|
||||
// Uses JIT function-at-a-time streaming: parse one decl → emit C → discard AST.
|
||||
// Peak memory is O(one function's AST) instead of O(whole program AST).
|
||||
fn compile(source: String) -> String {
|
||||
// Top-level arena scope: activates the string arena before lex() so that
|
||||
// ALL strdup allocations (token strings, sig strings, codegen fragments)
|
||||
// are tracked and freed on pop. Without this, lex() and scan_fn_sigs()
|
||||
// run before any push, leaving _tl_arena_active=0 and leaking every
|
||||
// token string. Also prevents inner pop(mark=0) calls from deactivating
|
||||
// the arena between per-function scopes.
|
||||
let top_mark: Any = el_arena_push()
|
||||
let tokens: [Any] = lex(source)
|
||||
// Fast pre-scan: collect fn signatures + program kind without building
|
||||
// full expression ASTs. O(tokens) time, minimal allocation.
|
||||
let sigs: [Map<String, Any>] = scan_fn_sigs(tokens)
|
||||
// Stream parse-emit: parse one decl at a time, emit C, discard.
|
||||
// All output written to stdout via println before pop.
|
||||
codegen_streaming(tokens, sigs, source)
|
||||
el_arena_pop(top_mark)
|
||||
""
|
||||
}
|
||||
|
||||
// compile_test — like compile() but sets __test_mode so codegen_streaming
|
||||
// compiles test { } blocks instead of skipping them, and emits the test
|
||||
// harness main() instead of the normal int main().
|
||||
fn compile_test(source: String) -> String {
|
||||
state_set("__test_mode", "1")
|
||||
let top_mark: Any = el_arena_push()
|
||||
let tokens: [Any] = lex(source)
|
||||
let sigs: [Map<String, Any>] = scan_fn_sigs(tokens)
|
||||
codegen_streaming(tokens, sigs, source)
|
||||
el_arena_pop(top_mark)
|
||||
state_set("__test_mode", "")
|
||||
""
|
||||
let tokens: [Map<String, Any>] = lex(source)
|
||||
let stmts: [Map<String, Any>] = parse(tokens)
|
||||
// Token list is no longer needed after parsing — release it to free memory
|
||||
// before codegen allocates its own working data on large source files.
|
||||
el_release(tokens)
|
||||
codegen(stmts, source)
|
||||
}
|
||||
|
||||
// compile_js — full pipeline (JS target, module mode): source string -> JS source string
|
||||
fn compile_js(source: String) -> String {
|
||||
let tokens: [Any] = lex(source)
|
||||
let tokens: [Map<String, Any>] = lex(source)
|
||||
let stmts: [Map<String, Any>] = parse(tokens)
|
||||
// Token list is no longer needed after parsing — release it to free memory.
|
||||
el_release(tokens)
|
||||
@@ -67,7 +41,7 @@ fn compile_js(source: String) -> String {
|
||||
// compile_js_with_bundle — JS target in bundle mode.
|
||||
// Reads el_runtime.js from runtime_path and inlines it inside an IIFE.
|
||||
fn compile_js_with_bundle(source: String, runtime_path: String) -> String {
|
||||
let tokens: [Any] = lex(source)
|
||||
let tokens: [Map<String, Any>] = lex(source)
|
||||
let stmts: [Map<String, Any>] = parse(tokens)
|
||||
el_release(tokens)
|
||||
let runtime_content: String = fs_read(runtime_path)
|
||||
@@ -173,18 +147,6 @@ fn detect_obfuscate(argv: [String]) -> Bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// detect_test — report whether the exact argument "--test" occurs in argv.
fn detect_test(argv: [String]) -> Bool {
    let count: Int = native_list_len(argv)
    let idx = 0
    let found = false
    while idx < count {
        let arg: String = native_list_get(argv, idx)
        if str_eq(arg, "--test") {
            // Jump the index to the end so the loop stops on the first hit.
            let found = true
            let idx = count
        } else {
            let idx = idx + 1
        }
    }
    found
}
|
||||
|
||||
// Build a unique temp file path: /tmp/elc-<pid>-<timestamp>.<suffix>
|
||||
fn make_temp_path(suffix: String) -> String {
|
||||
let pid: Int = getpid_now()
|
||||
@@ -287,9 +249,6 @@ fn type_node_to_el(t: Map<String, Any>) -> String {
|
||||
|
||||
// emit_header — write a .elh file from parsed statements.
|
||||
// Scans for FnDef nodes and emits 'extern fn' declarations.
|
||||
// NOTE: This function requires the full AST. Prefer emit_header_from_sigs
|
||||
// for the --emit-header path — it works from a token-level scan without
|
||||
// building expression ASTs, avoiding OOM on large files.
|
||||
fn emit_header(stmts: [Map<String, Any>], hdr_path: String) -> Void {
|
||||
let n: Int = native_list_len(stmts)
|
||||
let i = 0
|
||||
@@ -328,32 +287,6 @@ fn emit_header(stmts: [Map<String, Any>], hdr_path: String) -> Void {
|
||||
let ok: Bool = fs_write(hdr_path, content)
|
||||
}
|
||||
|
||||
// emit_header_from_sigs — write a .elh header file from a list of signature
// maps.
//
// The file starts with an "auto-generated" banner line. Every entry whose
// "kind" is "fn" contributes one line of the form
//     extern fn <name>(<params_el>) -> <ret_el>
// where an empty "ret_el" is written as Any. Entries of any other kind are
// ignored. The result of fs_write is bound but not inspected, so a failed
// write is not reported by this function.
fn emit_header_from_sigs(sigs: [Map<String, Any>], hdr_path: String) -> Void {
    let lines: [String] = native_list_empty()
    let lines = native_list_append(lines, "// auto-generated by elc --emit-header — do not edit\n")

    let total: Int = native_list_len(sigs)
    let idx: Int = 0
    while idx < total {
        let entry = native_list_get(sigs, idx)
        let entry_kind: String = entry["kind"]
        if str_eq(entry_kind, "fn") {
            let fn_name: String = entry["name"]
            let fn_params: String = entry["params_el"]
            let fn_ret: String = entry["ret_el"]
            // No declared return type: export the function as returning Any.
            if str_eq(fn_ret, "") { let fn_ret = "Any" }
            let decl: String = "extern fn " + fn_name + "(" + fn_params + ") -> " + fn_ret
            let lines = native_list_append(lines, decl + "\n")
        }
        let idx = idx + 1
    }

    let text: String = str_join(lines, "")
    let written: Bool = fs_write(hdr_path, text)
}
|
||||
|
||||
// ── Import resolution ────────────────────────────────────────────────────────
|
||||
//
|
||||
// elc supports two forms of import:
|
||||
@@ -543,7 +476,6 @@ fn main() -> Void {
|
||||
let do_bundle: Bool = detect_bundle(argv)
|
||||
let do_minify: Bool = detect_minify(argv)
|
||||
let do_obfuscate: Bool = detect_obfuscate(argv)
|
||||
let do_test: Bool = detect_test(argv)
|
||||
// --obfuscate implies --minify: obfuscating unminified code is pointless.
|
||||
if do_obfuscate {
|
||||
let do_minify = true
|
||||
@@ -551,7 +483,7 @@ fn main() -> Void {
|
||||
let positional: [String] = strip_flags(argv)
|
||||
let argc: Int = native_list_len(positional)
|
||||
if argc < 1 {
|
||||
println("el-compiler: usage: elc [--target=c|js] [--bundle] [--minify] [--obfuscate] [--emit-header] [--test] <source.el> [<output>]")
|
||||
println("el-compiler: usage: elc [--target=c|js] [--bundle] [--minify] [--obfuscate] [--emit-header] <source.el> [<output>]")
|
||||
exit(1)
|
||||
}
|
||||
|
||||
@@ -565,20 +497,16 @@ fn main() -> Void {
|
||||
|
||||
let src_path: String = native_list_get(positional, 0)
|
||||
|
||||
// When --emit-header is requested, lex the source file and do a
|
||||
// token-level signature scan (no full AST) to write a .elh file.
|
||||
// This avoids OOM on large files with HTML template bodies or deep
|
||||
// BinOp chains (e.g. checkout.el) — parse() builds O(whole-program AST)
|
||||
// while scan_fn_sigs_el keeps peak memory at O(tokens).
|
||||
// When --emit-header is requested, parse the source file directly
|
||||
// (without inlining imports) and write out a .elh file alongside the .c.
|
||||
if do_emit_header {
|
||||
el_mem_check()
|
||||
let raw_source: String = fs_read(src_path)
|
||||
let hdr_tokens: [Any] = lex(raw_source)
|
||||
let hdr_sigs: [Map<String, Any>] = scan_fn_sigs_el(hdr_tokens)
|
||||
let hdr_tokens: [Map<String, Any>] = lex(raw_source)
|
||||
let hdr_stmts: [Map<String, Any>] = parse(hdr_tokens)
|
||||
el_release(hdr_tokens)
|
||||
let hdr_path: String = str_slice(src_path, 0, str_len(src_path) - 3) + ".elh"
|
||||
emit_header_from_sigs(hdr_sigs, hdr_path)
|
||||
el_release(hdr_sigs)
|
||||
emit_header(hdr_stmts, hdr_path)
|
||||
el_release(hdr_stmts)
|
||||
}
|
||||
|
||||
let source: String = resolve_imports(src_path)
|
||||
@@ -592,12 +520,6 @@ fn main() -> Void {
|
||||
exit(0)
|
||||
}
|
||||
|
||||
// --test mode: compile with test harness (C target only).
|
||||
if do_test {
|
||||
compile_test(source)
|
||||
exit(0)
|
||||
}
|
||||
|
||||
// Standard path (no post-processing).
|
||||
let out: String = ""
|
||||
if do_bundle {
|
||||
@@ -0,0 +1,749 @@
|
||||
// lexer.el — el self-hosting lexer
|
||||
//
|
||||
// Tokenises an el source string into a list of token maps.
|
||||
// Each token is a Map<String, Any> with keys:
|
||||
// "kind" -> String (e.g. "Int", "Ident", "Plus")
|
||||
// "value" -> String (the raw text of the token)
|
||||
//
|
||||
// Entry point: fn lex(source: String) -> [Map<String, Any>]
|
||||
//
|
||||
// Uses native_string_chars to split the source into a chars list,
|
||||
// then indexes it with native_list_get — avoids O(N²) string cloning.
|
||||
|
||||
// ── Character helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
// lex_is_digit — true when ch is exactly one of the strings "0" through "9".
fn lex_is_digit(ch: String) -> Bool {
    if ch == "0" || ch == "1" || ch == "2" || ch == "3" || ch == "4" { return true }
    ch == "5" || ch == "6" || ch == "7" || ch == "8" || ch == "9"
}
|
||||
|
||||
// lex_is_alpha — true when ch is exactly one ASCII letter, "a".."z" or
// "A".."Z". Checks are grouped seven or five letters per row to keep each
// condition short.
fn lex_is_alpha(ch: String) -> Bool {
    // lowercase
    if ch == "a" || ch == "b" || ch == "c" || ch == "d" || ch == "e" || ch == "f" || ch == "g" { return true }
    if ch == "h" || ch == "i" || ch == "j" || ch == "k" || ch == "l" || ch == "m" || ch == "n" { return true }
    if ch == "o" || ch == "p" || ch == "q" || ch == "r" || ch == "s" || ch == "t" || ch == "u" { return true }
    if ch == "v" || ch == "w" || ch == "x" || ch == "y" || ch == "z" { return true }
    // uppercase
    if ch == "A" || ch == "B" || ch == "C" || ch == "D" || ch == "E" || ch == "F" || ch == "G" { return true }
    if ch == "H" || ch == "I" || ch == "J" || ch == "K" || ch == "L" || ch == "M" || ch == "N" { return true }
    if ch == "O" || ch == "P" || ch == "Q" || ch == "R" || ch == "S" || ch == "T" || ch == "U" { return true }
    if ch == "V" || ch == "W" || ch == "X" || ch == "Y" || ch == "Z" { return true }
    false
}
|
||||
|
||||
// is_alnum_or_underscore — true when ch is "_", a letter accepted by
// lex_is_alpha, or a digit accepted by lex_is_digit.
fn is_alnum_or_underscore(ch: String) -> Bool {
    if ch == "_" { return true }
    if lex_is_alpha(ch) { return true }
    lex_is_digit(ch)
}
|
||||
|
||||
// lex_is_whitespace — true when ch is a space, tab, line feed or carriage
// return.
fn lex_is_whitespace(ch: String) -> Bool {
    ch == " " || ch == "\t" || ch == "\n" || ch == "\r"
}
|
||||
|
||||
// make_tok — build a token map with the two keys "kind" and "value".
fn make_tok(kind: String, value: String) -> Map<String, Any> {
    let tok: Map<String, Any> = { "kind": kind, "value": value }
    tok
}
|
||||
|
||||
// ── Keyword lookup ────────────────────────────────────────────────────────────
|
||||
|
||||
// keyword_kind — map a scanned word to its keyword token kind.
//
// Returns "" when the word is not a keyword (the caller then treats it as an
// identifier). The boolean literals "true" and "false" both map to "Bool".
// Entries are listed alphabetically; the words are mutually exclusive, so the
// order of the checks does not affect the result.
fn keyword_kind(word: String) -> String {
    // Boolean literals share one token kind.
    if word == "true" || word == "false" { return "Bool" }

    if word == "accessor" { return "Accessor" }
    if word == "activate" { return "Activate" }
    if word == "as" { return "As" }
    if word == "assert" { return "Assert" }
    if word == "catch" { return "Catch" }
    if word == "cgi" { return "Cgi" }
    if word == "deploy" { return "Deploy" }
    if word == "else" { return "Else" }
    if word == "engine" { return "Engine" }
    if word == "enum" { return "Enum" }
    if word == "extern" { return "Extern" }
    if word == "fallback" { return "Fallback" }
    if word == "fn" { return "Fn" }
    if word == "for" { return "For" }
    if word == "from" { return "From" }
    if word == "if" { return "If" }
    if word == "impl" { return "Impl" }
    if word == "import" { return "Import" }
    if word == "in" { return "In" }
    if word == "let" { return "Let" }
    if word == "manager" { return "Manager" }
    if word == "match" { return "Match" }
    if word == "parallel" { return "Parallel" }
    if word == "protocol" { return "Protocol" }
    if word == "reason" { return "Reason" }
    if word == "requires" { return "Requires" }
    if word == "retry" { return "Retry" }
    if word == "return" { return "Return" }
    if word == "sealed" { return "Sealed" }
    if word == "seed" { return "Seed" }
    if word == "service" { return "Service" }
    if word == "target" { return "Target" }
    if word == "test" { return "Test" }
    if word == "times" { return "Times" }
    if word == "to" { return "To" }
    if word == "trace" { return "Trace" }
    if word == "try" { return "Try" }
    if word == "type" { return "Type" }
    if word == "vessel" { return "Vessel" }
    if word == "via" { return "Via" }
    if word == "where" { return "Where" }
    if word == "while" { return "While" }
    if word == "with" { return "With" }
    ""
}
|
||||
|
||||
// ── Scan helpers ──────────────────────────────────────────────────────────────
|
||||
// All scan helpers receive the chars list and total length.
|
||||
|
||||
// scan_digits — collect the run of digit characters beginning at `start`.
// Returns { "text": the digits joined together, "pos": index of the first
// character that is not part of the run }. When chars[start] is not a digit,
// or start is at/after `total`, "text" is "" and "pos" equals start.
fn scan_digits(chars: [String], start: Int, total: Int) -> Map<String, Any> {
    let pos = start
    // Upper bound for the loop; pulled down to `pos` to stop at a non-digit.
    let limit = total
    let digits: [String] = native_list_empty()
    while pos < limit {
        let cur: String = native_list_get(chars, pos)
        if lex_is_digit(cur) {
            let digits = native_list_append(digits, cur)
            let pos = pos + 1
        } else {
            let limit = pos
        }
    }
    { "text": str_join(digits, ""), "pos": pos }
}
|
||||
|
||||
// scan_ident — collect the run of identifier characters (letters, digits,
// underscore) beginning at `start`.
// Returns { "text": the characters joined together, "pos": index of the first
// character that is not part of the run }.
fn scan_ident(chars: [String], start: Int, total: Int) -> Map<String, Any> {
    let pos = start
    // Upper bound for the loop; pulled down to `pos` to stop at a non-match.
    let limit = total
    let letters: [String] = native_list_empty()
    while pos < limit {
        let cur: String = native_list_get(chars, pos)
        if is_alnum_or_underscore(cur) {
            let letters = native_list_append(letters, cur)
            let pos = pos + 1
        } else {
            let limit = pos
        }
    }
    { "text": str_join(letters, ""), "pos": pos }
}
|
||||
|
||||
// ── Code-bearing string detection + comment strip ────────────────────────────
|
||||
// Inline JS/CSS literals embedded in El source (e.g. <script>…</script> blobs
|
||||
// or stylesheet payloads inside string literals) carry their own line and
|
||||
// block comments. Those comments leak into the served HTML and reveal build
|
||||
// notes the visitor should never see. We strip them at the lexer so every
|
||||
// downstream consumer (codegen-c, codegen-js, parser) gets the cleaned form.
|
||||
//
|
||||
// looks_like_code — heuristic gate so we only strip strings that actually
|
||||
// embed JS or CSS. Plain prose, hex blobs, JSON, etc. pass through verbatim.
|
||||
|
||||
// substr_at — true when `needle` occurs in `chars` starting exactly at index
// `start`. `total` is the length of `chars`; a needle that would run past it
// never matches. An empty needle matches at any start <= total.
fn substr_at(chars: [String], start: Int, total: Int, needle: String) -> Bool {
    let needle_chars: [String] = native_string_chars(needle)
    let needle_len: Int = native_list_len(needle_chars)
    if start + needle_len > total { return false }

    let k = 0
    while k < needle_len {
        let have: String = native_list_get(chars, start + k)
        let want: String = native_list_get(needle_chars, k)
        if have == want {
            let k = k + 1
        } else {
            // First mismatch settles it.
            return false
        }
    }
    true
}
|
||||
|
||||
// str_has — true when `needle` occurs anywhere in `s`. Tries substr_at at
// every index of `s` in order and stops at the first match. An empty `s`
// yields false because no index is tried.
fn str_has(s: String, needle: String) -> Bool {
    let hay: [String] = native_string_chars(s)
    let hay_len: Int = native_list_len(hay)
    let at = 0
    while at < hay_len {
        if substr_at(hay, at, hay_len, needle) { return true }
        let at = at + 1
    }
    false
}
|
||||
|
||||
// looks_like_code — heuristic: does this string literal embed JS or CSS?
// True when it contains "<script" or "<style", or when it contains both the
// word "function" and a ";".
fn looks_like_code(s: String) -> Bool {
    if str_has(s, "<script") || str_has(s, "<style") { return true }
    if str_has(s, "function") { return str_has(s, ";") }
    false
}
|
||||
|
||||
// strip_code_comments — remove // line comments and /* block */ comments from
// a string that embeds JS or CSS, walking it one character at a time.
//
//   * Inside a JS string (single quote, double quote or backtick) nothing is
//     stripped; a backslash copies the character after it verbatim.
//   * A line comment is dropped up to, but not including, the newline.
//   * A block comment is dropped through its closing "*/"; an unterminated
//     block comment drops the rest of the input.
//   * URL guard: "//" directly after ':' is a scheme separator ("https://"),
//     not a comment. BOTH slashes are copied in one step. Copying only the
//     first one let the second slash be re-examined as a comment opener, which
//     truncated "file:///x" to "file:/" and turned "http://*.host" into the
//     start of a block comment.
//
// Returns the cleaned string.
fn strip_code_comments(s: String) -> String {
    let chars: [String] = native_string_chars(s)
    let total: Int = native_list_len(chars)
    let out_parts: [String] = native_list_empty()
    let i = 0
    let in_squote = false
    let in_dquote = false
    let in_btick = false
    // Last character copied to the output; reset to "" after a stripped
    // comment so the URL guard never looks across a removed span.
    let prev = ""
    while i < total {
        let ch: String = native_list_get(chars, i)
        let in_js_string = false
        if in_squote { let in_js_string = true }
        if in_dquote { let in_js_string = true }
        if in_btick { let in_js_string = true }

        if in_js_string {
            if ch == "\\" {
                // Backslash escape: copy the next char verbatim, whatever it is.
                let out_parts = native_list_append(out_parts, ch)
                let next_i = i + 1
                if next_i < total {
                    let nc: String = native_list_get(chars, next_i)
                    let out_parts = native_list_append(out_parts, nc)
                    let prev = nc
                    let i = next_i + 1
                } else {
                    let prev = ch
                    let i = next_i
                }
            } else {
                // Only the quote kind that opened the string can close it.
                if in_squote {
                    if ch == "'" { let in_squote = false }
                } else {
                    if in_dquote {
                        if ch == "\"" { let in_dquote = false }
                    } else {
                        if in_btick {
                            if ch == "`" { let in_btick = false }
                        }
                    }
                }
                let out_parts = native_list_append(out_parts, ch)
                let prev = ch
                let i = i + 1
            }
        } else {
            // Not in a JS string: look one character ahead for comment openers.
            let next_i = i + 1
            let next_ch = ""
            if next_i < total {
                let next_ch: String = native_list_get(chars, next_i)
            }

            if ch == "/" {
                if next_ch == "/" {
                    if prev == ":" {
                        // URL guard: copy "//" as a unit.
                        let out_parts = native_list_append(out_parts, ch)
                        let out_parts = native_list_append(out_parts, next_ch)
                        let prev = next_ch
                        let i = i + 2
                    } else {
                        // Line comment: skip to the newline, which is kept.
                        let i = i + 2
                        let scanning = true
                        while scanning {
                            if i >= total {
                                let scanning = false
                            } else {
                                let lc: String = native_list_get(chars, i)
                                if lc == "\n" {
                                    let scanning = false
                                } else {
                                    let i = i + 1
                                }
                            }
                        }
                        let prev = ""
                    }
                } else {
                    if next_ch == "*" {
                        // Block comment: skip through the matching "*/".
                        let i = i + 2
                        let scanning2 = true
                        while scanning2 {
                            if i >= total {
                                let scanning2 = false
                            } else {
                                let bc: String = native_list_get(chars, i)
                                if bc == "*" {
                                    let after = i + 1
                                    if after < total {
                                        let nc2: String = native_list_get(chars, after)
                                        if nc2 == "/" {
                                            let i = after + 1
                                            let scanning2 = false
                                        } else {
                                            let i = i + 1
                                        }
                                    } else {
                                        let i = i + 1
                                    }
                                } else {
                                    let i = i + 1
                                }
                            }
                        }
                        let prev = ""
                    } else {
                        // A lone '/', e.g. division or a path separator.
                        let out_parts = native_list_append(out_parts, ch)
                        let prev = ch
                        let i = i + 1
                    }
                }
            } else {
                // A quote character opens a JS string; every character,
                // quote or not, is copied through.
                if ch == "'" { let in_squote = true }
                if ch == "\"" { let in_dquote = true }
                if ch == "`" { let in_btick = true }
                let out_parts = native_list_append(out_parts, ch)
                let prev = ch
                let i = i + 1
            }
        }
    }
    str_join(out_parts, "")
}
|
||||
|
||||
// scan_string — scan the body of a double-quoted string literal.
// `start` is the index just AFTER the opening quote.
//
// Escapes: \n, \t and \r become the corresponding control character; any
// other escaped character (including \" and \\) is kept as that character
// with the backslash removed. A backslash that is the last character of the
// input is dropped.
//
// Returns { "text": decoded content, "pos": index after the closing quote }.
// If the input ends before a closing quote, "pos" is the end of input.
fn scan_string(chars: [String], start: Int, total: Int) -> Map<String, Any> {
    let pos = start
    let pieces: [String] = native_list_empty()
    let scanning = true
    while scanning {
        if pos >= total {
            // Unterminated literal: stop at end of input.
            let scanning = false
        } else {
            let cur: String = native_list_get(chars, pos)
            if cur == "\"" {
                // Closing quote: consume it and finish.
                let pos = pos + 1
                let scanning = false
            } else {
                if cur == "\\" {
                    let esc_pos = pos + 1
                    if esc_pos < total {
                        let esc: String = native_list_get(chars, esc_pos)
                        // Default: the escaped character stands for itself.
                        let decoded = esc
                        if esc == "n" { let decoded = "\n" }
                        if esc == "t" { let decoded = "\t" }
                        if esc == "r" { let decoded = "\r" }
                        let pieces = native_list_append(pieces, decoded)
                        let pos = esc_pos + 1
                    } else {
                        let pos = pos + 1
                    }
                } else {
                    let pieces = native_list_append(pieces, cur)
                    let pos = pos + 1
                }
            }
        }
    }
    { "text": str_join(pieces, ""), "pos": pos }
}
|
||||
|
||||
// ── Main lexer ────────────────────────────────────────────────────────────────
|
||||
|
||||
// lex_two_char_kind — token kind for the two-character operator made of `ch`
// followed by `peek_ch`, or "" when that pair is not an operator.
// Pass "" as `peek_ch` when there is no following character.
fn lex_two_char_kind(ch: String, peek_ch: String) -> String {
    if ch == "=" {
        if peek_ch == "=" { return "EqEq" }
        if peek_ch == ">" { return "FatArrow" }
    }
    if ch == "!" {
        if peek_ch == "=" { return "NotEq" }
    }
    if ch == "<" {
        if peek_ch == "=" { return "LtEq" }
    }
    if ch == ">" {
        if peek_ch == "=" { return "GtEq" }
    }
    if ch == "&" {
        if peek_ch == "&" { return "And" }
    }
    if ch == "|" {
        if peek_ch == "|" { return "Or" }
        if peek_ch == ">" { return "PipeOp" }
    }
    if ch == "-" {
        if peek_ch == ">" { return "Arrow" }
    }
    if ch == ":" {
        if peek_ch == ":" { return "ColonColon" }
    }
    ""
}

// lex_one_char_kind — token kind for a single-character operator or
// delimiter, or "" when `ch` produces no token. A lone "&" is deliberately
// absent: like any unknown character it is skipped without a token.
fn lex_one_char_kind(ch: String) -> String {
    if ch == "=" { return "Eq" }
    if ch == "!" { return "Not" }
    if ch == "<" { return "Lt" }
    if ch == ">" { return "Gt" }
    if ch == "|" { return "Pipe" }
    if ch == "-" { return "Minus" }
    if ch == ":" { return "Colon" }
    if ch == "+" { return "Plus" }
    if ch == "*" { return "Star" }
    if ch == "%" { return "Percent" }
    if ch == "(" { return "LParen" }
    if ch == ")" { return "RParen" }
    if ch == "{" { return "LBrace" }
    if ch == "}" { return "RBrace" }
    if ch == "[" { return "LBracket" }
    if ch == "]" { return "RBracket" }
    if ch == "," { return "Comma" }
    if ch == "." { return "Dot" }
    if ch == ";" { return "Semicolon" }
    if ch == "@" { return "At" }
    if ch == "?" { return "QuestionMark" }
    ""
}

// lex — tokenise an El source string.
//
// Returns a list of token maps ({ "kind", "value" }) that always ends with an
// "Eof" token. Whitespace and // line comments produce no tokens. String
// literals that look_like_code() have their embedded comments stripped before
// the "Str" token is made. Characters that match no rule are skipped.
fn lex(source: String) -> [Map<String, Any>] {
    let chars: [String] = native_string_chars(source)
    let total: Int = native_list_len(chars)
    let tokens: [Map<String, Any>] = native_list_empty()
    let i: Int = 0

    while i < total {
        let ch: String = native_list_get(chars, i)

        // One character of lookahead; stays "" at end of input.
        let peek_ch = ""
        if i + 1 < total {
            let peek_ch: String = native_list_get(chars, i + 1)
        }

        if lex_is_whitespace(ch) {
            let i = i + 1
        } else {
            if ch == "/" {
                if peek_ch == "/" {
                    // Line comment: advance to the newline (left in place for
                    // the whitespace rule) or to end of input.
                    let i = i + 2
                    let in_comment = true
                    while in_comment {
                        if i >= total {
                            let in_comment = false
                        } else {
                            let cch: String = native_list_get(chars, i)
                            if cch == "\n" {
                                let in_comment = false
                            } else {
                                let i = i + 1
                            }
                        }
                    }
                } else {
                    let tokens = native_list_append(tokens, make_tok("Slash", "/"))
                    let i = i + 1
                }
            } else {
                if ch == "\"" {
                    // String literal.
                    let str_result = scan_string(chars, i + 1, total)
                    let raw_text: String = str_result["text"]
                    let str_end: Int = str_result["pos"]
                    // Strings embedding JS/CSS lose their // and /* */
                    // comments here; everything else passes through as-is.
                    let tok_text = raw_text
                    if looks_like_code(raw_text) {
                        let tok_text = strip_code_comments(raw_text)
                    }
                    let tokens = native_list_append(tokens, make_tok("Str", tok_text))
                    let i = str_end
                } else {
                    if lex_is_digit(ch) {
                        // Number literal: Int unless the digits are followed
                        // by '.' and at least one more digit.
                        let int_result = scan_digits(chars, i, total)
                        let int_text: String = int_result["text"]
                        let int_end: Int = int_result["pos"]

                        let num_kind = "Int"
                        let num_text = int_text
                        let num_end = int_end
                        if int_end < total {
                            let dot_ch: String = native_list_get(chars, int_end)
                            if dot_ch == "." {
                                let after_dot = int_end + 1
                                if after_dot < total {
                                    let after_dot_ch: String = native_list_get(chars, after_dot)
                                    if lex_is_digit(after_dot_ch) {
                                        let frac_result = scan_digits(chars, after_dot, total)
                                        let frac_text: String = frac_result["text"]
                                        let frac_end: Int = frac_result["pos"]
                                        let num_kind = "Float"
                                        let num_text = int_text + "." + frac_text
                                        let num_end = frac_end
                                    }
                                }
                            }
                        }
                        let tokens = native_list_append(tokens, make_tok(num_kind, num_text))
                        let i = num_end
                    } else {
                        if lex_is_alpha(ch) || ch == "_" {
                            // Identifier or keyword.
                            let word_result = scan_ident(chars, i, total)
                            let word: String = word_result["text"]
                            let word_end: Int = word_result["pos"]
                            let word_kind = keyword_kind(word)
                            if word_kind == "" {
                                let word_kind = "Ident"
                            }
                            let tokens = native_list_append(tokens, make_tok(word_kind, word))
                            let i = word_end
                        } else {
                            // Operators and delimiters: two-character forms
                            // win over their one-character prefixes.
                            let two_kind: String = lex_two_char_kind(ch, peek_ch)
                            if two_kind == "" {
                                let one_kind: String = lex_one_char_kind(ch)
                                if one_kind == "" {
                                    // Unknown char or lone '&': skip.
                                    let i = i + 1
                                } else {
                                    let tokens = native_list_append(tokens, make_tok(one_kind, ch))
                                    let i = i + 1
                                }
                            } else {
                                let tokens = native_list_append(tokens, make_tok(two_kind, ch + peek_ch))
                                let i = i + 2
                            }
                        }
                    }
                }
            }
        }
    }

    let tokens = native_list_append(tokens, make_tok("Eof", ""))
    tokens
}
|
||||
File diff suppressed because it is too large
Load Diff
+5
-79
@@ -77,33 +77,6 @@ fn parse_manifest_entry(src: String) -> String {
|
||||
return ""
|
||||
}
|
||||
|
||||
// parse_manifest_c_sources - gather the quoted path from every manifest line
// that, once trimmed, begins with `c_source `. Every line of `src` is scanned
// (no attempt is made to track which block a line belongs to). A line whose
// argument does not open with a double quote, or has no closing quote,
// contributes nothing. Paths are returned in the order they appear.
fn parse_manifest_c_sources(src: String) -> [String] {
    let rows: [String] = str_split(src, "\n")
    let row_count: Int = native_list_len(rows)
    let paths: [String] = native_list_empty()
    let idx = 0
    while idx < row_count {
        let row: String = str_trim(native_list_get(rows, idx))
        if str_starts_with(row, "c_source ") {
            // 9 == length of the "c_source " keyword including its space.
            let arg: String = str_trim(str_slice(row, 9, str_len(row)))
            if str_starts_with(arg, "\"") {
                let unquoted: String = str_slice(arg, 1, str_len(arg))
                let close: Int = str_index_of(unquoted, "\"")
                if close >= 0 {
                    let paths = native_list_append(paths, str_slice(unquoted, 0, close))
                }
            }
        }
        let idx = idx + 1
    }
    return paths
}
|
||||
|
||||
fn parse_manifest_name(src: String) -> String {
|
||||
let lines: [String] = str_split(src, "\n")
|
||||
let n: Int = native_list_len(lines)
|
||||
@@ -252,7 +225,6 @@ fn compile_module(src_path: String, out_dir: String, elc_bin: String, dry_run: B
|
||||
let bname: String = basename_noext(src_path)
|
||||
let c_out: String = out_dir + "/" + bname + ".c"
|
||||
let elh_out: String = out_dir + "/" + bname + ".elh"
|
||||
let err_tmp: String = "/tmp/elb-err-" + bname + ".txt"
|
||||
|
||||
// Check if recompile needed
|
||||
if !file_is_newer(src_path, c_out) {
|
||||
@@ -262,26 +234,18 @@ fn compile_module(src_path: String, out_dir: String, elc_bin: String, dry_run: B
|
||||
return true
|
||||
}
|
||||
|
||||
// elc streams C to stdout; redirect stderr to a temp file so we can
|
||||
// surface the actual error message on failure instead of swallowing it.
|
||||
let cmd: String = elc_bin + " --emit-header " + src_path + " > " + c_out + " 2>" + err_tmp
|
||||
// elc streams C to stdout (collect mode not yet implemented); use
|
||||
// shell redirection so the output lands in the file, not the terminal.
|
||||
let cmd: String = elc_bin + " --emit-header " + src_path + " > " + c_out + " 2>&1"
|
||||
println(" compile " + src_path)
|
||||
|
||||
if dry_run { return true }
|
||||
|
||||
let ret: Int = exec_command(cmd)
|
||||
if ret != 0 {
|
||||
// Surface the actual compiler error from stderr
|
||||
let err_msg: String = str_trim(fs_read(err_tmp))
|
||||
if !str_eq(err_msg, "") {
|
||||
println(err_msg)
|
||||
}
|
||||
// Remove partial output so a retry starts clean
|
||||
exec_command("rm -f " + c_out + " " + err_tmp)
|
||||
println("elb: compile failed: " + src_path)
|
||||
return false
|
||||
}
|
||||
exec_command("rm -f " + err_tmp)
|
||||
|
||||
// Move the generated .elh (written next to the source by elc) into
|
||||
// out_dir so that #include "module.elh" lines in the generated .c
|
||||
@@ -298,21 +262,7 @@ fn link_binary(c_files: [String], out_bin: String, runtime_path: String, out_dir
|
||||
let parts: [String] = native_list_empty()
|
||||
// Include both the runtime dir (for el_runtime.h) and the output dir
|
||||
// (for module.elh cross-module forward declarations).
|
||||
// Detect clang vs gcc: -fbracket-depth is a clang-only flag and gcc rejects
|
||||
// it with an error, so it is only passed when `cc --version` reports clang.
|
||||
let bracket_flag: String = "$(cc --version 2>&1 | grep -q clang && printf -- '-fbracket-depth=1024' || true)"
|
||||
// On macOS, OpenSSL is not on the default linker path. Detect homebrew
|
||||
// prefix and add it if present (no-op on Linux where libssl is in /usr/lib).
|
||||
let ossl_lib_flag: String = "$(brew --prefix openssl 2>/dev/null | xargs -I{} printf -- '-L{}/lib' 2>/dev/null || true)"
|
||||
let ossl_inc_flag: String = "$(brew --prefix openssl 2>/dev/null | xargs -I{} printf -- '-I{}/include' 2>/dev/null || true)"
|
||||
// Force-include the C-level master declarations header so every translation
|
||||
// unit sees all cross-module function signatures. Handles packages (like ELP)
|
||||
// where modules call each other without explicit El import statements.
|
||||
// The header is generated by elb --gen-decls or manually placed in out_dir.
|
||||
let master_decls: String = out_dir + "/elp-c-decls.h"
|
||||
let has_master: String = str_trim(exec_capture("test -f " + master_decls + " && echo yes || echo no"))
|
||||
let include_flag: String = if str_eq(has_master, "yes") { "-include " + master_decls } else { "" }
|
||||
let parts = native_list_append(parts, "cc -O2 " + bracket_flag + " " + ossl_inc_flag + " " + include_flag + " -I " + dirname_of(runtime_path) + " -I " + out_dir)
|
||||
let parts = native_list_append(parts, "cc -O2 -I " + dirname_of(runtime_path) + " -I " + out_dir)
|
||||
let i = 0
|
||||
while i < n {
|
||||
let f: String = native_list_get(c_files, i)
|
||||
@@ -320,7 +270,7 @@ fn link_binary(c_files: [String], out_bin: String, runtime_path: String, out_dir
|
||||
let i = i + 1
|
||||
}
|
||||
let parts = native_list_append(parts, runtime_path)
|
||||
let parts = native_list_append(parts, ossl_lib_flag + " -lcurl -lssl -lcrypto -lpthread -lm")
|
||||
let parts = native_list_append(parts, "-lcurl -lpthread")
|
||||
let parts = native_list_append(parts, "-o " + out_bin)
|
||||
let cmd: String = str_join(parts, " ")
|
||||
println(" link " + out_bin)
|
||||
@@ -353,7 +303,6 @@ fn main() -> Void {
|
||||
|
||||
let pkg_name: String = parse_manifest_name(manifest_src)
|
||||
let entry: String = parse_manifest_entry(manifest_src)
|
||||
let extra_c: [String] = parse_manifest_c_sources(manifest_src)
|
||||
if str_eq(entry, "") {
|
||||
println("elb: manifest.el has no 'entry' declaration")
|
||||
exit(1)
|
||||
@@ -371,20 +320,6 @@ fn main() -> Void {
|
||||
runtime_path = elc_dir + "/../el-compiler/runtime/el_runtime.c"
|
||||
}
|
||||
}
|
||||
// If --runtime points to a directory, auto-locate el_runtime.c inside it.
|
||||
// This lets both forms work:
|
||||
// --runtime=/opt/el/el-compiler/runtime (directory form)
|
||||
// --runtime=/opt/el/el-compiler/runtime/el_runtime.c (file form)
|
||||
if !str_eq(runtime_path, "") {
|
||||
let is_dir: String = str_trim(exec_capture("test -d " + runtime_path + " && echo dir || echo file"))
|
||||
if str_eq(is_dir, "dir") {
|
||||
let candidate: String = runtime_path + "/el_runtime.c"
|
||||
let has_file: String = str_trim(exec_capture("test -f " + candidate + " && echo yes || echo no"))
|
||||
if str_eq(has_file, "yes") {
|
||||
let runtime_path = candidate
|
||||
}
|
||||
}
|
||||
}
|
||||
if str_eq(runtime_path, "") {
|
||||
println("elb: cannot locate el_runtime.c - use --runtime=PATH")
|
||||
exit(1)
|
||||
@@ -432,15 +367,6 @@ fn main() -> Void {
|
||||
exit(1)
|
||||
}
|
||||
|
||||
// Append any extra C sources declared in the manifest (e.g. platform stubs)
|
||||
let ei = 0
|
||||
let en: Int = native_list_len(extra_c)
|
||||
while ei < en {
|
||||
let ec: String = native_list_get(extra_c, ei)
|
||||
let c_files = native_list_append(c_files, ec)
|
||||
let ei = ei + 1
|
||||
}
|
||||
|
||||
// Link
|
||||
let out_bin: String = out_dir + "/" + pkg_name
|
||||
let linked: Bool = link_binary(c_files, out_bin, runtime_path, out_dir, dry_run)
|
||||
@@ -1,21 +0,0 @@
|
||||
# Compiled El bytecode
|
||||
*.elc
|
||||
|
||||
# C codegen output
|
||||
*.c
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
*.dylib
|
||||
|
||||
# Combined build artifacts
|
||||
_combined.el
|
||||
*-combined.el
|
||||
|
||||
# Distribution / build output
|
||||
dist/
|
||||
build/
|
||||
out/
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
@@ -1,85 +0,0 @@
|
||||
package "elp" {
|
||||
version "0.7.0"
|
||||
description "Engram Language Protocol — bidirectional engine mapping between Engram semantic forms and natural language surface text. 31 languages."
|
||||
edition "2026"
|
||||
}
|
||||
|
||||
build {
|
||||
entry "src/elp.el"
|
||||
|
||||
// Compilation order (dependency order):
|
||||
// language-profile (no deps)
|
||||
// vocabulary (no deps)
|
||||
// morphology (depends on: language-profile)
|
||||
// morphology-es (depends on: morphology) Spanish
|
||||
// morphology-fr (depends on: morphology) French
|
||||
// morphology-de (depends on: morphology) German
|
||||
// morphology-ru (depends on: morphology) Russian
|
||||
// morphology-ja (depends on: morphology) Japanese
|
||||
// morphology-fi (depends on: morphology) Finnish
|
||||
// morphology-ar (depends on: morphology) Arabic
|
||||
// morphology-hi (depends on: morphology) Hindi
|
||||
// morphology-sw (depends on: morphology) Swahili
|
||||
// morphology-la (depends on: morphology) Latin
|
||||
// morphology-he (depends on: morphology) Hebrew
|
||||
// morphology-grc (depends on: morphology) Ancient Greek
|
||||
// morphology-ang (depends on: morphology) Old English
|
||||
// morphology-sa (depends on: morphology) Sanskrit
|
||||
// morphology-got (depends on: morphology) Gothic
|
||||
// morphology-non (depends on: morphology) Old Norse
|
||||
// morphology-enm (depends on: morphology) Middle English
|
||||
// morphology-pi (depends on: morphology) Pali
|
||||
// morphology-fro (depends on: morphology) Old French
|
||||
// morphology-goh (depends on: morphology) Old High German
|
||||
// morphology-sga (depends on: morphology) Old Irish
|
||||
// morphology-txb (depends on: morphology) Tocharian B
|
||||
// morphology-peo (depends on: morphology) Old Persian
|
||||
// morphology-akk (depends on: morphology) Akkadian
|
||||
// morphology-uga (depends on: morphology) Ugaritic
|
||||
// morphology-egy (depends on: morphology) Ancient Egyptian
|
||||
// morphology-sux (depends on: morphology) Sumerian
|
||||
// morphology-gez (depends on: morphology) Ge'ez (Classical Ethiopic)
|
||||
// morphology-cop (depends on: morphology) Coptic (Sahidic)
|
||||
// grammar (depends on: language-profile)
|
||||
// realizer (depends on: morphology, grammar, language-profile)
|
||||
// semantics (depends on: grammar, realizer, language-profile)
|
||||
// elp (depends on: semantics, realizer)
|
||||
sources [
|
||||
"src/language-profile.el",
|
||||
"src/vocabulary.el",
|
||||
"src/morphology.el",
|
||||
"src/morphology-es.el",
|
||||
"src/morphology-fr.el",
|
||||
"src/morphology-de.el",
|
||||
"src/morphology-ru.el",
|
||||
"src/morphology-ja.el",
|
||||
"src/morphology-fi.el",
|
||||
"src/morphology-ar.el",
|
||||
"src/morphology-hi.el",
|
||||
"src/morphology-sw.el",
|
||||
"src/morphology-la.el",
|
||||
"src/morphology-he.el",
|
||||
"src/morphology-grc.el",
|
||||
"src/morphology-ang.el",
|
||||
"src/morphology-sa.el",
|
||||
"src/morphology-got.el",
|
||||
"src/morphology-non.el",
|
||||
"src/morphology-enm.el",
|
||||
"src/morphology-pi.el",
|
||||
"src/morphology-fro.el",
|
||||
"src/morphology-goh.el",
|
||||
"src/morphology-sga.el",
|
||||
"src/morphology-txb.el",
|
||||
"src/morphology-peo.el",
|
||||
"src/morphology-akk.el",
|
||||
"src/morphology-uga.el",
|
||||
"src/morphology-egy.el",
|
||||
"src/morphology-sux.el",
|
||||
"src/morphology-gez.el",
|
||||
"src/morphology-cop.el",
|
||||
"src/grammar.el",
|
||||
"src/realizer.el",
|
||||
"src/semantics.el",
|
||||
"src/elp.el",
|
||||
]
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
import "language-profile.el"
|
||||
@@ -1,2 +0,0 @@
|
||||
import "language-profile.el"
|
||||
import "dedup_test_a_nodedup.el"
|
||||
@@ -1,2 +0,0 @@
|
||||
import "language-profile.el"
|
||||
extern fn fn_a(x: String) -> String
|
||||
@@ -1 +0,0 @@
|
||||
extern fn fn_a(x: String) -> String
|
||||
@@ -1,2 +0,0 @@
|
||||
import "language-profile.el"
|
||||
extern fn fn_a(x: String) -> String
|
||||
@@ -1,6 +0,0 @@
|
||||
import "language-profile.el"
|
||||
import "dedup_test_a.el"
|
||||
|
||||
fn main_fn(x: String) -> String {
|
||||
return x
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
import "language-profile.el"
|
||||
import "dedup_test_a.el"
|
||||
@@ -1,2 +0,0 @@
|
||||
import "language-profile.el"
|
||||
import "dedup_test_a_notail.el"
|
||||
-159
@@ -1,159 +0,0 @@
|
||||
// elp.el - Engram Language Protocol — public API.
|
||||
//
|
||||
// Output half of the ELP: Engram semantic form → natural language surface text.
|
||||
// 31 languages. Ties together language-profile, vocabulary, morphology,
|
||||
// grammar, realizer, and semantics into a single entry point.
|
||||
//
|
||||
// Import chain (mirrors manifest.el dependency order):
|
||||
// language-profile (no deps)
|
||||
// vocabulary (no deps)
|
||||
// morphology (depends on: language-profile)
|
||||
// morphology-XX (depends on: morphology) — all language engines
|
||||
// grammar (depends on: language-profile)
|
||||
// realizer (depends on: morphology, grammar, language-profile)
|
||||
// semantics (depends on: grammar, realizer, language-profile)
|
||||
//
|
||||
// When elc processes a source that imports this file, it resolves all
|
||||
// transitive imports via depth-first deduplication — each module is
|
||||
// inlined exactly once regardless of how many importers reference it.
|
||||
|
||||
// ── Base layers ───────────────────────────────────────────────────────────────
|
||||
import "language-profile.el"
|
||||
import "vocabulary.el"
|
||||
|
||||
// ── Morphology: base engine ───────────────────────────────────────────────────
|
||||
import "morphology.el"
|
||||
|
||||
// ── Morphology: living languages ──────────────────────────────────────────────
|
||||
import "morphology-es.el"
|
||||
import "morphology-fr.el"
|
||||
import "morphology-de.el"
|
||||
import "morphology-ru.el"
|
||||
import "morphology-ja.el"
|
||||
import "morphology-fi.el"
|
||||
import "morphology-ar.el"
|
||||
import "morphology-hi.el"
|
||||
import "morphology-sw.el"
|
||||
|
||||
// ── Morphology: ancient / classical ──────────────────────────────────────────
|
||||
import "morphology-la.el"
|
||||
import "morphology-he.el"
|
||||
|
||||
// ── Morphology: dead languages ────────────────────────────────────────────────
|
||||
import "morphology-grc.el"
|
||||
import "morphology-ang.el"
|
||||
import "morphology-sa.el"
|
||||
import "morphology-got.el"
|
||||
import "morphology-non.el"
|
||||
import "morphology-enm.el"
|
||||
import "morphology-pi.el"
|
||||
import "morphology-fro.el"
|
||||
import "morphology-goh.el"
|
||||
import "morphology-sga.el"
|
||||
import "morphology-txb.el"
|
||||
import "morphology-peo.el"
|
||||
import "morphology-akk.el"
|
||||
import "morphology-uga.el"
|
||||
import "morphology-egy.el"
|
||||
import "morphology-sux.el"
|
||||
import "morphology-gez.el"
|
||||
import "morphology-cop.el"
|
||||
|
||||
// ── Higher layers ─────────────────────────────────────────────────────────────
|
||||
import "grammar.el"
|
||||
import "realizer.el"
|
||||
import "semantics.el"
|
||||
//
|
||||
// Entry points:
|
||||
//
|
||||
// generate(semantic_form_json) -> String
|
||||
// Low-level JSON-based API, defaults to English. SemanticForm JSON fields:
|
||||
// intent - "assert" | "question" | "command"
|
||||
// agent - subject (pronoun or noun phrase, optional for commands)
|
||||
// predicate - verb base form
|
||||
// patient - object noun phrase (optional)
|
||||
// location - prepositional phrase e.g. "in the park" (optional)
|
||||
// tense - "present" | "past" | "future" (default: "present")
|
||||
// aspect - "simple" | "progressive" | "perfect" (default: "simple")
|
||||
// lang - ISO 639-1 code (default: "en")
|
||||
//
|
||||
// generate_lang(semantic_form_json, lang_code) -> String
|
||||
// JSON-based API with explicit language code (overrides any "lang" in JSON).
|
||||
//
|
||||
// generate_frame(frame: SemFrame) -> String
|
||||
// High-level SemFrame API. Language from frame's "lang" field (default "en").
|
||||
// Intents: "assert" | "query" | "describe" | "greet".
|
||||
//
|
||||
// generate_frame_lang(frame: SemFrame, lang_code: String) -> String
|
||||
// High-level SemFrame API with explicit language code override.
|
||||
|
||||
// ── JSON helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
// sem_get - look up `key` in the JSON text `json`; a thin alias for json_get
// so the rest of this module has a single place to change the lookup.
fn sem_get(json: String, key: String) -> String {
    return json_get(json, key)
}
|
||||
|
||||
// ── Public API: SemFrame ──────────────────────────────────────────────────────
|
||||
|
||||
// generate_frame - realise a SemFrame as surface text by handing it to
// sem_realize unchanged.
// NOTE(review): the output language is presumably read from the frame's
// "lang" slot (falling back to "en") inside sem_realize - confirm there.
fn generate_frame(frame: [String]) -> String {
    let text: String = sem_realize(frame)
    return text
}
|
||||
|
||||
// generate_frame_lang - realise a SemFrame as surface text in the language
// named by `lang_code`; both arguments are forwarded to sem_realize_lang.
fn generate_frame_lang(frame: [String], lang_code: String) -> String {
    let text: String = sem_realize_lang(frame, lang_code)
    return text
}
|
||||
|
||||
// ── Public API: JSON ──────────────────────────────────────────────────────────
|
||||
|
||||
// build_form_from_json - turn a JSON semantic form into a flat slot map.
//
// The result is a key/value list of sixteen strings in this fixed order:
// intent, agent, predicate, patient, location, tense, aspect, lang.
// The first seven values are read from the JSON with sem_get; the "lang"
// value is always the `lang_code` argument, never the JSON's own field.
fn build_form_from_json(semantic_form_json: String, lang_code: String) -> [String] {
    let slots: [String] = native_list_empty()

    let slots = native_list_append(slots, "intent")
    let slots = native_list_append(slots, sem_get(semantic_form_json, "intent"))

    let slots = native_list_append(slots, "agent")
    let slots = native_list_append(slots, sem_get(semantic_form_json, "agent"))

    let slots = native_list_append(slots, "predicate")
    let slots = native_list_append(slots, sem_get(semantic_form_json, "predicate"))

    let slots = native_list_append(slots, "patient")
    let slots = native_list_append(slots, sem_get(semantic_form_json, "patient"))

    let slots = native_list_append(slots, "location")
    let slots = native_list_append(slots, sem_get(semantic_form_json, "location"))

    let slots = native_list_append(slots, "tense")
    let slots = native_list_append(slots, sem_get(semantic_form_json, "tense"))

    let slots = native_list_append(slots, "aspect")
    let slots = native_list_append(slots, sem_get(semantic_form_json, "aspect"))

    // The caller-supplied language always wins.
    let slots = native_list_append(slots, "lang")
    return native_list_append(slots, lang_code)
}
|
||||
|
||||
// generate - produce surface text from a JSON semantic form.
// The language is the JSON's "lang" field; when that lookup yields the empty
// string, "en" is used instead. The slot map is built with
// build_form_from_json and passed to realize.
fn generate(semantic_form_json: String) -> String {
    let lang: String = sem_get(semantic_form_json, "lang")
    if str_eq(lang, "") {
        let lang = "en"
    }
    return realize(build_form_from_json(semantic_form_json, lang))
}
|
||||
|
||||
// generate_lang - produce surface text from a JSON semantic form in an
// explicitly chosen language. `lang_code` is written into the slot map's
// "lang" entry by build_form_from_json, so any "lang" field in the JSON
// itself is not consulted.
fn generate_lang(semantic_form_json: String, lang_code: String) -> String {
    return realize(build_form_from_json(semantic_form_json, lang_code))
}
|
||||
@@ -1,7 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn sem_get(json: String, key: String) -> String
|
||||
extern fn generate_frame(frame: Any) -> String
|
||||
extern fn generate_frame_lang(frame: Any, lang_code: String) -> String
|
||||
extern fn build_form_from_json(semantic_form_json: String, lang_code: String) -> Any
|
||||
extern fn generate(semantic_form_json: String) -> String
|
||||
extern fn generate_lang(semantic_form_json: String, lang_code: String) -> String
|
||||
@@ -1,2 +0,0 @@
|
||||
import "language-profile.el"
|
||||
extern fn fn_a(x: String) -> String
|
||||
@@ -1,2 +0,0 @@
|
||||
import "language-profile.el"
|
||||
extern fn fn_b(x: String) -> String
|
||||
@@ -1,555 +0,0 @@
|
||||
// grammar.el - Grammar engine: syntactic structure, word order, phrase assembly.
|
||||
//
|
||||
// Language-specific word order and question strategy are driven by the language
|
||||
// profile, not hardcoded. The slot map format (GramSpec) is universal; a "lang"
|
||||
// key carries the ISO 639-1 code so every downstream function can resolve the
|
||||
// active profile.
|
||||
//
|
||||
// GramSpec slot keys:
|
||||
// intent - "assert" | "question" | "command"
|
||||
// agent - subject referent string
|
||||
// predicate - verb base form
|
||||
// patient - object noun phrase (optional)
|
||||
// location - prepositional phrase (optional)
|
||||
// tense - "present" | "past" | "future"
|
||||
// aspect - "simple" | "progressive" | "perfect"
|
||||
// lang - ISO 639-1 code (default "en")
|
||||
// verb_surf - conjugated verb surface form (computed)
|
||||
// aux_surf - auxiliary surface form (computed)
|
||||
//
|
||||
// Depends on: language-profile
|
||||
|
||||
// ── Slot map helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
// slots_get - fetch the value stored under `key` in a flat slot map.
// Keys sit at even indices with their value immediately after. Returns the
// value of the first matching key, or "" when no key matches. A trailing
// element without a partner is never examined.
fn slots_get(slots: [String], key: String) -> String {
    let total: Int = native_list_len(slots)
    let pos: Int = 0
    while pos + 1 < total {
        if str_eq(native_list_get(slots, pos), key) {
            return native_list_get(slots, pos + 1)
        }
        let pos = pos + 2
    }
    return ""
}
|
||||
|
||||
// slots_set - return a copy of a flat slot map with `key` bound to `val`.
// Every pair whose key equals `key` has its value replaced; all other pairs
// are copied through in order. When no pair matched, (key, val) is appended
// at the end. A trailing element without a partner is not copied.
fn slots_set(slots: [String], key: String, val: String) -> [String] {
    let total: Int = native_list_len(slots)
    let out: [String] = native_list_empty()
    let replaced: Bool = false
    let pos: Int = 0
    while pos + 1 < total {
        let name: String = native_list_get(slots, pos)
        let old: String = native_list_get(slots, pos + 1)
        // The key is copied either way; only the value differs.
        let out = native_list_append(out, name)
        if str_eq(name, key) {
            let out = native_list_append(out, val)
            let replaced = true
        } else {
            let out = native_list_append(out, old)
        }
        let pos = pos + 2
    }
    if !replaced {
        let out = native_list_append(out, key)
        let out = native_list_append(out, val)
    }
    return out
}
|
||||
|
||||
// make_slots - start a slot map holding the single pair (k0, v0).
// The map is a flat list: a key followed directly by its value.
fn make_slots(k0: String, v0: String) -> [String] {
    let pairs: [String] = native_list_empty()
    let pairs = native_list_append(pairs, k0)
    return native_list_append(pairs, v0)
}
|
||||
|
||||
// make_slots2 - slot map with two pairs, in argument order:
// (k0, v0) from make_slots, then (k1, v1) appended.
fn make_slots2(k0: String, v0: String, k1: String, v1: String) -> [String] {
    let pairs: [String] = make_slots(k0, v0)
    let pairs = native_list_append(pairs, k1)
    return native_list_append(pairs, v1)
}
|
||||
|
||||
// make_slots3 - slot map with three pairs, in argument order:
// the first two come from make_slots2, then (k2, v2) is appended.
fn make_slots3(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String) -> [String] {
    let pairs: [String] = make_slots2(k0, v0, k1, v1)
    let pairs = native_list_append(pairs, k2)
    return native_list_append(pairs, v2)
}
|
||||
|
||||
// make_slots4 - slot map with four pairs, in argument order:
// the first three come from make_slots3, then (k3, v3) is appended.
fn make_slots4(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String, k3: String, v3: String) -> [String] {
    let pairs: [String] = make_slots3(k0, v0, k1, v1, k2, v2)
    let pairs = native_list_append(pairs, k3)
    return native_list_append(pairs, v3)
}
|
||||
|
||||
// make_slots5 - slot map with five pairs, in argument order:
// the first four come from make_slots4, then (k4, v4) is appended.
fn make_slots5(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String, k3: String, v3: String, k4: String, v4: String) -> [String] {
    let pairs: [String] = make_slots4(k0, v0, k1, v1, k2, v2, k3, v3)
    let pairs = native_list_append(pairs, k4)
    return native_list_append(pairs, v4)
}
|
||||
|
||||
// ── Grammar rule catalog ──────────────────────────────────────────────────────
|
||||
|
||||
// rule_id - identifier of a grammar rule: element 0 of the rule list.
fn rule_id(rule: [String]) -> String {
    let id: String = native_list_get(rule, 0)
    return id
}
|
||||
|
||||
// rule_lhs - left-hand-side symbol of a grammar rule: element 1 of the list.
fn rule_lhs(rule: [String]) -> String {
    let lhs: String = native_list_get(rule, 1)
    return lhs
}
|
||||
|
||||
// rule_rhs_len - number of right-hand-side symbols in a rule: the list
// length minus the two leading entries (id and lhs). No clamping is done,
// so a list shorter than two elements yields a negative count.
fn rule_rhs_len(rule: [String]) -> Int {
    return native_list_len(rule) - 2
}
|
||||
|
||||
// rule_rhs - the idx-th right-hand-side symbol of a rule (0-based).
// RHS symbols start at list position 2, after the id and lhs entries.
fn rule_rhs(rule: [String], idx: Int) -> String {
    let symbol: String = native_list_get(rule, 2 + idx)
    return symbol
}
|
||||
|
||||
// make_rule - build a rule with one right-hand-side symbol.
// Layout of the returned list: [id, lhs, r0].
fn make_rule(id: String, lhs: String, r0: String) -> [String] {
    let rule: [String] = native_list_empty()
    let rule = native_list_append(rule, id)
    let rule = native_list_append(rule, lhs)
    return native_list_append(rule, r0)
}
|
||||
|
||||
// make_rule2 - build a rule with two right-hand-side symbols:
// make_rule supplies [id, lhs, r0] and r1 is appended.
fn make_rule2(id: String, lhs: String, r0: String, r1: String) -> [String] {
    let rule: [String] = make_rule(id, lhs, r0)
    return native_list_append(rule, r1)
}
|
||||
|
||||
// make_rule3 - build a rule with three right-hand-side symbols:
// make_rule2 supplies [id, lhs, r0, r1] and r2 is appended.
fn make_rule3(id: String, lhs: String, r0: String, r1: String, r2: String) -> [String] {
    let rule: [String] = make_rule2(id, lhs, r0, r1)
    return native_list_append(rule, r2)
}
|
||||
|
||||
// make_rule4 - build a rule with four right-hand-side symbols:
// make_rule3 supplies [id, lhs, r0, r1, r2] and r3 is appended.
fn make_rule4(id: String, lhs: String, r0: String, r1: String, r2: String, r3: String) -> [String] {
    let rule: [String] = make_rule3(id, lhs, r0, r1, r2)
    return native_list_append(rule, r3)
}
|
||||
|
||||
// build_rules - construct the phrase-structure rule catalog from scratch.
// Returns fourteen rules, each laid out as [id, lhs, rhs...], in the order
// listed below. A fresh list is built on every call.
fn build_rules() -> [[String]] {
    let catalog: [[String]] = native_list_empty()

    // Sentence (S) expansions.
    let catalog = native_list_append(catalog, make_rule2("S-DECL", "S", "NP", "VP"))
    let catalog = native_list_append(catalog, make_rule3("S-QUEST", "S", "Aux", "NP", "VP"))
    let catalog = native_list_append(catalog, make_rule("S-IMP", "S", "VP"))

    // Noun phrase (NP) expansions.
    let catalog = native_list_append(catalog, make_rule2("NP-DET-N", "NP", "Det", "N"))
    let catalog = native_list_append(catalog, make_rule3("NP-DET-ADJ-N", "NP", "Det", "Adj", "N"))
    let catalog = native_list_append(catalog, make_rule("NP-PRON", "NP", "Pron"))
    let catalog = native_list_append(catalog, make_rule("NP-N", "NP", "N"))

    // Verb phrase (VP) expansions.
    let catalog = native_list_append(catalog, make_rule("VP-V", "VP", "V"))
    let catalog = native_list_append(catalog, make_rule2("VP-V-NP", "VP", "V", "NP"))
    let catalog = native_list_append(catalog, make_rule2("VP-V-PP", "VP", "V", "PP"))
    let catalog = native_list_append(catalog, make_rule3("VP-V-NP-PP", "VP", "V", "NP", "PP"))
    let catalog = native_list_append(catalog, make_rule2("VP-AUX-V", "VP", "Aux", "V"))
    let catalog = native_list_append(catalog, make_rule3("VP-AUX-V-NP", "VP", "Aux", "V", "NP"))

    // Prepositional phrase (PP) expansion.
    let catalog = native_list_append(catalog, make_rule2("PP-P-NP", "PP", "P", "NP"))

    return catalog
}
|
||||
|
||||
// get_rules - accessor for the rule catalog. Currently just calls
// build_rules, so the catalog is rebuilt on each call.
fn get_rules() -> [[String]] {
    let catalog: [[String]] = build_rules()
    return catalog
}
|
||||
|
||||
// find_rule - look up a rule by its identifier in the catalog returned by
// get_rules. Returns the first rule whose element 0 equals `rule_id_str`,
// or an empty list when no rule has that id.
fn find_rule(rule_id_str: String) -> [String] {
    let catalog: [[String]] = get_rules()
    let total: Int = native_list_len(catalog)
    let idx: Int = 0
    while idx < total {
        let candidate: [String] = native_list_get(catalog, idx)
        if str_eq(native_list_get(candidate, 0), rule_id_str) {
            return candidate
        }
        let idx = idx + 1
    }
    // Not found: signal with an empty rule.
    let none: [String] = native_list_empty()
    return none
}
|
||||
|
||||
// ── Tree node construction ────────────────────────────────────────────────────
|
||||
|
||||
// make_leaf - textual tree leaf of the form "(LABEL word)".
fn make_leaf(label: String, word: String) -> String {
    let head: String = "(" + label
    return head + " " + word + ")"
}
|
||||
|
||||
// make_node1 - textual interior node with one child: "(LABEL _ child0)".
// The "_" fills the word position that a leaf would occupy.
fn make_node1(label: String, child0: String) -> String {
    let head: String = "(" + label + " _ "
    return head + child0 + ")"
}
|
||||
|
||||
// make_node2 - textual interior node with two children:
// "(LABEL _ child0 child1)".
fn make_node2(label: String, child0: String, child1: String) -> String {
    let head: String = "(" + label + " _ "
    return head + child0 + " " + child1 + ")"
}
|
||||
|
||||
// make_node3 - textual interior node with three children:
// "(LABEL _ child0 child1 child2)".
fn make_node3(label: String, child0: String, child1: String, child2: String) -> String {
    let head: String = "(" + label + " _ "
    return head + child0 + " " + child1 + " " + child2 + ")"
}
|
||||
|
||||
// make_node4 - textual interior node with four children:
// "(LABEL _ child0 child1 child2 child3)".
fn make_node4(label: String, child0: String, child1: String, child2: String, child3: String) -> String {
    let head: String = "(" + label + " _ "
    return head + child0 + " " + child1 + " " + child2 + " " + child3 + ")"
}
|
||||
|
||||
// ── Tree rendering ────────────────────────────────────────────────────────────
|
||||
|
||||
// nlg_is_ws - true when `c` is exactly a space, a tab, or a newline;
// false for anything else (including the empty string and "\r").
fn nlg_is_ws(c: String) -> Bool {
    let ws: Bool = false
    if str_eq(c, " ") { let ws = true }
    if str_eq(c, "\t") { let ws = true }
    if str_eq(c, "\n") { let ws = true }
    return ws
}
|
||||
|
||||
// skip_ws - starting at index `pos`, advance past consecutive whitespace
// characters (as judged by nlg_is_ws) and return the index of the first
// non-whitespace character. If `pos` is already at or beyond the end of
// `s`, it is returned unchanged; otherwise the result never exceeds the
// length of `s`.
fn skip_ws(s: String, pos: Int) -> Int {
    let limit: Int = str_len(s)
    let cur: Int = pos
    let done: Bool = false
    while !done {
        if cur < limit {
            if nlg_is_ws(str_slice(s, cur, cur + 1)) {
                let cur = cur + 1
            } else {
                let done = true
            }
        } else {
            let done = true
        }
    }
    return cur
}
|
||||
|
||||
// scan_token - read one token from `s` beginning at index `start`.
// The token runs until the first whitespace character (per nlg_is_ws),
// "(" or ")", or the end of the string; the delimiter is not consumed.
// Returns a two-element list: [token text, end index as a decimal string].
// The end index is the position just past the token's last character.
fn scan_token(s: String, start: Int) -> [String] {
    let limit: Int = str_len(s)
    let cur: Int = start
    let done: Bool = false
    while !done {
        if cur >= limit {
            let done = true
        } else {
            let ch: String = str_slice(s, cur, cur + 1)
            // A token ends at whitespace or at either parenthesis.
            let is_delim: Bool = nlg_is_ws(ch)
            if str_eq(ch, "(") { let is_delim = true }
            if str_eq(ch, ")") { let is_delim = true }
            if is_delim {
                let done = true
            } else {
                let cur = cur + 1
            }
        }
    }
    let pair: [String] = native_list_empty()
    let pair = native_list_append(pair, str_slice(s, start, cur))
    return native_list_append(pair, int_to_str(cur))
}
|
||||
|
||||
// render_tree - flatten a parenthesised tree string into its surface words.
//
// The string is walked left to right. The first token after each "(" is
// taken to be that node's label and is dropped. Of the remaining tokens,
// the placeholder "_" is dropped and every other token is kept. Kept
// tokens are joined with single spaces in the order they were met.
fn render_tree(tree: String) -> String {
    let surface: [String] = native_list_empty()
    let total: Int = str_len(tree)
    let pos: Int = 0
    // True from an opening paren until the next token or ")" is seen.
    let expect_label: Bool = false
    while pos < total {
        let ch: String = str_slice(tree, pos, pos + 1)
        if str_eq(ch, "(") {
            let expect_label = true
            let pos = pos + 1
        } else {
            if str_eq(ch, ")") {
                let expect_label = false
                let pos = pos + 1
            } else {
                if nlg_is_ws(ch) {
                    let pos = pos + 1
                } else {
                    // scan_token yields [text, end index as string].
                    let scanned: [String] = scan_token(tree, pos)
                    let text: String = native_list_get(scanned, 0)
                    let pos = str_to_int(native_list_get(scanned, 1))
                    if expect_label {
                        let expect_label = false
                    } else {
                        if !str_eq(text, "_") {
                            let surface = native_list_append(surface, text)
                        }
                    }
                }
            }
        }
    }
    return str_join(surface, " ")
}
|
||||
|
||||
// ── Word-order engine ─────────────────────────────────────────────────────────
|
||||
|
||||
// gram_word_order: the word-order string of a language profile, obtained by
// forwarding `profile` to lang_word_order.
fn gram_word_order(profile: [String]) -> String {
    let order: String = lang_word_order(profile)
    return order
}
|
||||
|
||||
// gram_order_constituents: arrange subject, verb and object surface strings
// according to the word order reported by gram_word_order(profile).
//
// subj, verb, obj: surface strings; an empty string is left out of the result.
// Returns the non-empty constituents joined with single spaces.
//
// Recognised orders: SVO, SOV, VSO, VOS, OVS, OSV. Any other value
// (including "free") falls back to SVO as the neutral citation order.
fn gram_order_constituents(subj: String, verb: String, obj: String, profile: [String]) -> String {
    let order: String = gram_word_order(profile)

    // Start from SVO; the matching branch below (at most one) re-slots them.
    let first: String = subj
    let second: String = verb
    let third: String = obj

    if str_eq(order, "SOV") {
        let first = subj
        let second = obj
        let third = verb
    }
    if str_eq(order, "VSO") {
        let first = verb
        let second = subj
        let third = obj
    }
    if str_eq(order, "VOS") {
        let first = verb
        let second = obj
        let third = subj
    }
    if str_eq(order, "OVS") {
        let first = obj
        let second = verb
        let third = subj
    }
    if str_eq(order, "OSV") {
        let first = obj
        let second = subj
        let third = verb
    }

    let ordered: [String] = native_list_empty()
    if !str_eq(first, "") {
        let ordered = native_list_append(ordered, first)
    }
    if !str_eq(second, "") {
        let ordered = native_list_append(ordered, second)
    }
    if !str_eq(third, "") {
        let ordered = native_list_append(ordered, third)
    }
    return str_join(ordered, " ")
}
|
||||
|
||||
// gram_build_vp: build the surface string of a verb phrase.
//
// verb:    main verb surface form.
// aux:     auxiliary surface form; "" means there is no auxiliary.
// profile: language profile (accepted but not consulted by this function).
//
// With an auxiliary the result is "<aux> <verb>"; otherwise it is the verb
// alone. The auxiliary is placed before the main verb for every language,
// including SOV languages, as a default.
fn gram_build_vp(verb: String, aux: String, profile: [String]) -> String {
    if !str_eq(aux, "") {
        return aux + " " + verb
    }
    return verb
}
|
||||
|
||||
// gram_question_strategy: question-formation strategy for a language,
// selected by the profile's "code" entry.
//
//   "do-support" - English: "Do you see?" (do-auxiliary inserted, verb stays base)
//   "particle"   - Japanese: sentence-final か appended
//   "intonation" - Mandarin, Spanish: rising intonation only, word order unchanged
//   "inversion"  - French, German: subject-verb inversion
fn gram_question_strategy(profile: [String]) -> String {
    let lang_code: String = lang_get(profile, "code")

    // do-support: English, and Middle English where do-support is emerging.
    if str_eq(lang_code, "en") { return "do-support" }
    if str_eq(lang_code, "enm") { return "do-support" }

    // particle: Japanese, Hindi, Finnish.
    if str_eq(lang_code, "ja") { return "particle" }
    if str_eq(lang_code, "hi") { return "particle" }
    if str_eq(lang_code, "fi") { return "particle" }

    // inversion: French, German.
    if str_eq(lang_code, "fr") { return "inversion" }
    if str_eq(lang_code, "de") { return "inversion" }

    // intonation: zh, es, ar, ru, sw, la (word order or -ne suffix), he,
    // grc (ἆρα), ang (hwæþer), sa (kim), got (ibai), non (hvárr), pi (kim),
    // and every unknown code. Intonation is the safest default: never wrong,
    // just flat.
    return "intonation"
}
|
||||
|
||||
// ── NP and PP assembly ────────────────────────────────────────────────────────
|
||||
//
|
||||
// These functions do not take a language profile: the logic is the same
// across languages because we work with pre-assembled strings (Engram
// vocabulary supplies language-specific forms before these functions see them).
|
||||
|
||||
// is_pronoun: true when word matches (via str_eq) one of the personal
// pronouns I, you, he, she, it, we, they, me, him, her, us, them.
fn is_pronoun(word: String) -> Bool {
    let pronouns: [String] = str_split("I you he she it we they me him her us them", " ")
    let count: Int = native_list_len(pronouns)
    let idx: Int = 0
    while idx < count {
        if str_eq(word, native_list_get(pronouns, idx)) {
            return true
        }
        let idx = idx + 1
    }
    return false
}
|
||||
|
||||
// build_np: assemble a noun-phrase tree from a referent string.
//
// referent: surface string of the noun phrase.
// slots:    accepted but not read here (reserved for future case-marking /
//           article agreement).
//
// Shapes produced, chosen by splitting referent on single spaces:
//   pronoun (per is_pronoun) -> NP with one Pron leaf
//   two words                -> NP with Det and N leaves
//   three words              -> NP with Det, Adj and N leaves
//   anything else            -> NP with the whole referent as a single N leaf
fn build_np(referent: String, slots: [String]) -> String {
    if is_pronoun(referent) {
        return make_node1("NP", make_leaf("Pron", referent))
    }
    let words: [String] = str_split(referent, " ")
    let word_count: Int = native_list_len(words)
    if word_count == 2 {
        let det_leaf: String = make_leaf("Det", native_list_get(words, 0))
        let noun_leaf: String = make_leaf("N", native_list_get(words, 1))
        return make_node2("NP", det_leaf, noun_leaf)
    }
    if word_count == 3 {
        let det_leaf: String = make_leaf("Det", native_list_get(words, 0))
        let adj_leaf: String = make_leaf("Adj", native_list_get(words, 1))
        let noun_leaf: String = make_leaf("N", native_list_get(words, 2))
        return make_node3("NP", det_leaf, adj_leaf, noun_leaf)
    }
    // Single word, or four and more words: keep the referent whole.
    return make_node1("NP", make_leaf("N", referent))
}
|
||||
|
||||
// build_pp: assemble a prepositional-phrase tree from a "PREP NP" string.
//
// The first space-separated word becomes the P leaf; the remaining words are
// re-joined with spaces and handed to build_np. A string with fewer than two
// words becomes a single PP leaf holding the whole string.
//
// For postpositional languages (ja, hi, ko) the caller is expected to supply
// the value already assembled with the postposition in the correct position
// (vocabulary lookup from Engram supplies the right surface form).
fn build_pp(loc: String) -> String {
    let words: [String] = str_split(loc, " ")
    let total: Int = native_list_len(words)
    if total < 2 {
        return make_leaf("PP", loc)
    }
    let prep_leaf: String = make_leaf("P", native_list_get(words, 0))
    // Collect everything after the preposition.
    let rest: [String] = native_list_empty()
    let idx: Int = 1
    while idx < total {
        let rest = native_list_append(rest, native_list_get(words, idx))
        let idx = idx + 1
    }
    let np_tree: String = build_np(str_join(rest, " "), native_list_empty())
    return make_node2("PP", prep_leaf, np_tree)
}
|
||||
|
||||
// ── VP tree construction ──────────────────────────────────────────────────────
|
||||
|
||||
// build_vp_body: build a VP tree (without auxiliary) from slots.
//
// Reads "verb_surf", "patient" and "location" from slots. The V leaf always
// comes first; an object NP follows when patient is non-empty, and a PP
// follows when location is non-empty.
fn build_vp_body(slots: [String]) -> String {
    let verb_surf: String = slots_get(slots, "verb_surf")
    let patient: String = slots_get(slots, "patient")
    let loc: String = slots_get(slots, "location")

    if str_eq(patient, "") {
        // No object: V alone, or V + PP.
        if str_eq(loc, "") {
            return make_node1("VP", make_leaf("V", verb_surf))
        }
        let pp_only: String = build_pp(loc)
        return make_node2("VP", make_leaf("V", verb_surf), pp_only)
    }

    let obj_np: String = build_np(patient, slots)
    if str_eq(loc, "") {
        return make_node2("VP", make_leaf("V", verb_surf), obj_np)
    }
    let pp: String = build_pp(loc)
    return make_node3("VP", make_leaf("V", verb_surf), obj_np, pp)
}
|
||||
|
||||
// build_vp_from_slots: build a VP tree from slots, with an optional auxiliary.
//
// Reads "aux_surf" from slots. When it is empty the work is delegated to
// build_vp_body. Otherwise the VP starts with an Aux leaf and a V leaf,
// followed by an object NP when "patient" is non-empty and a PP when
// "location" is non-empty.
//
// The location PP used to be dropped whenever an auxiliary was present
// ("location" was read but never used); it is now emitted, matching what
// build_vp_body does for the auxiliary-free case.
fn build_vp_from_slots(slots: [String]) -> String {
    let aux_surf: String = slots_get(slots, "aux_surf")
    if str_eq(aux_surf, "") {
        return build_vp_body(slots)
    }

    let verb_surf: String = slots_get(slots, "verb_surf")
    let patient: String = slots_get(slots, "patient")
    let loc: String = slots_get(slots, "location")

    if !str_eq(patient, "") {
        let obj_np: String = build_np(patient, slots)
        if !str_eq(loc, "") {
            let pp: String = build_pp(loc)
            return make_node4("VP", make_leaf("Aux", aux_surf), make_leaf("V", verb_surf), obj_np, pp)
        }
        return make_node3("VP", make_leaf("Aux", aux_surf), make_leaf("V", verb_surf), obj_np)
    }

    if !str_eq(loc, "") {
        let pp: String = build_pp(loc)
        return make_node3("VP", make_leaf("Aux", aux_surf), make_leaf("V", verb_surf), pp)
    }
    return make_node2("VP", make_leaf("Aux", aux_surf), make_leaf("V", verb_surf))
}
|
||||
|
||||
// ── Tree generator ────────────────────────────────────────────────────────────
|
||||
|
||||
// generate_tree: build the sentence tree for a grammar rule id.
//
// rule_id_str: rule identifier looked up with find_rule.
// slots:       slot list read with slots_get ("agent", "aux_surf", and the
//              slots consumed by the VP builders).
//
// Results:
//   unknown rule (find_rule returns an empty list) -> leaf ERR "unknown-rule"
//   "S-DECL"  -> S over subject NP and VP (build_vp_from_slots)
//   "S-QUEST" -> S over Aux leaf, subject NP and VP (build_vp_body)
//   "S-IMP"   -> S over VP (build_vp_from_slots)
//   any other known rule -> leaf labelled with the rule's entry at index 1, word "?"
fn generate_tree(rule_id_str: String, slots: [String]) -> String {
    let rule: [String] = find_rule(rule_id_str)
    if native_list_len(rule) == 0 {
        return make_leaf("ERR", "unknown-rule")
    }

    // Left-hand side of the rule; only needed for the fallback leaf.
    let lhs: String = native_list_get(rule, 1)

    if str_eq(rule_id_str, "S-IMP") {
        let imperative_vp: String = build_vp_from_slots(slots)
        return make_node1("S", imperative_vp)
    }

    if str_eq(rule_id_str, "S-DECL") {
        let subject_np: String = build_np(slots_get(slots, "agent"), slots)
        let predicate: String = build_vp_from_slots(slots)
        return make_node2("S", subject_np, predicate)
    }

    if str_eq(rule_id_str, "S-QUEST") {
        let subject_np: String = build_np(slots_get(slots, "agent"), slots)
        let predicate: String = build_vp_body(slots)
        // The auxiliary is fronted, so the VP is built without it.
        let aux_leaf: String = make_leaf("Aux", slots_get(slots, "aux_surf"))
        return make_node3("S", aux_leaf, subject_np, predicate)
    }

    return make_leaf(lhs, "?")
}
|
||||
@@ -1,38 +0,0 @@
|
||||
// auto-generated by elc --emit-header - do not edit
|
||||
extern fn slots_get(slots: Any, key: String) -> String
|
||||
extern fn slots_set(slots: Any, key: String, val: String) -> Any
|
||||
extern fn make_slots(k0: String, v0: String) -> Any
|
||||
extern fn make_slots2(k0: String, v0: String, k1: String, v1: String) -> Any
|
||||
extern fn make_slots3(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String) -> Any
|
||||
extern fn make_slots4(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String, k3: String, v3: String) -> Any
|
||||
extern fn make_slots5(k0: String, v0: String, k1: String, v1: String, k2: String, v2: String, k3: String, v3: String, k4: String, v4: String) -> Any
|
||||
extern fn rule_id(rule: Any) -> String
|
||||
extern fn rule_lhs(rule: Any) -> String
|
||||
extern fn rule_rhs_len(rule: Any) -> Int
|
||||
extern fn rule_rhs(rule: Any, idx: Int) -> String
|
||||
extern fn make_rule(id: String, lhs: String, r0: String) -> Any
|
||||
extern fn make_rule2(id: String, lhs: String, r0: String, r1: String) -> Any
|
||||
extern fn make_rule3(id: String, lhs: String, r0: String, r1: String, r2: String) -> Any
|
||||
extern fn make_rule4(id: String, lhs: String, r0: String, r1: String, r2: String, r3: String) -> Any
|
||||
extern fn build_rules() -> Any
|
||||
extern fn get_rules() -> Any
|
||||
extern fn find_rule(rule_id_str: String) -> Any
|
||||
extern fn make_leaf(label: String, word: String) -> String
|
||||
extern fn make_node1(label: String, child0: String) -> String
|
||||
extern fn make_node2(label: String, child0: String, child1: String) -> String
|
||||
extern fn make_node3(label: String, child0: String, child1: String, child2: String) -> String
|
||||
extern fn make_node4(label: String, child0: String, child1: String, child2: String, child3: String) -> String
|
||||
extern fn nlg_is_ws(c: String) -> Bool
|
||||
extern fn skip_ws(s: String, pos: Int) -> Int
|
||||
extern fn scan_token(s: String, start: Int) -> Any
|
||||
extern fn render_tree(tree: String) -> String
|
||||
extern fn gram_word_order(profile: Any) -> String
|
||||
extern fn gram_order_constituents(subj: String, verb: String, obj: String, profile: Any) -> String
|
||||
extern fn gram_build_vp(verb: String, aux: String, profile: Any) -> String
|
||||
extern fn gram_question_strategy(profile: Any) -> String
|
||||
extern fn is_pronoun(word: String) -> Bool
|
||||
extern fn build_np(referent: String, slots: Any) -> String
|
||||
extern fn build_pp(loc: String) -> String
|
||||
extern fn build_vp_body(slots: Any) -> String
|
||||
extern fn build_vp_from_slots(slots: Any) -> String
|
||||
extern fn generate_tree(rule_id_str: String, slots: Any) -> String
|
||||
@@ -1,761 +0,0 @@
|
||||
// big language-profile for testing
|
||||
// lang_profile_big0: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big0(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// lang_get_big0: return the value stored right after `key` in a flat
// key/value profile list, or "" when the key is absent. Only even indices
// are examined as keys.
fn lang_get_big0(profile: [String], key: String) -> String {
    let last: Int = native_list_len(profile) - 1
    let at: Int = 0
    while at < last {
        if str_eq(native_list_get(profile, at), key) {
            return native_list_get(profile, at + 1)
        }
        let at = at + 2
    }
    return ""
}
|
||||
|
||||
// lang_profile_en: profile list for code "en", built with lang_profile_big0
// from fixed settings.
fn lang_profile_en() -> [String] {
    let english: [String] = lang_profile_big0("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return english
}
|
||||
|
||||
// lang_profile_big1: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big1(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// lang_get_big1: return the value stored right after `key` in a flat
// key/value profile list, or "" when the key is absent. Only even indices
// are examined as keys.
fn lang_get_big1(profile: [String], key: String) -> String {
    let last: Int = native_list_len(profile) - 1
    let at: Int = 0
    while at < last {
        if str_eq(native_list_get(profile, at), key) {
            return native_list_get(profile, at + 1)
        }
        let at = at + 2
    }
    return ""
}
|
||||
|
||||
// lang_profile_en: profile list for code "en", built with lang_profile_big1
// from fixed settings.
fn lang_profile_en() -> [String] {
    let english: [String] = lang_profile_big1("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return english
}
|
||||
|
||||
// lang_profile_big2: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big2(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// lang_get_big2: return the value stored right after `key` in a flat
// key/value profile list, or "" when the key is absent. Only even indices
// are examined as keys.
fn lang_get_big2(profile: [String], key: String) -> String {
    let last: Int = native_list_len(profile) - 1
    let at: Int = 0
    while at < last {
        if str_eq(native_list_get(profile, at), key) {
            return native_list_get(profile, at + 1)
        }
        let at = at + 2
    }
    return ""
}
|
||||
|
||||
// lang_profile_en: profile list for code "en", built with lang_profile_big2
// from fixed settings.
fn lang_profile_en() -> [String] {
    let english: [String] = lang_profile_big2("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return english
}
|
||||
|
||||
// lang_profile_big3: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big3(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// lang_get_big3: return the value stored right after `key` in a flat
// key/value profile list, or "" when the key is absent. Only even indices
// are examined as keys.
fn lang_get_big3(profile: [String], key: String) -> String {
    let last: Int = native_list_len(profile) - 1
    let at: Int = 0
    while at < last {
        if str_eq(native_list_get(profile, at), key) {
            return native_list_get(profile, at + 1)
        }
        let at = at + 2
    }
    return ""
}
|
||||
|
||||
// lang_profile_en: profile list for code "en", built with lang_profile_big3
// from fixed settings.
fn lang_profile_en() -> [String] {
    let english: [String] = lang_profile_big3("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return english
}
|
||||
|
||||
// lang_profile_big4: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big4(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// lang_get_big4: return the value stored right after `key` in a flat
// key/value profile list, or "" when the key is absent. Only even indices
// are examined as keys.
fn lang_get_big4(profile: [String], key: String) -> String {
    let last: Int = native_list_len(profile) - 1
    let at: Int = 0
    while at < last {
        if str_eq(native_list_get(profile, at), key) {
            return native_list_get(profile, at + 1)
        }
        let at = at + 2
    }
    return ""
}
|
||||
|
||||
// lang_profile_en: profile list for code "en", built with lang_profile_big4
// from fixed settings.
fn lang_profile_en() -> [String] {
    let english: [String] = lang_profile_big4("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return english
}
|
||||
|
||||
// lang_profile_big5: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big5(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// lang_get_big5: return the value stored right after `key` in a flat
// key/value profile list, or "" when the key is absent. Only even indices
// are examined as keys.
fn lang_get_big5(profile: [String], key: String) -> String {
    let last: Int = native_list_len(profile) - 1
    let at: Int = 0
    while at < last {
        if str_eq(native_list_get(profile, at), key) {
            return native_list_get(profile, at + 1)
        }
        let at = at + 2
    }
    return ""
}
|
||||
|
||||
// lang_profile_en: profile list for code "en", built with lang_profile_big5
// from fixed settings.
fn lang_profile_en() -> [String] {
    let english: [String] = lang_profile_big5("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return english
}
|
||||
|
||||
// lang_profile_big6: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big6(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// lang_get_big6: return the value stored right after `key` in a flat
// key/value profile list, or "" when the key is absent. Only even indices
// are examined as keys.
fn lang_get_big6(profile: [String], key: String) -> String {
    let last: Int = native_list_len(profile) - 1
    let at: Int = 0
    while at < last {
        if str_eq(native_list_get(profile, at), key) {
            return native_list_get(profile, at + 1)
        }
        let at = at + 2
    }
    return ""
}
|
||||
|
||||
// lang_profile_en: profile list for code "en", built with lang_profile_big6
// from fixed settings.
fn lang_profile_en() -> [String] {
    let english: [String] = lang_profile_big6("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return english
}
|
||||
|
||||
// lang_profile_big7: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big7(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// lang_get_big7: return the value stored right after `key` in a flat
// key/value profile list, or "" when the key is absent. Only even indices
// are examined as keys.
fn lang_get_big7(profile: [String], key: String) -> String {
    let last: Int = native_list_len(profile) - 1
    let at: Int = 0
    while at < last {
        if str_eq(native_list_get(profile, at), key) {
            return native_list_get(profile, at + 1)
        }
        let at = at + 2
    }
    return ""
}
|
||||
|
||||
// lang_profile_en: profile list for code "en", built with lang_profile_big7
// from fixed settings.
fn lang_profile_en() -> [String] {
    let english: [String] = lang_profile_big7("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return english
}
|
||||
|
||||
// lang_profile_big8: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big8(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// lang_get_big8: return the value stored right after `key` in a flat
// key/value profile list, or "" when the key is absent. Only even indices
// are examined as keys.
fn lang_get_big8(profile: [String], key: String) -> String {
    let last: Int = native_list_len(profile) - 1
    let at: Int = 0
    while at < last {
        if str_eq(native_list_get(profile, at), key) {
            return native_list_get(profile, at + 1)
        }
        let at = at + 2
    }
    return ""
}
|
||||
|
||||
// lang_profile_en: profile list for code "en", built with lang_profile_big8
// from fixed settings.
fn lang_profile_en() -> [String] {
    let english: [String] = lang_profile_big8("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return english
}
|
||||
|
||||
// lang_profile_big9: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big9(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// lang_get_big9: return the value stored right after `key` in a flat
// key/value profile list, or "" when the key is absent. Only even indices
// are examined as keys.
fn lang_get_big9(profile: [String], key: String) -> String {
    let last: Int = native_list_len(profile) - 1
    let at: Int = 0
    while at < last {
        if str_eq(native_list_get(profile, at), key) {
            return native_list_get(profile, at + 1)
        }
        let at = at + 2
    }
    return ""
}
|
||||
|
||||
// lang_profile_en: profile list for code "en", built with lang_profile_big9
// from fixed settings.
fn lang_profile_en() -> [String] {
    let english: [String] = lang_profile_big9("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return english
}
|
||||
|
||||
// lang_profile_big10: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big10(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// lang_get_big10: return the value stored right after `key` in a flat
// key/value profile list, or "" when the key is absent. Only even indices
// are examined as keys.
fn lang_get_big10(profile: [String], key: String) -> String {
    let last: Int = native_list_len(profile) - 1
    let at: Int = 0
    while at < last {
        if str_eq(native_list_get(profile, at), key) {
            return native_list_get(profile, at + 1)
        }
        let at = at + 2
    }
    return ""
}
|
||||
|
||||
// lang_profile_en: profile list for code "en", built with lang_profile_big10
// from fixed settings.
fn lang_profile_en() -> [String] {
    let english: [String] = lang_profile_big10("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return english
}
|
||||
|
||||
// lang_profile_big11: build a flat key/value profile list. Each field name is
// appended immediately followed by its value, in the order: code, word_order,
// morph_type, has_case, has_gender, script_dir, agreement, null_subject.
fn lang_profile_big11(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let fields: [String] = native_list_empty()
    let fields = native_list_append(native_list_append(fields, "code"), code)
    let fields = native_list_append(native_list_append(fields, "word_order"), word_order)
    let fields = native_list_append(native_list_append(fields, "morph_type"), morph_type)
    let fields = native_list_append(native_list_append(fields, "has_case"), has_case)
    let fields = native_list_append(native_list_append(fields, "has_gender"), has_gender)
    let fields = native_list_append(native_list_append(fields, "script_dir"), script_dir)
    let fields = native_list_append(native_list_append(fields, "agreement"), agreement)
    let fields = native_list_append(native_list_append(fields, "null_subject"), null_subject)
    return fields
}
|
||||
|
||||
// Looks up `key` in a flat key/value slot list and returns the value stored
// right after it, or "" when no key at an even offset matches.
fn lang_get_big11(profile: [String], key: String) -> String {
    let count: Int = native_list_len(profile)
    let last: Int = count - 1
    let pos: Int = 0
    while pos < last {
        // Keys sit at even offsets; the value follows its key.
        if str_eq(native_list_get(profile, pos), key) {
            return native_list_get(profile, pos + 1)
        }
        let pos = pos + 2
    }
    return ""
}
|
||||
|
||||
fn lang_profile_en() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile_big11("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return profile
}
|
||||
|
||||
// Builds a flat slot list in which every slot name is immediately followed by
// its value, in the fixed order code .. null_subject.
fn lang_profile_big12(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let slots: [String] = native_list_empty()
    let slots = native_list_append(native_list_append(slots, "code"), code)
    let slots = native_list_append(native_list_append(slots, "word_order"), word_order)
    let slots = native_list_append(native_list_append(slots, "morph_type"), morph_type)
    let slots = native_list_append(native_list_append(slots, "has_case"), has_case)
    let slots = native_list_append(native_list_append(slots, "has_gender"), has_gender)
    let slots = native_list_append(native_list_append(slots, "script_dir"), script_dir)
    let slots = native_list_append(native_list_append(slots, "agreement"), agreement)
    let slots = native_list_append(native_list_append(slots, "null_subject"), null_subject)
    return slots
}
|
||||
|
||||
// Looks up `key` in a flat key/value slot list and returns the value stored
// right after it, or "" when no key at an even offset matches.
fn lang_get_big12(profile: [String], key: String) -> String {
    let count: Int = native_list_len(profile)
    let last: Int = count - 1
    let pos: Int = 0
    while pos < last {
        // Keys sit at even offsets; the value follows its key.
        if str_eq(native_list_get(profile, pos), key) {
            return native_list_get(profile, pos + 1)
        }
        let pos = pos + 2
    }
    return ""
}
|
||||
|
||||
fn lang_profile_en() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile_big12("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return profile
}
|
||||
|
||||
// Builds a flat slot list in which every slot name is immediately followed by
// its value, in the fixed order code .. null_subject.
fn lang_profile_big13(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let slots: [String] = native_list_empty()
    let slots = native_list_append(native_list_append(slots, "code"), code)
    let slots = native_list_append(native_list_append(slots, "word_order"), word_order)
    let slots = native_list_append(native_list_append(slots, "morph_type"), morph_type)
    let slots = native_list_append(native_list_append(slots, "has_case"), has_case)
    let slots = native_list_append(native_list_append(slots, "has_gender"), has_gender)
    let slots = native_list_append(native_list_append(slots, "script_dir"), script_dir)
    let slots = native_list_append(native_list_append(slots, "agreement"), agreement)
    let slots = native_list_append(native_list_append(slots, "null_subject"), null_subject)
    return slots
}
|
||||
|
||||
// Looks up `key` in a flat key/value slot list and returns the value stored
// right after it, or "" when no key at an even offset matches.
fn lang_get_big13(profile: [String], key: String) -> String {
    let count: Int = native_list_len(profile)
    let last: Int = count - 1
    let pos: Int = 0
    while pos < last {
        // Keys sit at even offsets; the value follows its key.
        if str_eq(native_list_get(profile, pos), key) {
            return native_list_get(profile, pos + 1)
        }
        let pos = pos + 2
    }
    return ""
}
|
||||
|
||||
fn lang_profile_en() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile_big13("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return profile
}
|
||||
|
||||
// Builds a flat slot list in which every slot name is immediately followed by
// its value, in the fixed order code .. null_subject.
fn lang_profile_big14(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let slots: [String] = native_list_empty()
    let slots = native_list_append(native_list_append(slots, "code"), code)
    let slots = native_list_append(native_list_append(slots, "word_order"), word_order)
    let slots = native_list_append(native_list_append(slots, "morph_type"), morph_type)
    let slots = native_list_append(native_list_append(slots, "has_case"), has_case)
    let slots = native_list_append(native_list_append(slots, "has_gender"), has_gender)
    let slots = native_list_append(native_list_append(slots, "script_dir"), script_dir)
    let slots = native_list_append(native_list_append(slots, "agreement"), agreement)
    let slots = native_list_append(native_list_append(slots, "null_subject"), null_subject)
    return slots
}
|
||||
|
||||
// Looks up `key` in a flat key/value slot list and returns the value stored
// right after it, or "" when no key at an even offset matches.
fn lang_get_big14(profile: [String], key: String) -> String {
    let count: Int = native_list_len(profile)
    let last: Int = count - 1
    let pos: Int = 0
    while pos < last {
        // Keys sit at even offsets; the value follows its key.
        if str_eq(native_list_get(profile, pos), key) {
            return native_list_get(profile, pos + 1)
        }
        let pos = pos + 2
    }
    return ""
}
|
||||
|
||||
fn lang_profile_en() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile_big14("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return profile
}
|
||||
|
||||
// Builds a flat slot list in which every slot name is immediately followed by
// its value, in the fixed order code .. null_subject.
fn lang_profile_big15(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let slots: [String] = native_list_empty()
    let slots = native_list_append(native_list_append(slots, "code"), code)
    let slots = native_list_append(native_list_append(slots, "word_order"), word_order)
    let slots = native_list_append(native_list_append(slots, "morph_type"), morph_type)
    let slots = native_list_append(native_list_append(slots, "has_case"), has_case)
    let slots = native_list_append(native_list_append(slots, "has_gender"), has_gender)
    let slots = native_list_append(native_list_append(slots, "script_dir"), script_dir)
    let slots = native_list_append(native_list_append(slots, "agreement"), agreement)
    let slots = native_list_append(native_list_append(slots, "null_subject"), null_subject)
    return slots
}
|
||||
|
||||
// Looks up `key` in a flat key/value slot list and returns the value stored
// right after it, or "" when no key at an even offset matches.
fn lang_get_big15(profile: [String], key: String) -> String {
    let count: Int = native_list_len(profile)
    let last: Int = count - 1
    let pos: Int = 0
    while pos < last {
        // Keys sit at even offsets; the value follows its key.
        if str_eq(native_list_get(profile, pos), key) {
            return native_list_get(profile, pos + 1)
        }
        let pos = pos + 2
    }
    return ""
}
|
||||
|
||||
fn lang_profile_en() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile_big15("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return profile
}
|
||||
|
||||
// Builds a flat slot list in which every slot name is immediately followed by
// its value, in the fixed order code .. null_subject.
fn lang_profile_big16(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let slots: [String] = native_list_empty()
    let slots = native_list_append(native_list_append(slots, "code"), code)
    let slots = native_list_append(native_list_append(slots, "word_order"), word_order)
    let slots = native_list_append(native_list_append(slots, "morph_type"), morph_type)
    let slots = native_list_append(native_list_append(slots, "has_case"), has_case)
    let slots = native_list_append(native_list_append(slots, "has_gender"), has_gender)
    let slots = native_list_append(native_list_append(slots, "script_dir"), script_dir)
    let slots = native_list_append(native_list_append(slots, "agreement"), agreement)
    let slots = native_list_append(native_list_append(slots, "null_subject"), null_subject)
    return slots
}
|
||||
|
||||
// Looks up `key` in a flat key/value slot list and returns the value stored
// right after it, or "" when no key at an even offset matches.
fn lang_get_big16(profile: [String], key: String) -> String {
    let count: Int = native_list_len(profile)
    let last: Int = count - 1
    let pos: Int = 0
    while pos < last {
        // Keys sit at even offsets; the value follows its key.
        if str_eq(native_list_get(profile, pos), key) {
            return native_list_get(profile, pos + 1)
        }
        let pos = pos + 2
    }
    return ""
}
|
||||
|
||||
fn lang_profile_en() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile_big16("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return profile
}
|
||||
|
||||
// Builds a flat slot list in which every slot name is immediately followed by
// its value, in the fixed order code .. null_subject.
fn lang_profile_big17(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let slots: [String] = native_list_empty()
    let slots = native_list_append(native_list_append(slots, "code"), code)
    let slots = native_list_append(native_list_append(slots, "word_order"), word_order)
    let slots = native_list_append(native_list_append(slots, "morph_type"), morph_type)
    let slots = native_list_append(native_list_append(slots, "has_case"), has_case)
    let slots = native_list_append(native_list_append(slots, "has_gender"), has_gender)
    let slots = native_list_append(native_list_append(slots, "script_dir"), script_dir)
    let slots = native_list_append(native_list_append(slots, "agreement"), agreement)
    let slots = native_list_append(native_list_append(slots, "null_subject"), null_subject)
    return slots
}
|
||||
|
||||
// Looks up `key` in a flat key/value slot list and returns the value stored
// right after it, or "" when no key at an even offset matches.
fn lang_get_big17(profile: [String], key: String) -> String {
    let count: Int = native_list_len(profile)
    let last: Int = count - 1
    let pos: Int = 0
    while pos < last {
        // Keys sit at even offsets; the value follows its key.
        if str_eq(native_list_get(profile, pos), key) {
            return native_list_get(profile, pos + 1)
        }
        let pos = pos + 2
    }
    return ""
}
|
||||
|
||||
fn lang_profile_en() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile_big17("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return profile
}
|
||||
|
||||
// Builds a flat slot list in which every slot name is immediately followed by
// its value, in the fixed order code .. null_subject.
fn lang_profile_big18(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let slots: [String] = native_list_empty()
    let slots = native_list_append(native_list_append(slots, "code"), code)
    let slots = native_list_append(native_list_append(slots, "word_order"), word_order)
    let slots = native_list_append(native_list_append(slots, "morph_type"), morph_type)
    let slots = native_list_append(native_list_append(slots, "has_case"), has_case)
    let slots = native_list_append(native_list_append(slots, "has_gender"), has_gender)
    let slots = native_list_append(native_list_append(slots, "script_dir"), script_dir)
    let slots = native_list_append(native_list_append(slots, "agreement"), agreement)
    let slots = native_list_append(native_list_append(slots, "null_subject"), null_subject)
    return slots
}
|
||||
|
||||
// Looks up `key` in a flat key/value slot list and returns the value stored
// right after it, or "" when no key at an even offset matches.
fn lang_get_big18(profile: [String], key: String) -> String {
    let count: Int = native_list_len(profile)
    let last: Int = count - 1
    let pos: Int = 0
    while pos < last {
        // Keys sit at even offsets; the value follows its key.
        if str_eq(native_list_get(profile, pos), key) {
            return native_list_get(profile, pos + 1)
        }
        let pos = pos + 2
    }
    return ""
}
|
||||
|
||||
fn lang_profile_en() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile_big18("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return profile
}
|
||||
|
||||
// Builds a flat slot list in which every slot name is immediately followed by
// its value, in the fixed order code .. null_subject.
fn lang_profile_big19(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    let slots: [String] = native_list_empty()
    let slots = native_list_append(native_list_append(slots, "code"), code)
    let slots = native_list_append(native_list_append(slots, "word_order"), word_order)
    let slots = native_list_append(native_list_append(slots, "morph_type"), morph_type)
    let slots = native_list_append(native_list_append(slots, "has_case"), has_case)
    let slots = native_list_append(native_list_append(slots, "has_gender"), has_gender)
    let slots = native_list_append(native_list_append(slots, "script_dir"), script_dir)
    let slots = native_list_append(native_list_append(slots, "agreement"), agreement)
    let slots = native_list_append(native_list_append(slots, "null_subject"), null_subject)
    return slots
}
|
||||
|
||||
// Looks up `key` in a flat key/value slot list and returns the value stored
// right after it, or "" when no key at an even offset matches.
fn lang_get_big19(profile: [String], key: String) -> String {
    let count: Int = native_list_len(profile)
    let last: Int = count - 1
    let pos: Int = 0
    while pos < last {
        // Keys sit at even offsets; the value follows its key.
        if str_eq(native_list_get(profile, pos), key) {
            return native_list_get(profile, pos + 1)
        }
        let pos = pos + 2
    }
    return ""
}
|
||||
|
||||
fn lang_profile_en() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile_big19("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return profile
}
|
||||
|
||||
@@ -1,353 +0,0 @@
|
||||
// language-profile.el - Language profile data and accessors.
|
||||
//
|
||||
// A language profile is a slot map ([String] key-value list) describing the
|
||||
// typological properties of a natural language. The engine reads these
|
||||
// properties to drive morphology, word-order, and question-formation without
|
||||
// any per-language code paths.
|
||||
//
|
||||
// Profile slot keys:
|
||||
// code - ISO 639 language code (two-letter 639-1 where one exists, otherwise three-letter 639-2/3): "en", "ja", "ar", "zh", "de", "fr", "es", "sw", "hi", "ru", "grc", "ang", etc.
|
||||
// word_order - "SVO" | "SOV" | "VSO" | "VOS" | "OVS" | "OSV" | "free"
|
||||
// morph_type - "isolating" | "agglutinative" | "fusional" | "polysynthetic" | "semitic" (root-based morphology; used by the he, uga, and gez profiles)
|
||||
// has_case - "true" | "false"
|
||||
// has_gender - "true" | "false"
|
||||
// script_dir - "ltr" | "rtl" | "ttb"
|
||||
// agreement - semicolon-separated features drawn from number, person, gender, case, noun-class, aspect (e.g. "number;person", "number;person;gender;case"), or "none"
|
||||
// null_subject - "true" | "false" (pro-drop: subject may be omitted)
|
||||
|
||||
// ── Constructor ───────────────────────────────────────────────────────────────
|
||||
|
||||
fn lang_profile(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String] {
    // The profile is a flat list: each slot name is immediately followed by its
    // value, so every statement below appends one name/value pair.
    let slots: [String] = native_list_empty()
    let slots = native_list_append(native_list_append(slots, "code"), code)
    let slots = native_list_append(native_list_append(slots, "word_order"), word_order)
    let slots = native_list_append(native_list_append(slots, "morph_type"), morph_type)
    let slots = native_list_append(native_list_append(slots, "has_case"), has_case)
    let slots = native_list_append(native_list_append(slots, "has_gender"), has_gender)
    let slots = native_list_append(native_list_append(slots, "script_dir"), script_dir)
    let slots = native_list_append(native_list_append(slots, "agreement"), agreement)
    let slots = native_list_append(native_list_append(slots, "null_subject"), null_subject)
    return slots
}
|
||||
|
||||
// ── Accessor ──────────────────────────────────────────────────────────────────
|
||||
|
||||
fn lang_get(profile: [String], key: String) -> String {
    // Walk the keys at even offsets; the matching value is the next element.
    // Returns "" when the key is not present.
    let count: Int = native_list_len(profile)
    let last: Int = count - 1
    let pos: Int = 0
    while pos < last {
        if str_eq(native_list_get(profile, pos), key) {
            return native_list_get(profile, pos + 1)
        }
        let pos = pos + 2
    }
    return ""
}
|
||||
|
||||
// ── Built-in profiles ─────────────────────────────────────────────────────────
|
||||
//
|
||||
// Each profile encodes typological facts about one language. These are data,
|
||||
// not separate code paths. Adding a new language means adding a new profile
|
||||
// and loading its vocabulary/suffix tables into the Engram - no engine changes.
|
||||
|
||||
// English: SVO, fusional, no grammatical case (nominative/accusative collapsed),
|
||||
// no grammatical gender, left-to-right, agreement on number and person,
|
||||
// obligatory subject (no pro-drop).
|
||||
fn lang_profile_en() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("en", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return profile
}
|
||||
|
||||
// Japanese: SOV, agglutinative, grammatical relations marked by postpositions
|
||||
// (not inflectional case), no grammatical gender, left-to-right, no agreement
|
||||
// morphology on verbs, pro-drop (null subject frequent).
|
||||
fn lang_profile_ja() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("ja", "SOV", "agglutinative", "false", "false", "ltr", "none", "true")
    return profile
}
|
||||
|
||||
// Arabic: VSO, fusional, full case system, grammatical gender (masc/fem),
|
||||
// right-to-left script, agreement on number, person, gender, and case,
|
||||
// pro-drop (subject agreement marking on verb allows subject omission).
|
||||
fn lang_profile_ar() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("ar", "VSO", "fusional", "true", "true", "rtl", "number;person;gender;case", "true")
    return profile
}
|
||||
|
||||
// Mandarin Chinese: SVO, isolating (no morphological inflection), no case,
|
||||
// no grammatical gender, left-to-right, no agreement (no morphological marking),
|
||||
// null subject allowed in discourse context.
|
||||
fn lang_profile_zh() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("zh", "SVO", "isolating", "false", "false", "ltr", "none", "true")
    return profile
}
|
||||
|
||||
// German: V2 (second-position verb, base SOV in subordinate clauses), fusional,
|
||||
// four-case system, three grammatical genders, left-to-right, agreement on
|
||||
// number, person, gender, and case, obligatory subject.
|
||||
fn lang_profile_de() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("de", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case", "false")
    return profile
}
|
||||
|
||||
// Spanish: SVO, fusional, no morphological case (but object clitics exist),
|
||||
// grammatical gender (masc/fem), left-to-right, agreement on number, person,
|
||||
// and gender, pro-drop (rich verbal agreement allows subject omission).
|
||||
fn lang_profile_es() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("es", "SVO", "fusional", "false", "true", "ltr", "number;person;gender", "true")
    return profile
}
|
||||
|
||||
// Finnish: SVO basic order (fairly flexible in practice), agglutinative, fifteen
// grammatical cases, no grammatical gender, left-to-right, agreement on number,
// person, and case, no pro-drop (subject required in finite clauses).
fn lang_profile_fi() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    // word_order was previously recorded as "SOV"; Finnish basic clause order is SVO.
    return lang_profile("fi", "SVO", "agglutinative", "true", "false", "ltr", "number;person;case", "false")
}
|
||||
|
||||
// Swahili: SVO, agglutinative, noun-class system (15+ classes replacing gender),
|
||||
// no case inflection, left-to-right, agreement driven by noun class and number,
|
||||
// pro-drop (subject prefix on verb can stand alone).
|
||||
fn lang_profile_sw() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("sw", "SVO", "agglutinative", "false", "false", "ltr", "noun-class;number", "true")
    return profile
}
|
||||
|
||||
// Hindi: SOV, fusional, case-marked postpositional system, grammatical gender
|
||||
// (masc/fem), left-to-right (Devanagari script still ltr), agreement on number,
|
||||
// person, gender, and case, pro-drop (subject frequently dropped).
|
||||
fn lang_profile_hi() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("hi", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case", "true")
    return profile
}
|
||||
|
||||
// Russian: free word order (pragmatically determined), fusional, six-case system,
|
||||
// three grammatical genders, left-to-right (Cyrillic), agreement on number,
|
||||
// person, gender, and case, no pro-drop (subject required).
|
||||
fn lang_profile_ru() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("ru", "free", "fusional", "true", "true", "ltr", "number;person;gender;case", "false")
    return profile
}
|
||||
|
||||
// French: SVO, fusional, no morphological case (but clitic object pronouns),
|
||||
// two grammatical genders (masc/fem), left-to-right, agreement on number,
|
||||
// person, and gender, no pro-drop.
|
||||
fn lang_profile_fr() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("fr", "SVO", "fusional", "false", "true", "ltr", "number;person;gender", "false")
    return profile
}
|
||||
|
||||
// Latin: SOV (highly free word order), fusional, six-case system (nom/gen/dat/acc/abl/voc),
|
||||
// three genders (masc/fem/neut), left-to-right, rich agreement on number, person, gender,
|
||||
// and case, pro-drop (subject expressed in verb ending).
|
||||
fn lang_profile_la() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("la", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case", "true")
    return profile
}
|
||||
|
||||
// Hebrew (Modern): SVO, Semitic trilateral root morphology, two genders (masc/fem),
|
||||
// two numbers (singular/plural; dual vestigial), right-to-left (Hebrew script),
|
||||
// agreement on number, person, gender; zero copula in present tense; no grammatical cases.
|
||||
fn lang_profile_he() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    // has_case is "false" and has_gender is "true": Modern Hebrew has two genders
    // and no grammatical cases. The two flags were previously swapped, which
    // contradicted the "gender" feature in the agreement slot.
    return lang_profile("he", "SVO", "semitic", "false", "true", "rtl", "number;person;gender", "true")
}
|
||||
|
||||
// Sanskrit: SOV/free, highly fusional, 3 genders, 8 cases, 3 numbers (sg/du/pl),
|
||||
// Devanagari script, rich verb system (10 classes, 9 tenses/moods), pro-drop.
|
||||
fn lang_profile_sa() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("sa", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case", "true")
    return profile
}
|
||||
|
||||
// Gothic: SOV, fusional, 3 genders, 4 cases, singular/plural,
|
||||
// Gothic alphabet (romanized as þ/ƕ/ai/au/ei), strong and weak classes, pro-drop.
|
||||
fn lang_profile_got() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("got", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case", "true")
    return profile
}
|
||||
|
||||
// Old Norse: free/SOV, fusional, 3 genders, 4 cases, singular/plural,
|
||||
// definite article as noun suffix (-inn/-in/-it), strong and weak classes, pro-drop.
|
||||
fn lang_profile_non() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("non", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case", "true")
    return profile
}
|
||||
|
||||
// Middle English (ca. 1100–1500): SVO emerging, mostly lost case system,
|
||||
// -es plural/genitive, strong and weak verbs, no grammatical gender on nouns.
|
||||
fn lang_profile_enm() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("enm", "SVO", "fusional", "false", "false", "ltr", "number;person", "false")
    return profile
}
|
||||
|
||||
// Pali: SOV, fusional (simplified Sanskrit), 3 genders, 8 cases, sg/pl,
|
||||
// Latin transliteration with IAST diacritics, Buddhist canonical language.
|
||||
fn lang_profile_pi() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("pi", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case", "true")
    return profile
}
|
||||
|
||||
// Ancient Greek: free/SOV word order, highly fusional, 3 genders, 5 cases (nom/acc/gen/dat/voc),
|
||||
// singular/dual/plural, polytonic Greek script (Unicode), complex verb system with aspect
|
||||
// (imperfective/perfective), augment in past tenses, pro-drop.
|
||||
fn lang_profile_grc() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("grc", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case;aspect", "true")
    return profile
}
|
||||
|
||||
// Old English (Anglo-Saxon): SOV/V2, fusional, 3 genders, 4 cases (nom/acc/gen/dat),
|
||||
// singular/plural, Latin alphabet + þ/ð/ƿ/æ, strong and weak noun/verb classes, pro-drop.
|
||||
fn lang_profile_ang() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("ang", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case", "true")
    return profile
}
|
||||
|
||||
// Old French (ca. 1000–1300 CE): SVO/V2, fusional, two-case system (nominative/oblique),
|
||||
// two genders (masculine/feminine), left-to-right, agreement on number, person, gender,
|
||||
// and case, no pro-drop (subject generally required).
|
||||
fn lang_profile_fro() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("fro", "SVO", "fusional", "true", "true", "ltr", "number;person;gender;case", "false")
    return profile
}
|
||||
|
||||
// Old High German (ca. 750–1050 CE): SOV/V2, fusional, four-case system, three genders,
|
||||
// left-to-right, agreement on number, person, gender, and case, pro-drop.
|
||||
fn lang_profile_goh() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("goh", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case", "true")
    return profile
}
|
||||
|
||||
// Old Irish (ca. 600–900 CE): VSO, fusional, case system, three genders,
|
||||
// left-to-right, agreement on number, person, gender, and case, pro-drop.
|
||||
fn lang_profile_sga() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("sga", "VSO", "fusional", "true", "true", "ltr", "number;person;gender;case", "true")
    return profile
}
|
||||
|
||||
// Tocharian B (ca. 500–1000 CE): SOV, fusional, case system, two genders,
|
||||
// left-to-right, agreement on number, person, gender, and case, no pro-drop.
|
||||
fn lang_profile_txb() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("txb", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case", "false")
    return profile
}
|
||||
|
||||
// Old Persian (ca. 525–330 BCE): SOV, fusional, 8-case system, no grammatical gender,
|
||||
// left-to-right, agreement on number, person, and case, pro-drop.
|
||||
fn lang_profile_peo() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("peo", "SOV", "fusional", "true", "false", "ltr", "number;person;case", "true")
    return profile
}
|
||||
|
||||
// Akkadian (Old Babylonian period, ca. 1900–1600 BCE): SOV (verb-final, unlike most
// other ancient Semitic languages), fusional, 3-case system
// (nominative/accusative/genitive with mimation), two genders, left-to-right,
// agreement on number, person, gender, and case, no pro-drop.
fn lang_profile_akk() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    // word_order was previously recorded as "VSO"; Akkadian clauses are verb-final.
    return lang_profile("akk", "SOV", "fusional", "true", "true", "ltr", "number;person;gender;case", "false")
}
|
||||
|
||||
// Ugaritic (ca. 1400–1200 BCE): VSO, Semitic trilateral root morphology, 3-case system,
|
||||
// two genders, left-to-right (cuneiform alphabetic script), agreement on number, person,
|
||||
// gender, and case, no pro-drop.
|
||||
fn lang_profile_uga() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("uga", "VSO", "semitic", "true", "true", "ltr", "number;person;gender;case", "false")
    return profile
}
|
||||
|
||||
// Ancient Egyptian / Middle Egyptian (ca. 2100–1300 BCE): SVO, agglutinative,
|
||||
// no morphological case (word order + prepositions), two genders, left-to-right,
|
||||
// agreement on number, person, and gender, pro-drop (zero copula in present).
|
||||
fn lang_profile_egy() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("egy", "SVO", "agglutinative", "false", "true", "ltr", "number;person;gender", "true")
    return profile
}
|
||||
|
||||
// Sumerian (ca. 3000–2000 BCE): SOV, agglutinative, ergative-absolutive case system,
|
||||
// no grammatical gender (animacy distinction instead), left-to-right, agreement on
|
||||
// number and person, pro-drop.
|
||||
fn lang_profile_sux() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("sux", "SOV", "agglutinative", "true", "false", "ltr", "number;person", "true")
    return profile
}
|
||||
|
||||
// Ge'ez (Classical Ethiopic, ca. 4th–7th century CE): SOV, Semitic trilateral root
|
||||
// morphology, two genders (masc/fem), Ethiopic/Fidel script (ltr), agreement on
|
||||
// number, person, and gender, pro-drop (subject inflection on verb).
|
||||
fn lang_profile_gez() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("gez", "SOV", "semitic", "true", "true", "ltr", "number;person;gender", "true")
    return profile
}
|
||||
|
||||
// Coptic (Sahidic dialect, ca. 3rd–11th century CE): SVO, agglutinative, no
|
||||
// morphological case, two genders (masc/fem), left-to-right (Coptic alphabet),
|
||||
// agreement on number, person, and gender via bound subject pronouns, no pro-drop (explicit
|
||||
// subject prefix required on every verb).
|
||||
fn lang_profile_cop() -> [String] {
    // Args: code, word_order, morph_type, has_case, has_gender, script_dir, agreement, null_subject.
    let profile: [String] = lang_profile("cop", "SVO", "agglutinative", "false", "true", "ltr", "number;person;gender", "false")
    return profile
}
|
||||
|
||||
// ── Dispatch: code -> profile ─────────────────────────────────────────────────
|
||||
|
||||
fn lang_from_code(code: String) -> [String] {
    // Every code below is a distinct literal, so at most one check can match
    // and the order of the checks does not affect the result.

    // Living languages.
    if str_eq(code, "en") { return lang_profile_en() }
    if str_eq(code, "ja") { return lang_profile_ja() }
    if str_eq(code, "ar") { return lang_profile_ar() }
    if str_eq(code, "zh") { return lang_profile_zh() }
    if str_eq(code, "de") { return lang_profile_de() }
    if str_eq(code, "es") { return lang_profile_es() }
    if str_eq(code, "fi") { return lang_profile_fi() }
    if str_eq(code, "sw") { return lang_profile_sw() }
    if str_eq(code, "hi") { return lang_profile_hi() }
    if str_eq(code, "ru") { return lang_profile_ru() }
    if str_eq(code, "fr") { return lang_profile_fr() }
    if str_eq(code, "he") { return lang_profile_he() }

    // Classical and historical languages.
    if str_eq(code, "la") { return lang_profile_la() }
    if str_eq(code, "grc") { return lang_profile_grc() }
    if str_eq(code, "ang") { return lang_profile_ang() }
    if str_eq(code, "sa") { return lang_profile_sa() }
    if str_eq(code, "got") { return lang_profile_got() }
    if str_eq(code, "non") { return lang_profile_non() }
    if str_eq(code, "enm") { return lang_profile_enm() }
    if str_eq(code, "pi") { return lang_profile_pi() }
    if str_eq(code, "fro") { return lang_profile_fro() }
    if str_eq(code, "goh") { return lang_profile_goh() }
    if str_eq(code, "sga") { return lang_profile_sga() }
    if str_eq(code, "txb") { return lang_profile_txb() }
    if str_eq(code, "peo") { return lang_profile_peo() }
    if str_eq(code, "akk") { return lang_profile_akk() }
    if str_eq(code, "uga") { return lang_profile_uga() }
    if str_eq(code, "egy") { return lang_profile_egy() }
    if str_eq(code, "sux") { return lang_profile_sux() }
    if str_eq(code, "gez") { return lang_profile_gez() }
    if str_eq(code, "cop") { return lang_profile_cop() }

    // Any code not listed above gets the English profile.
    return lang_profile_en()
}
|
||||
|
||||
// English default - backward compatibility entry point.
|
||||
fn lang_default() -> [String] {
    // The default profile is the English one.
    let fallback: [String] = lang_profile_en()
    return fallback
}
|
||||
|
||||
// ── Typed convenience predicates ──────────────────────────────────────────────
|
||||
|
||||
fn lang_is_isolating(profile: [String]) -> Bool {
    // True when the "morph_type" slot is exactly "isolating".
    let morph: String = lang_get(profile, "morph_type")
    return str_eq(morph, "isolating")
}
|
||||
|
||||
fn lang_is_agglutinative(profile: [String]) -> Bool {
    // True when the "morph_type" slot is exactly "agglutinative".
    let morph: String = lang_get(profile, "morph_type")
    return str_eq(morph, "agglutinative")
}
|
||||
|
||||
fn lang_is_fusional(profile: [String]) -> Bool {
    // True when the "morph_type" slot is exactly "fusional".
    let morph: String = lang_get(profile, "morph_type")
    return str_eq(morph, "fusional")
}
|
||||
|
||||
fn lang_is_polysynthetic(profile: [String]) -> Bool {
    // True when the "morph_type" slot is exactly "polysynthetic".
    let morph: String = lang_get(profile, "morph_type")
    return str_eq(morph, "polysynthetic")
}
|
||||
|
||||
fn lang_is_rtl(profile: [String]) -> Bool {
    // True when the "script_dir" slot is exactly "rtl".
    let direction: String = lang_get(profile, "script_dir")
    return str_eq(direction, "rtl")
}
|
||||
|
||||
fn lang_has_null_subject(profile: [String]) -> Bool {
    // True when the "null_subject" slot holds the string "true".
    let flag: String = lang_get(profile, "null_subject")
    return str_eq(flag, "true")
}
|
||||
|
||||
fn lang_has_case(profile: [String]) -> Bool {
    // True when the "has_case" slot holds the string "true".
    let flag: String = lang_get(profile, "has_case")
    return str_eq(flag, "true")
}
|
||||
|
||||
fn lang_has_gender(profile: [String]) -> Bool {
    // True when the "has_gender" slot holds the string "true".
    let flag: String = lang_get(profile, "has_gender")
    return str_eq(flag, "true")
}
|
||||
|
||||
fn lang_word_order(profile: [String]) -> String {
    // Returns the "word_order" slot, or "" when the profile has none.
    let order: String = lang_get(profile, "word_order")
    return order
}
|
||||
|
||||
fn lang_code(profile: [String]) -> String {
    // Returns whatever lang_get yields for the "code" key.
    let code: String = lang_get(profile, "code")
    return code
}
|
||||
@@ -1,46 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn lang_profile(code: String, word_order: String, morph_type: String, has_case: String, has_gender: String, script_dir: String, agreement: String, null_subject: String) -> [String]
|
||||
extern fn lang_get(profile: [String], key: String) -> String
|
||||
extern fn lang_profile_en() -> [String]
|
||||
extern fn lang_profile_ja() -> [String]
|
||||
extern fn lang_profile_ar() -> [String]
|
||||
extern fn lang_profile_zh() -> [String]
|
||||
extern fn lang_profile_de() -> [String]
|
||||
extern fn lang_profile_es() -> [String]
|
||||
extern fn lang_profile_fi() -> [String]
|
||||
extern fn lang_profile_sw() -> [String]
|
||||
extern fn lang_profile_hi() -> [String]
|
||||
extern fn lang_profile_ru() -> [String]
|
||||
extern fn lang_profile_fr() -> [String]
|
||||
extern fn lang_profile_la() -> [String]
|
||||
extern fn lang_profile_he() -> [String]
|
||||
extern fn lang_profile_sa() -> [String]
|
||||
extern fn lang_profile_got() -> [String]
|
||||
extern fn lang_profile_non() -> [String]
|
||||
extern fn lang_profile_enm() -> [String]
|
||||
extern fn lang_profile_pi() -> [String]
|
||||
extern fn lang_profile_grc() -> [String]
|
||||
extern fn lang_profile_ang() -> [String]
|
||||
extern fn lang_profile_fro() -> [String]
|
||||
extern fn lang_profile_goh() -> [String]
|
||||
extern fn lang_profile_sga() -> [String]
|
||||
extern fn lang_profile_txb() -> [String]
|
||||
extern fn lang_profile_peo() -> [String]
|
||||
extern fn lang_profile_akk() -> [String]
|
||||
extern fn lang_profile_uga() -> [String]
|
||||
extern fn lang_profile_egy() -> [String]
|
||||
extern fn lang_profile_sux() -> [String]
|
||||
extern fn lang_profile_gez() -> [String]
|
||||
extern fn lang_profile_cop() -> [String]
|
||||
extern fn lang_from_code(code: String) -> [String]
|
||||
extern fn lang_default() -> [String]
|
||||
extern fn lang_is_isolating(profile: [String]) -> Bool
|
||||
extern fn lang_is_agglutinative(profile: [String]) -> Bool
|
||||
extern fn lang_is_fusional(profile: [String]) -> Bool
|
||||
extern fn lang_is_polysynthetic(profile: [String]) -> Bool
|
||||
extern fn lang_is_rtl(profile: [String]) -> Bool
|
||||
extern fn lang_has_null_subject(profile: [String]) -> Bool
|
||||
extern fn lang_has_case(profile: [String]) -> Bool
|
||||
extern fn lang_has_gender(profile: [String]) -> Bool
|
||||
extern fn lang_word_order(profile: [String]) -> String
|
||||
extern fn lang_code(profile: [String]) -> String
|
||||
@@ -1,40 +0,0 @@
|
||||
import "language-profile.el"
|
||||
|
||||
extern fn es_pluralize(noun: String) -> String
|
||||
extern fn es_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn fr_pluralize(noun: String) -> String
|
||||
extern fn fr_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn de_noun_plural(noun: String, gender: String) -> String
|
||||
extern fn de_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn ru_noun_case(noun: String, gender: String, gram_case: String, number: String) -> String
|
||||
extern fn ru_conjugate(verb: String, tense: String, person: String, number: String, gender: String) -> String
|
||||
extern fn ja_conjugate(dict_form: String, form: String) -> String
|
||||
extern fn fi_apply_case(noun: String, gram_case: String, number: String) -> String
|
||||
extern fn fi_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn ar_sound_plural(noun: String, gender: String) -> String
|
||||
extern fn ar_conjugate(verb: String, tense: String, person: String, gender: String, number: String) -> String
|
||||
extern fn hi_noun_direct(noun: String, gender: String, number: String) -> String
|
||||
extern fn hi_gender(noun: String) -> String
|
||||
extern fn hi_conjugate(verb: String, tense: String, person: String, gender: String, number: String) -> String
|
||||
extern fn sw_noun_plural(noun: String) -> String
|
||||
extern fn sw_conjugate(verb: String, person: String, number: String, noun_class: String, tense: String) -> String
|
||||
extern fn la_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn he_conjugate(verb: String, tense: String, person: String, gender: String, number: String) -> String
|
||||
extern fn grc_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn ang_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn sa_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn got_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn non_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn enm_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn pi_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn fro_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn goh_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn sga_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn txb_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn peo_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn akk_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn uga_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn egy_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn sux_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn gez_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn cop_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
@@ -1,3 +0,0 @@
|
||||
fn morph_tiny(x: String) -> String {
    // Identity: the argument is handed back untouched.
    let unchanged: String = x
    return unchanged
}
|
||||
@@ -1,528 +0,0 @@
|
||||
// morphology-akk.el - Akkadian morphology for the NLG engine.
|
||||
// 𒀭𒂗𒍪 — Akkadian (akkadû), the language of Babylon and Assyria.
|
||||
//
|
||||
// Implements Old Babylonian Akkadian verb conjugation (G-stem / Grundstamm),
|
||||
// noun declension with mimation, and noun-phrase construction.
|
||||
//
|
||||
// Akkadian is the oldest attested Semitic language (ca. 2800–100 BCE).
|
||||
// It uses cuneiform script; we work in standard Latin transliteration
|
||||
// (Old Babylonian dialect — the classical prestige form).
|
||||
//
|
||||
// Language profile:
|
||||
// code=akk, name=Akkadian, morph_type=semitic, word_order=VSO/SOV,
|
||||
// script=cuneiform (transliterated), family=semitic/east-semitic
|
||||
//
|
||||
// Key grammatical facts:
|
||||
// - Semitic trilateral root system: words built from 3-consonant roots
|
||||
// by inserting vowel patterns (e.g. root p-r-s → iparras "he decides")
|
||||
// - Grammatical gender: masculine / feminine (no neuter)
|
||||
// - Cases: nominative (-um), accusative (-am), genitive (-im) — "mimation"
|
||||
// - Number: singular / plural (dual is vestigial in verbs)
|
||||
// - Verb stems: G (basic), D (intensive), Š (causative), N (passive);
|
||||
// this file implements G-stem throughout
|
||||
// - Two main tense/aspect systems:
|
||||
// Present-future (iparras pattern): action in progress or future
|
||||
// Perfect (iptaras pattern): completed action with present relevance
|
||||
// Stative (paris pattern): resultant state, often adjectival
|
||||
// - No definite or indefinite article; case endings convey
|
||||
// determination contextually
|
||||
// - Copula: bašû (to exist/be)
|
||||
//
|
||||
// Verb conjugation conventions:
|
||||
// person: "first" | "second" | "third"
|
||||
// gender: "m" | "f"
|
||||
// number: "singular" | "plural"
|
||||
// tense: "present" | "perfect" | "stative"
|
||||
//
|
||||
// Noun declension conventions:
|
||||
// gram_case: "nom" | "acc" | "gen"
|
||||
// number: "singular" | "plural"
|
||||
// gender: "m" | "f" (passed to akk_decline for gender-specific forms)
|
||||
//
|
||||
// Verbs covered (G-stem infinitive, transliterated):
|
||||
// "bašû" — to exist / be (copula)
|
||||
// "alāku" — to go
|
||||
// "amāru" — to see
|
||||
// "qabû" — to say
|
||||
// "epēšu" — to do / make
|
||||
//
|
||||
// Nouns covered with known mimation forms:
|
||||
// "šarrum" — king
|
||||
// "awīlum" — man / person
|
||||
// "bītum" — house
|
||||
// "ilum" — god
|
||||
//
|
||||
// Depends on: morphology.el (str_eq, str_len, str_slice, str_ends_with)
|
||||
|
||||
// ── String helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
import "morphology.el"
|
||||
fn akk_str_ends(s: String, suf: String) -> Bool {
    // Akkadian-module alias for str_ends_with; adds no logic of its own.
    let matched: Bool = str_ends_with(s, suf)
    return matched
}
|
||||
|
||||
fn akk_str_len(s: String) -> Int {
    // Akkadian-module alias for str_len; adds no logic of its own.
    let size: Int = str_len(s)
    return size
}
|
||||
|
||||
fn akk_str_drop_last(s: String, n: Int) -> String {
    // Returns s without its final n units (as counted by str_len).
    // Asking for the whole length or more yields the empty string.
    let total: Int = str_len(s)
    if n < total {
        let keep: Int = total - n
        return str_slice(s, 0, keep)
    }
    return ""
}
|
||||
|
||||
// ── Slot index ─────────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Maps person × number to a 0-based slot for table lookups.
|
||||
// Akkadian verb agreement does not distinguish gender in 1st person,
|
||||
// and the 2nd person often conflates masc/fem in some paradigms.
|
||||
// We use a 6-cell paradigm matching the most common OB presentation:
|
||||
//
|
||||
// 0 = 1sg (I)
|
||||
// 1 = 2sg (you sg)
|
||||
// 2 = 3sg m (he)
|
||||
// 3 = 3sg f (she)
|
||||
// 4 = 1pl (we)
|
||||
// 5 = 3pl (they)
|
||||
//
|
||||
// Note: 2pl is rare / vestigial in attested OB texts; omitted here.
|
||||
|
||||
fn akk_slot(person: String, number: String) -> Int {
    // Paradigm cell for person x number:
    //   "second"            -> 1 (number is ignored; there is no 2pl cell)
    //   "first"             -> 4 when number is "plural", otherwise 0
    //   anything else (3rd) -> 5 when number is "plural", otherwise 2
    // Cell 3 (3sg feminine) is never produced here; see akk_slot_g.
    let plural: Bool = str_eq(number, "plural")

    if str_eq(person, "second") {
        return 1
    }
    if str_eq(person, "first") {
        if plural {
            return 4
        }
        return 0
    }
    if plural {
        return 5
    }
    return 2
}
|
||||
|
||||
// akk_slot_g: gender-aware slot for third person singular.
|
||||
// Returns 3 (3sg fem) when person=third, number=singular, gender=f.
|
||||
fn akk_slot_g(person: String, gender: String, number: String) -> Int {
    // Gender-aware variant of akk_slot: only the combination
    // third + singular + "f" is redirected to cell 3; every other
    // input gets the plain akk_slot result.
    let fallback: Int = akk_slot(person, number)

    if !str_eq(person, "third") {
        return fallback
    }
    if !str_eq(number, "singular") {
        return fallback
    }
    if !str_eq(gender, "f") {
        return fallback
    }
    return 3
}
|
||||
|
||||
// ── Copula: bašû — to exist / be ──────────────────────────────────────────────
|
||||
//
|
||||
// bašû is suppletive and highly irregular.
|
||||
// Present: ibašši (3sg m/f), abašši (1sg), tabašši (2sg)
|
||||
// Stative: bašī (3sg m), bašiat (3sg f), bašāku (1sg)
|
||||
// Perfect: not commonly attested in G-stem; use present forms as fallback.
|
||||
|
||||
fn akk_copula_present(slot: Int) -> String {
    // Present of bašû by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // Third person singular: masculine and feminine share one form.
    if slot == 2 {
        return "ibašši"
    }
    if slot == 3 {
        return "ibašši"
    }
    // First and second person singular.
    if slot == 0 {
        return "abašši"
    }
    if slot == 1 {
        return "tabašši"
    }
    // First person plural.
    if slot == 4 {
        return "nibašši"
    }
    // Third person plural (default).
    return "ibaššū"
}
|
||||
|
||||
fn akk_copula_stative(slot: Int) -> String {
    // Stative of bašû by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // Third person singular: masculine, then feminine in -at.
    if slot == 2 {
        return "bašī"
    }
    if slot == 3 {
        return "bašiat"
    }
    // First person: singular -āku, plural -ānu.
    if slot == 0 {
        return "bašāku"
    }
    if slot == 4 {
        return "bašānu"
    }
    // Second person singular -āta.
    if slot == 1 {
        return "bašāta"
    }
    // Third person plural (default).
    return "bašū"
}
|
||||
|
||||
fn akk_is_copula(verb: String) -> Bool {
    // Accepts the English canonical label plus both spellings of the
    // infinitive (ASCII and with diacritics).
    if str_eq(verb, "be") {
        return true
    }
    if str_eq(verb, "bashu") {
        return true
    }
    return str_eq(verb, "bašû")
}
|
||||
|
||||
fn akk_conjugate_copula(tense: String, slot: Int) -> String {
    // Only "stative" has its own table; every other tense value
    // (including "present" and "perfect") is served by the present forms.
    if !str_eq(tense, "stative") {
        return akk_copula_present(slot)
    }
    return akk_copula_stative(slot)
}
|
||||
|
||||
// ── alāku — to go ─────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Irregular: present stem is illak- (not the expected alakk-).
|
||||
// Present: illak (3sg), allak (1sg), tallak (2sg), nillak (1pl), illaku (3pl)
|
||||
// Perfect: ittalk- forms (less common, use illak- + perf marker)
|
||||
// Stative: use present as proxy
|
||||
|
||||
fn akk_alaku_present(slot: Int) -> String {
    // Present of alāku by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // ta- prefix: 2sg and 3sg feminine share one form.
    if slot == 1 {
        return "tallak"
    }
    if slot == 3 {
        return "tallak"
    }
    // 3sg masculine.
    if slot == 2 {
        return "illak"
    }
    // First person: singular, then plural.
    if slot == 0 {
        return "allak"
    }
    if slot == 4 {
        return "nillak"
    }
    // Third person plural (default).
    return "illaku"
}
|
||||
|
||||
fn akk_alaku_perfect(slot: Int) -> String {
    // Perfect of alāku by paradigm cell (see akk_slot / akk_slot_g for the
    // cell numbering). Any cell outside 0-4 gets the 3pl form.
    //
    // The 1sg form carries the first-person a- prefix ("attalak"), keeping
    // it distinct from 3sg m "ittalak" — the same a-/i- contrast found in
    // the present (allak / illak) and in the perfects of amāru and qabû.
    if slot == 0 { return "attalak" }     // 1sg
    if slot == 1 { return "tattalak" }    // 2sg
    if slot == 2 { return "ittalak" }     // 3sg m
    if slot == 3 { return "tattalak" }    // 3sg f
    if slot == 4 { return "nittalak" }    // 1pl
    return "ittalku"                      // 3pl
}
|
||||
|
||||
// ── amāru — to see ────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Present (immar-): immar (3sg), ammar (1sg), tammar (2sg)
|
||||
// Perfect (imtamar-): imtamar (3sg), amtamar (1sg), tamtamar (2sg)
|
||||
|
||||
fn akk_amaru_present(slot: Int) -> String {
    // Present of amāru by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // ta- prefix: 2sg and 3sg feminine share one form.
    if slot == 1 {
        return "tammar"
    }
    if slot == 3 {
        return "tammar"
    }
    // 3sg masculine.
    if slot == 2 {
        return "immar"
    }
    // First person: singular, then plural.
    if slot == 0 {
        return "ammar"
    }
    if slot == 4 {
        return "nimmar"
    }
    // Third person plural (default).
    return "immaru"
}
|
||||
|
||||
fn akk_amaru_perfect(slot: Int) -> String {
    // Perfect of amāru by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // ta- prefix: 2sg and 3sg feminine share one form.
    if slot == 1 {
        return "tamtamar"
    }
    if slot == 3 {
        return "tamtamar"
    }
    // 3sg masculine.
    if slot == 2 {
        return "imtamar"
    }
    // First person: singular, then plural.
    if slot == 0 {
        return "amtamar"
    }
    if slot == 4 {
        return "nimtamar"
    }
    // Third person plural (default).
    return "imtamaru"
}
|
||||
|
||||
fn akk_amaru_stative(slot: Int) -> String {
    // Stative of amāru by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // Third person singular: masculine, then feminine in -at.
    if slot == 2 {
        return "amir"
    }
    if slot == 3 {
        return "amrat"
    }
    // First person: singular -āku, plural -ānu.
    if slot == 0 {
        return "amrāku"
    }
    if slot == 4 {
        return "amrānu"
    }
    // Second person singular -āta.
    if slot == 1 {
        return "amrāta"
    }
    // Third person plural (default).
    return "amrū"
}
|
||||
|
||||
// ── qabû — to say / speak ─────────────────────────────────────────────────────
|
||||
//
|
||||
// Present: iqabbi (3sg), aqabbi (1sg), taqabbi (2sg)
|
||||
// Perfect: iqtabi (3sg), aqtabi (1sg), taqtabi (2sg)
|
||||
|
||||
fn akk_qabu_present(slot: Int) -> String {
    // Present of qabû by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // ta- prefix: 2sg and 3sg feminine share one form.
    if slot == 1 {
        return "taqabbi"
    }
    if slot == 3 {
        return "taqabbi"
    }
    // 3sg masculine.
    if slot == 2 {
        return "iqabbi"
    }
    // First person: singular, then plural.
    if slot == 0 {
        return "aqabbi"
    }
    if slot == 4 {
        return "niqabbi"
    }
    // Third person plural (default).
    return "iqabbû"
}
|
||||
|
||||
fn akk_qabu_perfect(slot: Int) -> String {
    // Perfect of qabû by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // ta- prefix: 2sg and 3sg feminine share one form.
    if slot == 1 {
        return "taqtabi"
    }
    if slot == 3 {
        return "taqtabi"
    }
    // 3sg masculine.
    if slot == 2 {
        return "iqtabi"
    }
    // First person: singular, then plural.
    if slot == 0 {
        return "aqtabi"
    }
    if slot == 4 {
        return "niqtabi"
    }
    // Third person plural (default).
    return "iqtabû"
}
|
||||
|
||||
fn akk_qabu_stative(slot: Int) -> String {
    // Stative of qabû by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // Third person singular: masculine, then feminine.
    if slot == 2 {
        return "qabi"
    }
    if slot == 3 {
        return "qabiat"
    }
    // First person: singular -āku, plural -ānu.
    if slot == 0 {
        return "qabāku"
    }
    if slot == 4 {
        return "qabānu"
    }
    // Second person singular -āta.
    if slot == 1 {
        return "qabāta"
    }
    // Third person plural (default).
    return "qabû"
}
|
||||
|
||||
// ── epēšu — to do / make ──────────────────────────────────────────────────────
|
||||
//
|
||||
// Present (ieppuš / eppuš): ieppuš (3sg), eppuš (1sg), teppuš (2sg)
|
||||
// Perfect: iptešu forms
|
||||
|
||||
fn akk_epesu_present(slot: Int) -> String {
    // Present of epēšu by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // t- prefix: 2sg and 3sg feminine share one form.
    if slot == 1 {
        return "teppuš"
    }
    if slot == 3 {
        return "teppuš"
    }
    // 3sg masculine.
    if slot == 2 {
        return "ieppuš"
    }
    // First person: singular, then plural.
    if slot == 0 {
        return "eppuš"
    }
    if slot == 4 {
        return "neppuš"
    }
    // Third person plural (default).
    return "ieppušu"
}
|
||||
|
||||
fn akk_epesu_perfect(slot: Int) -> String {
    // Perfect of epēšu by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // NOTE(review): 1sg and 3sg m share "iptešu" here, unlike the a-/i-
    // contrast used by the other perfect tables in this file — confirm
    // these forms against a reference grammar.
    // ta- prefix: 2sg and 3sg feminine share one form.
    if slot == 1 {
        return "taptešu"
    }
    if slot == 3 {
        return "taptešu"
    }
    // 1sg and 3sg masculine share one form.
    if slot == 0 {
        return "iptešu"
    }
    if slot == 2 {
        return "iptešu"
    }
    // First person plural.
    if slot == 4 {
        return "niptešu"
    }
    // Third person plural (default).
    return "iptešū"
}
|
||||
|
||||
fn akk_epesu_stative(slot: Int) -> String {
    // Stative of epēšu by paradigm cell. Any cell outside 0-4 gets the 3pl form.
    // Third person singular: masculine, then feminine in -at.
    if slot == 2 {
        return "epuš"
    }
    if slot == 3 {
        return "epšat"
    }
    // First person: singular -āku, plural -ānu.
    if slot == 0 {
        return "epšāku"
    }
    if slot == 4 {
        return "epšānu"
    }
    // Second person singular -āta.
    if slot == 1 {
        return "epšāta"
    }
    // Third person plural (default).
    return "epšū"
}
|
||||
|
||||
// ── Regular G-stem paradigms (iparras model) ──────────────────────────────────
|
||||
//
|
||||
// For regular verbs not in the irregular table, we apply the standard
|
||||
// OB G-stem paradigm using a caller-supplied present stem and perfect stem.
|
||||
// The stems must be pre-computed by the caller (or vocabulary layer).
|
||||
//
|
||||
// iparras (present) endings by slot:
|
||||
// 1sg: a- prefix
|
||||
// 2sg: ta- prefix
|
||||
// 3sg m: i- prefix
|
||||
// 3sg f: ta- prefix (same prefix as 2sg)
|
||||
// 1pl: ni- prefix
|
||||
// 3pl: i- prefix + -ū suffix
|
||||
//
|
||||
// For the generic fallback we use "iparras" as the model template.
|
||||
|
||||
fn akk_regular_present(stem: String, slot: Int) -> String {
    // Builds a present form by gluing the person prefix directly onto stem.
    // Nothing is stripped from stem first, so the caller must pass it
    // WITHOUT any person prefix. Any cell outside 0-4 gets the 3pl form,
    // which is the i- form followed by a plain "u".
    let i_form: String = "i" + stem      // 3sg m, and base of 3pl
    let ta_form: String = "ta" + stem    // shared by 2sg and 3sg f

    if slot == 2 {
        return i_form
    }
    if slot == 1 {
        return ta_form
    }
    if slot == 3 {
        return ta_form
    }
    if slot == 0 {
        return "a" + stem
    }
    if slot == 4 {
        return "ni" + stem
    }
    return i_form + "u"
}
|
||||
|
||||
fn akk_regular_perfect(stem: String, slot: Int) -> String {
    // Builds a perfect form by gluing the person prefix directly onto stem.
    // The function inserts no -ta- infix itself, so stem must already
    // contain it and must carry no person prefix. Any cell outside 0-4
    // gets the 3pl form (i- prefix plus a plain "u").
    let with_i: String = "i" + stem      // 3sg m, and base of 3pl
    let with_ta: String = "ta" + stem    // shared by 2sg and 3sg f

    if slot == 0 {
        return "a" + stem
    }
    if slot == 4 {
        return "ni" + stem
    }
    if slot == 1 {
        return with_ta
    }
    if slot == 3 {
        return with_ta
    }
    if slot == 2 {
        return with_i
    }
    return with_i + "u"
}
|
||||
|
||||
fn akk_regular_stative(stem: String, slot: Int) -> String {
    // Stative: the bare stem is the 3sg m form; every other cell adds a
    // person suffix. Any cell outside 0-4 gets the 3pl form in -ū.
    if slot == 2 {
        return stem
    }
    if slot == 3 {
        return stem + "at"
    }
    if slot == 0 {
        return stem + "āku"
    }
    if slot == 1 {
        return stem + "āta"
    }
    if slot == 4 {
        return stem + "ānu"
    }
    return stem + "ū"
}
|
||||
|
||||
// ── Known-verb dispatcher ─────────────────────────────────────────────────────
|
||||
|
||||
fn akk_known_verb(verb: String, tense: String, slot: Int) -> String {
    // Looks up the hand-written paradigm tables for the five covered verbs.
    // Returns "" when verb is not one of them, so callers can tell
    // "unknown" apart from a real form.
    //
    // Step 1: fold the spelling with diacritics onto its ASCII twin, so
    // each verb is dispatched in exactly one place below.
    if str_eq(verb, "bašû") { return akk_known_verb("bashu", tense, slot) }
    if str_eq(verb, "alāku") { return akk_known_verb("alaku", tense, slot) }
    if str_eq(verb, "amāru") { return akk_known_verb("amaru", tense, slot) }
    if str_eq(verb, "qabû") { return akk_known_verb("qabu", tense, slot) }
    if str_eq(verb, "epēšu") { return akk_known_verb("epesu", tense, slot) }

    // Step 2: dispatch on the ASCII key. Any tense other than "perfect"
    // or "stative" is treated as present.
    let perfect: Bool = str_eq(tense, "perfect")
    let stative: Bool = str_eq(tense, "stative")

    // to be / exist: the copula helper chooses the table itself
    if str_eq(verb, "bashu") {
        return akk_conjugate_copula(tense, slot)
    }

    // to go: there is no stative table, so stative shares the present forms
    if str_eq(verb, "alaku") {
        if perfect {
            return akk_alaku_perfect(slot)
        }
        return akk_alaku_present(slot)
    }

    // to see
    if str_eq(verb, "amaru") {
        if perfect {
            return akk_amaru_perfect(slot)
        }
        if stative {
            return akk_amaru_stative(slot)
        }
        return akk_amaru_present(slot)
    }

    // to say
    if str_eq(verb, "qabu") {
        if perfect {
            return akk_qabu_perfect(slot)
        }
        if stative {
            return akk_qabu_stative(slot)
        }
        return akk_qabu_present(slot)
    }

    // to do / make
    if str_eq(verb, "epesu") {
        if perfect {
            return akk_epesu_perfect(slot)
        }
        if stative {
            return akk_epesu_stative(slot)
        }
        return akk_epesu_present(slot)
    }

    return ""
}
|
||||
|
||||
// ── Main conjugation entry point ──────────────────────────────────────────────
|
||||
//
|
||||
// akk_conjugate: conjugate an Akkadian verb (G-stem).
|
||||
//
|
||||
// verb: G-stem infinitive (transliterated, e.g. "alāku", "amāru")
|
||||
// tense: "present" | "perfect" | "stative"
|
||||
// person: "first" | "second" | "third"
|
||||
// number: "singular" | "plural"
|
||||
//
|
||||
// Returns:
|
||||
// - Inflected form for known verbs
|
||||
// - verb unchanged as safe fallback for unknown verbs
|
||||
|
||||
fn akk_conjugate(verb: String, tense: String, person: String, number: String) -> String {
    // Resolution order: copula (including the English label "be"), then the
    // known-verb tables, then the input verb itself as a safe fallback.
    // The cell comes from akk_slot, which has no gender input, so the
    // 3sg feminine cell is never selected through this entry point.
    let cell: Int = akk_slot(person, number)

    if akk_is_copula(verb) {
        return akk_conjugate_copula(tense, cell)
    }

    // akk_known_verb signals "not in the table" with an empty string.
    let form: String = akk_known_verb(verb, tense, cell)
    if str_eq(form, "") {
        return verb
    }
    return form
}
|
||||
|
||||
// ── Noun declension ────────────────────────────────────────────────────────────
|
||||
//
|
||||
// akk_decline: decline an Akkadian noun for gram_case and number.
|
||||
//
|
||||
// Mimation: OB nouns bear final -m in all case endings (mimation).
|
||||
// The base noun (dictionary form) is the nominative singular with mimation.
|
||||
// We strip the nominative -um ending (if present) to obtain the bare stem,
|
||||
// then apply the requested ending.
|
||||
//
|
||||
// Masculine case endings (singular):
|
||||
// Nominative: -um
|
||||
// Accusative: -am
|
||||
// Genitive: -im
|
||||
//
|
||||
// Masculine case endings (plural):
|
||||
// Nominative: -ūtum (or -ū in construct)
|
||||
// Accusative/Genitive: -ātim (or -ī in construct)
|
||||
//
|
||||
// Feminine nouns (identified by -tum nom sg ending):
|
||||
// Sg nominative: -tum, accusative: -tam, genitive: -tim
|
||||
// Pl nominative: -ātum, genitive/accusative: -ātim
|
||||
//
|
||||
// Known irregular stems (the vocabulary layer should pass dictionary forms):
|
||||
// šarrum → stem: šarr-
|
||||
// awīlum → stem: awīl-
|
||||
// bītum → stem: bīt-
|
||||
// ilum → stem: il-
|
||||
|
||||
fn akk_strip_nom(noun: String) -> String {
    // Removes the nominative singular ending -um and returns what is left;
    // a noun that does not end in -um is returned unchanged.
    //
    // Nouns in -tum also end in -um, so they are handled by this same test
    // and keep their -t- in the result (bītum -> bīt, šarratum -> šarrat).
    // A separate "-tum" check placed after this one could never run, so
    // none is made here.
    if akk_str_ends(noun, "um") {
        return akk_str_drop_last(noun, 2)
    }
    return noun
}
|
||||
|
||||
fn akk_is_fem(noun: String) -> Bool {
    // Purely orthographic test: a noun counts as feminine when it ends in
    // -t- plus a singular case ending (-tum, -tam or -tim).
    let nominative: Bool = akk_str_ends(noun, "tum")
    let accusative: Bool = akk_str_ends(noun, "tam")

    if nominative {
        return true
    }
    if accusative {
        return true
    }
    return akk_str_ends(noun, "tim")
}
|
||||
|
||||
fn akk_decline(noun: String, gram_case: String, number: String) -> String {
    // Declines noun for case and number.
    //
    //   noun:      dictionary form (nominative singular, e.g. "šarrum")
    //   gram_case: "nom" | "acc" | "gen"; in the singular any other value
    //              is treated as "nom", in the plural any value other than
    //              "nom" gets the oblique form
    //   number:    "singular"; any other value is treated as plural
    let singular: Bool = str_eq(number, "singular")

    if akk_is_fem(noun) {
        // akk_is_fem matched -tum/-tam/-tim. Drop only the two-letter case
        // ending so the -t- stays in the stem, then add the bare ending.
        // Appending "tum"/"tam"/"tim" to a stem that still holds the -t-
        // would double it ("šarrattum", "bīttum").
        let fem_stem: String = akk_str_drop_last(noun, 2)

        if singular {
            if str_eq(gram_case, "acc") { return fem_stem + "am" }
            if str_eq(gram_case, "gen") { return fem_stem + "im" }
            return fem_stem + "um"
        }

        // Plural: -ātum / -ātim are added after the retained -t-.
        // NOTE(review): this gives "bītātum" for bītum but "šarratātum"
        // for šarratum — confirm whether the -(a)t- should be replaced
        // rather than kept for regular feminine nouns.
        if str_eq(gram_case, "nom") { return fem_stem + "ātum" }
        return fem_stem + "ātim"
    }

    // Masculine: strip nominative -um (if present), then add the ending.
    let stem: String = akk_strip_nom(noun)

    if singular {
        if str_eq(gram_case, "acc") { return stem + "am" }
        if str_eq(gram_case, "gen") { return stem + "im" }
        return stem + "um"
    }

    if str_eq(gram_case, "nom") { return stem + "ūtum" }
    return stem + "ātim"
}
|
||||
|
||||
// ── Noun phrase ────────────────────────────────────────────────────────────────
|
||||
//
|
||||
// akk_noun_phrase: produce the surface noun phrase.
|
||||
//
|
||||
// Akkadian has no definite or indefinite article. Determination is conveyed
|
||||
// by context, word order, and the genitive construct chain (status constructus).
|
||||
// The definite parameter is accepted but has no surface effect: the declined
|
||||
// noun is returned in either case.
|
||||
//
|
||||
// noun: dictionary form (nominative singular with mimation, e.g. "šarrum")
|
||||
// gram_case: "nom" | "acc" | "gen"
|
||||
// number: "singular" | "plural"
|
||||
// definite: "true" | "false" (no surface effect in Akkadian)
|
||||
|
||||
fn akk_noun_phrase(noun: String, gram_case: String, number: String, definite: String) -> String {
    // The phrase is just the declined noun; definite is accepted for
    // signature parity but is not read.
    let declined: String = akk_decline(noun, gram_case, number)
    return declined
}
|
||||
|
||||
// ── Canonical verb mapping ─────────────────────────────────────────────────────
|
||||
//
|
||||
// akk_map_canonical: map cross-lingual English canonical verb labels to
|
||||
// their Akkadian G-stem infinitive equivalents.
|
||||
|
||||
fn akk_map_canonical(verb: String) -> String {
    // Translates an English canonical label to the Akkadian infinitive used
    // as a key by the conjugation tables. Labels without a mapping are
    // returned unchanged.

    // qabû covers both "say" and "speak"
    if str_eq(verb, "say") {
        return "qabû"
    }
    if str_eq(verb, "speak") {
        return "qabû"
    }

    // epēšu covers both "do" and "make"
    if str_eq(verb, "do") {
        return "epēšu"
    }
    if str_eq(verb, "make") {
        return "epēšu"
    }

    // one-to-one mappings
    if str_eq(verb, "be") {
        return "bašû"
    }
    if str_eq(verb, "go") {
        return "alāku"
    }
    if str_eq(verb, "see") {
        return "amāru"
    }

    return verb
}
|
||||
@@ -1,31 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn akk_str_ends(s: String, suf: String) -> Bool
|
||||
extern fn akk_str_len(s: String) -> Int
|
||||
extern fn akk_str_drop_last(s: String, n: Int) -> String
|
||||
extern fn akk_slot(person: String, number: String) -> Int
|
||||
extern fn akk_slot_g(person: String, gender: String, number: String) -> Int
|
||||
extern fn akk_copula_present(slot: Int) -> String
|
||||
extern fn akk_copula_stative(slot: Int) -> String
|
||||
extern fn akk_is_copula(verb: String) -> Bool
|
||||
extern fn akk_conjugate_copula(tense: String, slot: Int) -> String
|
||||
extern fn akk_alaku_present(slot: Int) -> String
|
||||
extern fn akk_alaku_perfect(slot: Int) -> String
|
||||
extern fn akk_amaru_present(slot: Int) -> String
|
||||
extern fn akk_amaru_perfect(slot: Int) -> String
|
||||
extern fn akk_amaru_stative(slot: Int) -> String
|
||||
extern fn akk_qabu_present(slot: Int) -> String
|
||||
extern fn akk_qabu_perfect(slot: Int) -> String
|
||||
extern fn akk_qabu_stative(slot: Int) -> String
|
||||
extern fn akk_epesu_present(slot: Int) -> String
|
||||
extern fn akk_epesu_perfect(slot: Int) -> String
|
||||
extern fn akk_epesu_stative(slot: Int) -> String
|
||||
extern fn akk_regular_present(stem: String, slot: Int) -> String
|
||||
extern fn akk_regular_perfect(stem: String, slot: Int) -> String
|
||||
extern fn akk_regular_stative(stem: String, slot: Int) -> String
|
||||
extern fn akk_known_verb(verb: String, tense: String, slot: Int) -> String
|
||||
extern fn akk_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn akk_strip_nom(noun: String) -> String
|
||||
extern fn akk_is_fem(noun: String) -> Bool
|
||||
extern fn akk_decline(noun: String, gram_case: String, number: String) -> String
|
||||
extern fn akk_noun_phrase(noun: String, gram_case: String, number: String, definite: String) -> String
|
||||
extern fn akk_map_canonical(verb: String) -> String
|
||||
@@ -1,752 +0,0 @@
|
||||
// morphology-ang.el - Old English (Anglo-Saxon) morphology for the NLG engine.
|
||||
//
|
||||
// Implements Old English verb conjugation, noun declension, and the definite
|
||||
// article/demonstrative pronoun. Designed as a companion to morphology.el and
|
||||
// called by the engine when the language profile code is "ang".
|
||||
//
|
||||
// Language profile: code=ang, name=Old English, morph_type=fusional,
|
||||
// word_order=SOV, question_strategy=intonation, script=latin, family=germanic.
|
||||
//
|
||||
// Typology note: Old English is a synthetic Germanic language with four
|
||||
// grammatical cases (nominative, accusative, genitive, dative), three genders,
|
||||
// and strong/weak noun and verb classes. Strong verbs form their past tense by
|
||||
// internal vowel change (ablaut); weak verbs use a dental (-de/-ode) suffix.
|
||||
// Long vowels are marked with a macron (ā ē ī ō ū) and are preserved in all
|
||||
// string literals; ǣ, æ, þ, ð, and ƿ (wynn) are used where historically
|
||||
// appropriate. V2 (verb-second) word order applies in main clauses but is not
|
||||
// enforced by this module — the realizer handles constituent ordering.
|
||||
//
|
||||
// Verb conjugation covered:
|
||||
// Tenses: present, past
|
||||
// Persons: first/second/third × singular/plural (slots 0-5)
|
||||
// Classes: weak (regular -ian), strong irregular table
|
||||
// Irregulars: wesan/beon (be), habban (have), gān (go), cuman (come),
|
||||
// secgan (say), sēon (see), dōn (do), willan (want), magan (can)
|
||||
// Canonical map: "be" -> "wesan" (past) / "beon" (present)
|
||||
//
|
||||
// Noun declension covered:
|
||||
// Strong masc a-stem (cyning pattern): nom/acc -∅, gen -es, dat -e; pl -as/-a/-um
|
||||
// Strong neut a-stem (word pattern): sg same as masc; pl nom/acc -∅
|
||||
// Weak n-stem (nama pattern): sg nom -a, obl -an; pl -an/-ena/-um
|
||||
//
|
||||
// Article: simplified demonstrative/article forms for masculine, feminine,
|
||||
// neuter (se/sēo/þæt), fully declined.
|
||||
//
|
||||
// Depends on: morphology.el (str_ends_with, str_len, str_slice, str_eq)
|
||||
|
||||
// ── String helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
import "morphology.el"
|
||||
fn ang_str_ends(s: String, suf: String) -> Bool {
    // Old English module alias for str_ends_with; adds no logic of its own.
    let matched: Bool = str_ends_with(s, suf)
    return matched
}
|
||||
|
||||
fn ang_str_drop_last(s: String, n: Int) -> String {
    // Returns s without its final n units (as counted by str_len).
    // Asking for the whole length or more yields the empty string.
    // NOTE(review): whether str_len / str_slice count bytes or characters
    // is not visible here; Old English text contains multi-byte letters
    // (þ, ǣ, ā) — confirm.
    let total: Int = str_len(s)
    if n < total {
        let keep: Int = total - n
        return str_slice(s, 0, keep)
    }
    return ""
}
|
||||
|
||||
fn ang_str_last_char(s: String) -> String {
    // Returns the final unit of s, or "" for an empty string.
    // NOTE(review): if str_len/str_slice count bytes rather than characters,
    // this would split multi-byte letters such as þ or ā — confirm.
    let total: Int = str_len(s)
    if total == 0 {
        return ""
    }
    let start: Int = total - 1
    return str_slice(s, start, total)
}
|
||||
|
||||
fn ang_str_last2(s: String) -> String {
    // Returns the final two units of s; strings shorter than two units are
    // returned unchanged.
    let total: Int = str_len(s)
    if total < 2 {
        return s
    }
    let start: Int = total - 2
    return str_slice(s, start, total)
}
|
||||
|
||||
// ── Person/number slot ─────────────────────────────────────────────────────────
|
||||
//
|
||||
// Maps person × number to a 0-based index for paradigm tables.
|
||||
// 0 = 1st singular (ic)
|
||||
// 1 = 2nd singular (þū)
|
||||
// 2 = 3rd singular (hē/hēo/hit)
|
||||
// 3 = 1st plural (wē)
|
||||
// 4 = 2nd plural (gē)
|
||||
// 5 = 3rd plural (hīe)
|
||||
//
|
||||
// Old English also has a dual (wit, git) — not handled; dual falls through
|
||||
// to plural.
|
||||
|
||||
fn ang_slot(person: String, number: String) -> Int {
    // Singular slots are 0-2; every other number value (plural, dual, or
    // anything unrecognised) uses the plural slots 3-5.
    // A person that is neither "first" nor "second" is treated as third.
    if str_eq(number, "singular") {
        if str_eq(person, "first") {
            return 0
        }
        if str_eq(person, "second") {
            return 1
        }
        return 2
    }
    if str_eq(person, "first") {
        return 3
    }
    if str_eq(person, "second") {
        return 4
    }
    return 5
}
|
||||
|
||||
// ── Canonical verb mapping ─────────────────────────────────────────────────────
|
||||
//
|
||||
// The semantic layer may pass English canonical labels. Map to Old English
|
||||
// citation (infinitive) forms. "be" maps to "beon" for present and "wesan"
|
||||
// for past — the caller selects tense, so we map "be" to "beon" and handle
|
||||
// the past-tense wesan forms inside the conjugation function.
|
||||
|
||||
fn ang_map_canonical(verb: String) -> String {
    // English label -> Old English infinitive. Labels are listed
    // alphabetically; anything not listed is returned unchanged.
    if str_eq(verb, "be") {
        return "beon"
    }
    if str_eq(verb, "can") {
        return "magan"
    }
    if str_eq(verb, "come") {
        return "cuman"
    }
    if str_eq(verb, "do") {
        return "dōn"
    }
    if str_eq(verb, "find") {
        return "findan"
    }
    if str_eq(verb, "give") {
        return "giefan"
    }
    if str_eq(verb, "go") {
        return "gān"
    }
    if str_eq(verb, "have") {
        return "habban"
    }
    if str_eq(verb, "know") {
        return "witan"
    }
    if str_eq(verb, "make") {
        return "macian"
    }
    if str_eq(verb, "say") {
        return "secgan"
    }
    if str_eq(verb, "see") {
        return "sēon"
    }
    if str_eq(verb, "take") {
        return "niman"
    }
    // "want" and "will" share one target verb.
    if str_eq(verb, "want") {
        return "willan"
    }
    if str_eq(verb, "will") {
        return "willan"
    }
    return verb
}
|
||||
|
||||
// ── Irregular: wesan (to be — past tense forms) ───────────────────────────────
|
||||
//
|
||||
// Past: wæs wǣre wæs wǣron wǣron wǣron
|
||||
|
||||
fn ang_wesan_past(slot: Int) -> String {
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "wǣre"
    }
    if slot == 0 {
        return "wæs"
    }
    if slot == 2 {
        return "wæs"
    }
    // Every remaining slot value yields the shared plural form.
    return "wǣron"
}
|
||||
|
||||
// ── Irregular: beon (to be — present / habitual / future) ────────────────────
|
||||
//
|
||||
// Present: bēo bist biþ bēoþ bēoþ bēoþ
|
||||
//
|
||||
// The present indicative of "wesan" is eom/eart/is/sind — that paradigm is
|
||||
// also provided below for completeness and for callers who specifically request
|
||||
// wesan present.
|
||||
|
||||
fn ang_beon_present(slot: Int) -> String {
    // Three distinct singular forms; any other slot value gets the plural.
    if slot == 0 {
        return "bēo"
    }
    if slot == 1 {
        return "bist"
    }
    if slot == 2 {
        return "biþ"
    }
    return "bēoþ"
}
|
||||
|
||||
// ── Irregular: wesan present (eom/eart/is/sind) ───────────────────────────────
|
||||
//
|
||||
// Present: eom eart is sind/sindon sind sind
|
||||
|
||||
fn ang_wesan_present(slot: Int) -> String {
    // Three distinct singular forms; any other slot value gets "sind".
    if slot == 0 {
        return "eom"
    }
    if slot == 1 {
        return "eart"
    }
    if slot == 2 {
        return "is"
    }
    return "sind"
}
|
||||
|
||||
// ── Irregular: habban (to have) ───────────────────────────────────────────────
|
||||
//
|
||||
// Present: hæbbe hæfst hæfþ habbað habbað habbað
|
||||
// Past: hæfde hæfdest hæfde hæfdon hæfdon hæfdon
|
||||
|
||||
fn ang_habban_present(slot: Int) -> String {
    // Three distinct singular forms; any other slot value gets the plural.
    if slot == 0 {
        return "hæbbe"
    }
    if slot == 1 {
        return "hæfst"
    }
    if slot == 2 {
        return "hæfþ"
    }
    return "habbað"
}
|
||||
|
||||
fn ang_habban_past(slot: Int) -> String {
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "hæfdest"
    }
    if slot == 0 {
        return "hæfde"
    }
    if slot == 2 {
        return "hæfde"
    }
    // Every remaining slot value yields the shared plural form.
    return "hæfdon"
}
|
||||
|
||||
// ── Irregular: gān (to go) ────────────────────────────────────────────────────
|
||||
//
|
||||
// Present: gā gǣst gǣþ gāð gāð gāð
|
||||
// Past: ēode ēodest ēode ēodon ēodon ēodon
|
||||
|
||||
fn ang_gan_present(slot: Int) -> String {
    // Three distinct singular forms; any other slot value gets the plural.
    if slot == 0 {
        return "gā"
    }
    if slot == 1 {
        return "gǣst"
    }
    if slot == 2 {
        return "gǣþ"
    }
    return "gāð"
}
|
||||
|
||||
fn ang_gan_past(slot: Int) -> String {
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "ēodest"
    }
    if slot == 0 {
        return "ēode"
    }
    if slot == 2 {
        return "ēode"
    }
    // Every remaining slot value yields the shared plural form.
    return "ēodon"
}
|
||||
|
||||
// ── Irregular: cuman (to come) ────────────────────────────────────────────────
|
||||
//
|
||||
// Present: cume cymst cymþ cumað cumað cumað
|
||||
// Past: cōm cōme cōm cōmon cōmon cōmon
|
||||
|
||||
fn ang_cuman_present(slot: Int) -> String {
    // Three distinct singular forms; any other slot value gets the plural.
    if slot == 0 {
        return "cume"
    }
    if slot == 1 {
        return "cymst"
    }
    if slot == 2 {
        return "cymþ"
    }
    return "cumað"
}
|
||||
|
||||
fn ang_cuman_past(slot: Int) -> String {
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "cōme"
    }
    if slot == 0 {
        return "cōm"
    }
    if slot == 2 {
        return "cōm"
    }
    // Every remaining slot value yields the shared plural form.
    return "cōmon"
}
|
||||
|
||||
// ── Irregular: secgan (to say) ────────────────────────────────────────────────
|
||||
//
|
||||
// Present: secge sagast sagað secgað secgað secgað
|
||||
// Past: sægde sægdest sægde sægdon sægdon sægdon
|
||||
|
||||
fn ang_secgan_present(slot: Int) -> String {
    // Three distinct singular forms; any other slot value gets the plural.
    if slot == 0 {
        return "secge"
    }
    if slot == 1 {
        return "sagast"
    }
    if slot == 2 {
        return "sagað"
    }
    return "secgað"
}
|
||||
|
||||
fn ang_secgan_past(slot: Int) -> String {
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "sægdest"
    }
    if slot == 0 {
        return "sægde"
    }
    if slot == 2 {
        return "sægde"
    }
    // Every remaining slot value yields the shared plural form.
    return "sægdon"
}
|
||||
|
||||
// ── Irregular: sēon (to see) ──────────────────────────────────────────────────
|
||||
//
|
||||
// Present: sēo siehst siehþ sēoð sēoð sēoð
|
||||
// Past: seah sāwe seah sāwon sāwon sāwon
|
||||
|
||||
fn ang_seon_present(slot: Int) -> String {
    // Three distinct singular forms; any other slot value gets the plural.
    if slot == 0 {
        return "sēo"
    }
    if slot == 1 {
        return "siehst"
    }
    if slot == 2 {
        return "siehþ"
    }
    return "sēoð"
}
|
||||
|
||||
fn ang_seon_past(slot: Int) -> String {
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "sāwe"
    }
    if slot == 0 {
        return "seah"
    }
    if slot == 2 {
        return "seah"
    }
    // Every remaining slot value yields the shared plural form.
    return "sāwon"
}
|
||||
|
||||
// ── Irregular: dōn (to do) ────────────────────────────────────────────────────
|
||||
//
|
||||
// Present: dō dēst dēþ dōð dōð dōð
|
||||
// Past: dyde dydest dyde dydon dydon dydon
|
||||
|
||||
fn ang_don_present(slot: Int) -> String {
    // Three distinct singular forms; any other slot value gets the plural.
    if slot == 0 {
        return "dō"
    }
    if slot == 1 {
        return "dēst"
    }
    if slot == 2 {
        return "dēþ"
    }
    return "dōð"
}
|
||||
|
||||
fn ang_don_past(slot: Int) -> String {
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "dydest"
    }
    if slot == 0 {
        return "dyde"
    }
    if slot == 2 {
        return "dyde"
    }
    // Every remaining slot value yields the shared plural form.
    return "dydon"
}
|
||||
|
||||
// ── Irregular: willan (to want / will) ────────────────────────────────────────
|
||||
//
|
||||
// Present: wille wilt wile willað willað willað
|
||||
// Past: wolde woldest wolde woldon woldon woldon
|
||||
|
||||
fn ang_willan_present(slot: Int) -> String {
    // Three distinct singular forms; any other slot value gets the plural.
    if slot == 0 {
        return "wille"
    }
    if slot == 1 {
        return "wilt"
    }
    if slot == 2 {
        return "wile"
    }
    return "willað"
}
|
||||
|
||||
fn ang_willan_past(slot: Int) -> String {
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "woldest"
    }
    if slot == 0 {
        return "wolde"
    }
    if slot == 2 {
        return "wolde"
    }
    // Every remaining slot value yields the shared plural form.
    return "woldon"
}
|
||||
|
||||
// ── Irregular: magan (to be able / can) ──────────────────────────────────────
|
||||
//
|
||||
// Present: mæg meaht mæg magon magon magon
|
||||
// Past: meahte meahtest meahte meahton meahton meahton
|
||||
|
||||
fn ang_magan_present(slot: Int) -> String {
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "meaht"
    }
    if slot == 0 {
        return "mæg"
    }
    if slot == 2 {
        return "mæg"
    }
    // Every remaining slot value yields the shared plural form.
    return "magon"
}
|
||||
|
||||
fn ang_magan_past(slot: Int) -> String {
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "meahtest"
    }
    if slot == 0 {
        return "meahte"
    }
    if slot == 2 {
        return "meahte"
    }
    // Every remaining slot value yields the shared plural form.
    return "meahton"
}
|
||||
|
||||
// ── Irregular: witan (to know) ────────────────────────────────────────────────
|
||||
//
|
||||
// Present: wāt wāst wāt witon witon witon
|
||||
// Past: wisse/wiste wissest wisse wisson wisson wisson
|
||||
|
||||
fn ang_witan_present(slot: Int) -> String {
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "wāst"
    }
    if slot == 0 {
        return "wāt"
    }
    if slot == 2 {
        return "wāt"
    }
    // Every remaining slot value yields the shared plural form.
    return "witon"
}
|
||||
|
||||
fn ang_witan_past(slot: Int) -> String {
    // Only the wisse- variants are produced (no wiste).
    // 2nd singular stands alone; 1st and 3rd singular coincide.
    if slot == 1 {
        return "wissest"
    }
    if slot == 0 {
        return "wisse"
    }
    if slot == 2 {
        return "wisse"
    }
    // Every remaining slot value yields the shared plural form.
    return "wisson"
}
|
||||
|
||||
// ── Weak verb: present-tense endings ─────────────────────────────────────────
|
||||
//
|
||||
// Weak verbs with -ian infinitives form their present tense as:
|
||||
// stem + -e, -est, -eþ, -aþ, -aþ, -aþ
|
||||
//
|
||||
// The stem is the infinitive with -ian stripped (or -an for class-2 verbs).
|
||||
|
||||
fn ang_weak_present_ending(slot: Int) -> String {
    // Returns only the ending; the caller prepends the stem.
    // Any slot value outside 0-2 receives the plural ending.
    if slot == 0 {
        return "e"
    }
    if slot == 1 {
        return "est"
    }
    if slot == 2 {
        return "eþ"
    }
    return "aþ"
}
|
||||
|
||||
// ── Weak verb: past-tense ending selection ────────────────────────────────────
|
||||
//
|
||||
// Class 1 (-ian with short stem): past -ede (e.g. nerian -> nerede)
|
||||
// Class 2 (-ian with long/heavy stem): past -ode (e.g. macian -> macode)
|
||||
//   Class 3 (small group, e.g. habban, secgan): past -de (habban -> hæfde — handled as irregular)
|
||||
//
|
||||
// Heuristic: if the stem length (per str_len) is at most 2, use -ede; otherwise use -ode.
|
||||
// This is a simplification; correct assignment requires lexical class marking.
|
||||
//
|
||||
// For the past, all persons in the plural share -on, and all singulars share
|
||||
// the same dental-suffixed stem.
|
||||
|
||||
fn ang_weak_past_stem(stem: String) -> String {
    // Length-based guess at the dental suffix: stems of length 3 or more
    // (per str_len) take -ode, shorter ones take -ede.
    // NOTE(review): "ner" (from nerian) has length 3 and so yields "nerode",
    // not the "nerede" a class-1 verb would need — lexical class marking
    // would be required to get this right.
    if str_len(stem) >= 3 {
        return stem + "ode"
    }
    return stem + "ede"
}
|
||||
|
||||
fn ang_weak_past(stem: String, slot: Int) -> String {
    // sg_form is the dental-suffixed stem, used as-is for 1st/3rd singular.
    let sg_form: String = ang_weak_past_stem(stem)
    if slot == 0 {
        return sg_form
    }
    if slot == 2 {
        return sg_form
    }
    if slot == 1 {
        return sg_form + "st"
    }
    // Any other slot value: replace the final -e with plural -on.
    return ang_str_drop_last(sg_form, 1) + "on"
}
|
||||
|
||||
// ── Stem extraction for weak verbs ────────────────────────────────────────────
|
||||
//
|
||||
// Strip the infinitive ending to recover the stem:
|
||||
// -ian -> strip 3 chars (nerian -> ner-, macian -> mac-)
|
||||
// -an -> strip 2 chars (habban -> habb-; fallback for non -ian)
|
||||
// otherwise: return as-is
|
||||
|
||||
fn ang_weak_stem(verb: String) -> String {
    // Every -ian infinitive also ends in -an, so test the shorter suffix
    // first and then pick how much to remove.
    if str_ends_with(verb, "an") {
        if str_ends_with(verb, "ian") {
            return ang_str_drop_last(verb, 3)
        }
        return ang_str_drop_last(verb, 2)
    }
    // No recognised infinitive ending: hand the input back untouched.
    return verb
}
|
||||
|
||||
// ── ang_conjugate: main conjugation entry point ───────────────────────────────
|
||||
//
|
||||
// verb: Old English infinitive or English canonical label
|
||||
// tense: "present" | "past"
|
||||
// person: "first" | "second" | "third"
|
||||
// number: "singular" | "plural"
|
||||
//
|
||||
// Strategy:
|
||||
// 1. Map canonical English labels to OE verbs.
|
||||
// 2. Check the full irregular table.
|
||||
// 3. Fall back to weak conjugation for unknown -ian/-an verbs.
|
||||
// 4. Return the base form if nothing matches.
|
||||
|
||||
// Present-tense form of an already-canonicalised Old English verb.
// Helper for ang_conjugate: irregular table first, then the regular weak
// pattern for infinitives in -an/-ian, otherwise the verb unchanged.
fn ang_conjugate_present(v: String, slot: Int) -> String {
    if str_eq(v, "beon")   { return ang_beon_present(slot) }
    if str_eq(v, "wesan")  { return ang_wesan_present(slot) }
    if str_eq(v, "habban") { return ang_habban_present(slot) }
    if str_eq(v, "gān")    { return ang_gan_present(slot) }
    if str_eq(v, "cuman")  { return ang_cuman_present(slot) }
    if str_eq(v, "secgan") { return ang_secgan_present(slot) }
    if str_eq(v, "sēon")   { return ang_seon_present(slot) }
    if str_eq(v, "dōn")    { return ang_don_present(slot) }
    if str_eq(v, "willan") { return ang_willan_present(slot) }
    if str_eq(v, "magan")  { return ang_magan_present(slot) }
    if str_eq(v, "witan")  { return ang_witan_present(slot) }

    // Regular weak conjugation only makes sense when there is an infinitive
    // ending to strip; "-an" also covers "-ian".
    if ang_str_ends(v, "an") {
        return ang_weak_stem(v) + ang_weak_present_ending(slot)
    }

    // Nothing matched: return the base form rather than gluing an ending
    // onto an unanalysed word.
    return v
}

// Past-tense form of an already-canonicalised Old English verb.
// Helper for ang_conjugate; same lookup order as ang_conjugate_present.
fn ang_conjugate_past(v: String, slot: Int) -> String {
    // "beon" has no past of its own here; the wesan past forms are used.
    if str_eq(v, "beon")   { return ang_wesan_past(slot) }
    if str_eq(v, "wesan")  { return ang_wesan_past(slot) }
    if str_eq(v, "habban") { return ang_habban_past(slot) }
    if str_eq(v, "gān")    { return ang_gan_past(slot) }
    if str_eq(v, "cuman")  { return ang_cuman_past(slot) }
    if str_eq(v, "secgan") { return ang_secgan_past(slot) }
    if str_eq(v, "sēon")   { return ang_seon_past(slot) }
    if str_eq(v, "dōn")    { return ang_don_past(slot) }
    if str_eq(v, "willan") { return ang_willan_past(slot) }
    if str_eq(v, "magan")  { return ang_magan_past(slot) }
    if str_eq(v, "witan")  { return ang_witan_past(slot) }

    if ang_str_ends(v, "an") {
        return ang_weak_past(ang_weak_stem(v), slot)
    }

    return v
}

fn ang_conjugate(verb: String, tense: String, person: String, number: String) -> String {
    // Conjugates verb (an Old English infinitive or an English canonical
    // label) for the given tense, person and number.
    //
    // Fallbacks, applied uniformly to irregular and regular verbs:
    //   - a tense other than "present" / "past" returns the infinitive;
    //   - a verb that is not in the irregular table and does not end in
    //     -an/-ian is returned unchanged.
    let v: String = ang_map_canonical(verb)
    let slot: Int = ang_slot(person, number)

    if str_eq(tense, "present") {
        return ang_conjugate_present(v, slot)
    }

    if str_eq(tense, "past") {
        return ang_conjugate_past(v, slot)
    }

    // Unknown tense: return infinitive
    return v
}
|
||||
|
||||
// ── Noun declension class detection ───────────────────────────────────────────
|
||||
//
|
||||
// Infer the declension class from the nominative singular form and an optional
|
||||
// gender hint. Without a full lexicon, ending-based heuristics are used:
|
||||
//
|
||||
// ends in -a -> weak n-stem (nama pattern)
|
||||
// ends in -e (long) -> may be various; default to strong masc a-stem
|
||||
// any other ending -> strong a-stem; gender distinguishes masc vs neut
|
||||
//
|
||||
// The caller may pass gender as a hint:
|
||||
// "masculine" | "feminine" | "neuter" | "" (empty = infer)
|
||||
//
|
||||
// For simplicity this module handles three paradigms:
|
||||
// "strong_masc" — a-stem masculine (cyning, mann)
|
||||
// "strong_neut" — a-stem neuter (word, scip)
|
||||
//   "weak"        — n-stem (nama); weak nouns ending in -e such as ēage are not detected
|
||||
|
||||
fn ang_declension(noun: String, gender: String) -> String {
    // A final -a decides the class before the gender hint is consulted.
    if str_ends_with(noun, "a") {
        return "weak"
    }
    // Only the "neuter" hint selects the neuter paradigm; every other value
    // (including "feminine" and "") falls through to the masculine a-stem.
    if str_eq(gender, "neuter") {
        return "strong_neut"
    }
    return "strong_masc"
}
|
||||
|
||||
// ── Strong masculine a-stem (cyning pattern) ──────────────────────────────────
|
||||
//
|
||||
// Stem: the noun as given (nom sg lacks an inflectional ending in this class).
|
||||
//
|
||||
// Singular: nom -∅ acc -∅ gen -es dat -e
|
||||
// Plural: nom -as acc -as gen -a dat -um
|
||||
|
||||
fn ang_decline_strong_masc(noun: String, gram_case: String, number: String) -> String {
    // Only genitive and dative carry their own ending; every other case
    // value uses the default form for its number.
    if str_eq(number, "singular") {
        if str_eq(gram_case, "genitive") {
            return noun + "es"
        }
        if str_eq(gram_case, "dative") {
            return noun + "e"
        }
        // nominative, accusative, or unrecognised case: bare noun
        return noun
    }

    // Any number other than "singular" is declined as plural.
    if str_eq(gram_case, "genitive") {
        return noun + "a"
    }
    if str_eq(gram_case, "dative") {
        return noun + "um"
    }
    // nominative, accusative, or unrecognised case
    return noun + "as"
}
|
||||
|
||||
// ── Strong neuter a-stem (word pattern) ───────────────────────────────────────
|
||||
//
|
||||
// Singular: same as strong masc
|
||||
// Plural: nom/acc -∅ gen -a dat -um
|
||||
|
||||
fn ang_decline_strong_neut(noun: String, gram_case: String, number: String) -> String {
    // Genitive and dative are the only cases with an ending; nominative,
    // accusative and unrecognised cases return the bare noun in both numbers.
    if str_eq(number, "singular") {
        if str_eq(gram_case, "genitive") {
            return noun + "es"
        }
        if str_eq(gram_case, "dative") {
            return noun + "e"
        }
        return noun
    }

    // Any number other than "singular" is declined as plural.
    if str_eq(gram_case, "genitive") {
        return noun + "a"
    }
    if str_eq(gram_case, "dative") {
        return noun + "um"
    }
    return noun
}
|
||||
|
||||
// ── Weak n-stem (nama pattern) ────────────────────────────────────────────────
|
||||
//
|
||||
// The nom sg ends in -a; the oblique stem is formed by stripping -a and adding
|
||||
// -an. Plural genitive is -ena.
|
||||
//
|
||||
// Singular: nom -a acc -an gen -an dat -an
|
||||
// Plural: nom -an acc -an gen -ena dat -um
|
||||
|
||||
fn ang_decline_weak(noun: String, gram_case: String, number: String) -> String {
    // The last unit of the noun is removed unconditionally to form the stem.
    // presumably callers only pass nouns ending in -a, as ang_decline does — verify.
    let base: String = ang_str_drop_last(noun, 1)
    let oblique: String = base + "an"

    if str_eq(number, "singular") {
        if str_eq(gram_case, "accusative") {
            return oblique
        }
        if str_eq(gram_case, "genitive") {
            return oblique
        }
        if str_eq(gram_case, "dative") {
            return oblique
        }
        // nominative or unrecognised case: citation form
        return noun
    }

    // Any number other than "singular" is declined as plural.
    if str_eq(gram_case, "genitive") {
        return base + "ena"
    }
    if str_eq(gram_case, "dative") {
        return base + "um"
    }
    // nominative, accusative, or unrecognised case
    return oblique
}
|
||||
|
||||
// ── ang_decline: main declension entry point ──────────────────────────────────
|
||||
//
|
||||
// noun: nominative singular Old English noun (e.g. "cyning", "word", "nama")
|
||||
// gram_case: "nominative" | "accusative" | "genitive" | "dative"
|
||||
// number: "singular" | "plural"
|
||||
// gender: "masculine" | "neuter" | "feminine" | "" (empty triggers inference)
|
||||
//
|
||||
// Returns the inflected form. Falls back to the nominative singular for any
|
||||
// unrecognised combination.
|
||||
|
||||
fn ang_decline(noun: String, gram_case: String, number: String, gender: String) -> String {
    // Pick the paradigm, then hand off to the matching declension routine.
    let paradigm: String = ang_declension(noun, gender)

    if str_eq(paradigm, "weak") {
        return ang_decline_weak(noun, gram_case, number)
    }
    if str_eq(paradigm, "strong_neut") {
        return ang_decline_strong_neut(noun, gram_case, number)
    }
    if str_eq(paradigm, "strong_masc") {
        return ang_decline_strong_masc(noun, gram_case, number)
    }

    // Defensive fallback for a paradigm label not handled above.
    return noun
}
|
||||
|
||||
// ── Definite article / demonstrative: se/sēo/þæt ─────────────────────────────
|
||||
//
|
||||
// Old English used the demonstrative pronoun se/sēo/þæt as a definite article.
|
||||
// The full paradigm (gender × case × number) is given below.
|
||||
//
|
||||
// Masculine:
|
||||
// sg: nom se acc þone gen þæs dat þǣm
|
||||
// pl: nom þā acc þā gen þāra dat þǣm
|
||||
//
|
||||
// Feminine:
|
||||
// sg: nom sēo acc þā gen þǣre dat þǣre
|
||||
// pl: nom þā acc þā gen þāra dat þǣm
|
||||
//
|
||||
// Neuter:
|
||||
// sg: nom þæt acc þæt gen þæs dat þǣm
|
||||
// pl: nom þā acc þā gen þāra dat þǣm
|
||||
|
||||
fn ang_article_masculine(gram_case: String, number: String) -> String {
    if str_eq(number, "singular") {
        if str_eq(gram_case, "accusative") {
            return "þone"
        }
        if str_eq(gram_case, "genitive") {
            return "þæs"
        }
        if str_eq(gram_case, "dative") {
            return "þǣm"
        }
        // nominative or unrecognised case
        return "se"
    }

    // Any number other than "singular" uses the plural forms.
    if str_eq(gram_case, "genitive") {
        return "þāra"
    }
    if str_eq(gram_case, "dative") {
        return "þǣm"
    }
    // nominative, accusative, or unrecognised case
    return "þā"
}
|
||||
|
||||
fn ang_article_feminine(gram_case: String, number: String) -> String {
    if str_eq(number, "singular") {
        if str_eq(gram_case, "accusative") {
            return "þā"
        }
        // genitive and dative singular share one form
        if str_eq(gram_case, "genitive") {
            return "þǣre"
        }
        if str_eq(gram_case, "dative") {
            return "þǣre"
        }
        // nominative or unrecognised case
        return "sēo"
    }

    // Any number other than "singular" uses the plural forms.
    if str_eq(gram_case, "genitive") {
        return "þāra"
    }
    if str_eq(gram_case, "dative") {
        return "þǣm"
    }
    // nominative, accusative, or unrecognised case
    return "þā"
}
|
||||
|
||||
fn ang_article_neuter(gram_case: String, number: String) -> String {
    if str_eq(number, "singular") {
        if str_eq(gram_case, "genitive") {
            return "þæs"
        }
        if str_eq(gram_case, "dative") {
            return "þǣm"
        }
        // nominative, accusative, or unrecognised case
        return "þæt"
    }

    // Any number other than "singular" uses the plural forms.
    if str_eq(gram_case, "genitive") {
        return "þāra"
    }
    if str_eq(gram_case, "dative") {
        return "þǣm"
    }
    // nominative, accusative, or unrecognised case
    return "þā"
}
|
||||
|
||||
fn ang_article(gender: String, gram_case: String, number: String) -> String {
    // Dispatch on gender. Any value other than "feminine" or "masculine"
    // (including "") is handled by the neuter table.
    if str_eq(gender, "feminine") {
        return ang_article_feminine(gram_case, number)
    }
    if str_eq(gender, "masculine") {
        return ang_article_masculine(gram_case, number)
    }
    return ang_article_neuter(gram_case, number)
}
|
||||
|
||||
// ── Gender inference from noun form ───────────────────────────────────────────
|
||||
//
|
||||
// A last-resort heuristic when the caller provides no gender hint.
|
||||
// -a ending strongly suggests weak masculine or neuter (but most -a nouns are
|
||||
// masculine weak). Nouns ending in -u or -e are classified as feminine; without a full lexicon, masculine is the default for every other ending.
|
||||
|
||||
fn ang_infer_gender(noun: String) -> String {
    // A final -e or -u is read as feminine; everything else defaults to
    // masculine. "neuter" is never returned by this heuristic.
    if str_ends_with(noun, "e") {
        return "feminine"
    }
    if str_ends_with(noun, "u") {
        return "feminine"
    }
    return "masculine"
}
|
||||
|
||||
// ── ang_noun_phrase: noun phrase builder ──────────────────────────────────────
|
||||
//
|
||||
// Produces a declined noun with optional definite article (demonstrative)
|
||||
// prepended. Gender is always inferred from the noun form (no gender hint is accepted).
|
||||
//
|
||||
// noun: nominative singular Old English noun
|
||||
// gram_case: "nominative" | "accusative" | "genitive" | "dative"
|
||||
// number: "singular" | "plural"
|
||||
// definite: "true" | "false"
|
||||
|
||||
fn ang_noun_phrase(noun: String, gram_case: String, number: String, definite: String) -> String {
    // Gender comes from ang_infer_gender and drives both the declension
    // choice and the article table.
    let inferred: String = ang_infer_gender(noun)
    let head: String = ang_decline(noun, gram_case, number, inferred)

    // Only the exact string "true" requests an article.
    if str_eq(definite, "true") {
        return ang_article(inferred, gram_case, number) + " " + head
    }
    return head
}
|
||||
@@ -1,44 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn ang_str_ends(s: String, suf: String) -> Bool
|
||||
extern fn ang_str_drop_last(s: String, n: Int) -> String
|
||||
extern fn ang_str_last_char(s: String) -> String
|
||||
extern fn ang_str_last2(s: String) -> String
|
||||
extern fn ang_slot(person: String, number: String) -> Int
|
||||
extern fn ang_map_canonical(verb: String) -> String
|
||||
extern fn ang_wesan_past(slot: Int) -> String
|
||||
extern fn ang_beon_present(slot: Int) -> String
|
||||
extern fn ang_wesan_present(slot: Int) -> String
|
||||
extern fn ang_habban_present(slot: Int) -> String
|
||||
extern fn ang_habban_past(slot: Int) -> String
|
||||
extern fn ang_gan_present(slot: Int) -> String
|
||||
extern fn ang_gan_past(slot: Int) -> String
|
||||
extern fn ang_cuman_present(slot: Int) -> String
|
||||
extern fn ang_cuman_past(slot: Int) -> String
|
||||
extern fn ang_secgan_present(slot: Int) -> String
|
||||
extern fn ang_secgan_past(slot: Int) -> String
|
||||
extern fn ang_seon_present(slot: Int) -> String
|
||||
extern fn ang_seon_past(slot: Int) -> String
|
||||
extern fn ang_don_present(slot: Int) -> String
|
||||
extern fn ang_don_past(slot: Int) -> String
|
||||
extern fn ang_willan_present(slot: Int) -> String
|
||||
extern fn ang_willan_past(slot: Int) -> String
|
||||
extern fn ang_magan_present(slot: Int) -> String
|
||||
extern fn ang_magan_past(slot: Int) -> String
|
||||
extern fn ang_witan_present(slot: Int) -> String
|
||||
extern fn ang_witan_past(slot: Int) -> String
|
||||
extern fn ang_weak_present_ending(slot: Int) -> String
|
||||
extern fn ang_weak_past_stem(stem: String) -> String
|
||||
extern fn ang_weak_past(stem: String, slot: Int) -> String
|
||||
extern fn ang_weak_stem(verb: String) -> String
|
||||
extern fn ang_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn ang_declension(noun: String, gender: String) -> String
|
||||
extern fn ang_decline_strong_masc(noun: String, gram_case: String, number: String) -> String
|
||||
extern fn ang_decline_strong_neut(noun: String, gram_case: String, number: String) -> String
|
||||
extern fn ang_decline_weak(noun: String, gram_case: String, number: String) -> String
|
||||
extern fn ang_decline(noun: String, gram_case: String, number: String, gender: String) -> String
|
||||
extern fn ang_article_masculine(gram_case: String, number: String) -> String
|
||||
extern fn ang_article_feminine(gram_case: String, number: String) -> String
|
||||
extern fn ang_article_neuter(gram_case: String, number: String) -> String
|
||||
extern fn ang_article(gender: String, gram_case: String, number: String) -> String
|
||||
extern fn ang_infer_gender(noun: String) -> String
|
||||
extern fn ang_noun_phrase(noun: String, gram_case: String, number: String, definite: String) -> String
|
||||
@@ -1,729 +0,0 @@
|
||||
// morphology-ar.el - Arabic morphology for the NLG engine.
|
||||
//
|
||||
// Implements Arabic verb conjugation, noun inflection (gram_case, gender, number,
|
||||
// definiteness), and definite-article attachment with sun/moon letter handling.
|
||||
//
|
||||
// Arabic is a Semitic language with a trilateral root system: most words derive
|
||||
// from 3-consonant roots by inserting vowel patterns (أوزان awzan) around the
|
||||
// root consonants. Verb conjugation is realised as prefix + stem + suffix.
|
||||
//
|
||||
// Strategy: the engine takes the 3ms perfect (past tense) form as the canonical
|
||||
// dictionary key (e.g. كَتَبَ kataba) and applies affix patterns to derive all
|
||||
// other conjugated forms for Form I (الفعل المجرد) regular verbs. A lookup
|
||||
// table covers essential irregular and hollow verbs.
|
||||
//
|
||||
// Verb tenses covered: "past" (perfect/الماضي), "present" (imperfect/المضارع),
|
||||
// "future" (سَيَفْعَلُ = sa- + imperfect).
|
||||
// Persons: first/second/third × masculine/feminine × singular/plural (+ dual stubs).
|
||||
// Gender params: "m" (masculine) | "f" (feminine).
|
||||
//
|
||||
// Depends on: morphology.el (str_ends_with, str_len, str_slice, str_eq, str_drop_last concept)
|
||||
|
||||
// ── String helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
import "morphology.el"
|
||||
fn ar_str_ends(s: String, suf: String) -> Bool {
    // Delegates directly to str_ends_with from morphology.el.
    let matched: Bool = str_ends_with(s, suf)
    return matched
}
|
||||
|
||||
fn ar_str_len(s: String) -> Int {
    // Delegates directly to str_len from morphology.el.
    let count: Int = str_len(s)
    return count
}
|
||||
|
||||
fn ar_str_drop_last(s: String, n: Int) -> String {
    // Returns s without its final n units (as counted by str_len).
    // When n covers the whole string or more, the result is "".
    let total: Int = str_len(s)
    if total <= n {
        return ""
    }
    let keep: Int = total - n
    return str_slice(s, 0, keep)
}
|
||||
|
||||
fn ar_str_last_char(s: String) -> String {
    // Returns the final unit of s, or "" for an empty string.
    // NOTE(review): if str_len/str_slice count bytes rather than characters,
    // this would split multi-byte Arabic letters and diacritics — confirm.
    let total: Int = str_len(s)
    if total == 0 {
        return ""
    }
    let start: Int = total - 1
    return str_slice(s, start, total)
}
|
||||
|
||||
// ── Slot index ────────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Maps person × gender × number to a 0-based slot for table lookups.
|
||||
// Slot layout (10 cells, matching classical Arabic conjugation paradigm):
|
||||
// 0 = 3ms (he)
|
||||
// 1 = 3fs (she)
|
||||
// 2 = 2ms (you m sg)
|
||||
// 3 = 2fs (you f sg)
|
||||
// 4 = 1s (I)
|
||||
// 5 = 3mp (they m pl)
|
||||
// 6 = 3fp (they f pl)
|
||||
// 7 = 2mp (you m pl)
|
||||
// 8 = 2fp (you f pl)
|
||||
// 9 = 1p (we)
|
||||
|
||||
fn ar_slot(person: String, gender: String, number: String) -> Int {
    // Maps person × gender × number onto the 10-cell paradigm index.
    //
    // Conventions, applied uniformly to all three persons:
    //   - only the exact string "singular" selects a singular slot; every
    //     other number value (e.g. "plural", "dual") selects the plural slot;
    //   - only the exact string "f" selects a feminine slot;
    //   - a person that is neither "third" nor "second" is treated as first.
    if str_eq(person, "third") {
        if str_eq(number, "singular") {
            if str_eq(gender, "f") { return 1 }
            return 0
        }
        // plural
        if str_eq(gender, "f") { return 6 }
        return 5
    }
    if str_eq(person, "second") {
        if str_eq(number, "singular") {
            if str_eq(gender, "f") { return 3 }
            return 2
        }
        // plural
        if str_eq(gender, "f") { return 8 }
        return 7
    }
    // first: gender is not distinguished. Previously any number other than
    // "plural" (including "dual") fell back to 1s; it now maps to 1p, in
    // line with the second- and third-person branches above.
    if str_eq(number, "singular") { return 4 }
    return 9
}
|
||||
|
||||
// ── Perfect (past) suffixes ───────────────────────────────────────────────────
|
||||
//
|
||||
// Form I perfect: root-past-stem (e.g. كَتَبَ kataba) + suffix.
|
||||
// The 3ms form IS the base (no suffix added). All other persons add a suffix
|
||||
// that replaces or follows the final short vowel of the base.
|
||||
//
|
||||
// Pattern (dropping the final -a of the 3ms base, then adding):
|
||||
// 3ms: -a (base as given)
|
||||
// 3fs: -at
|
||||
// 2ms: -ta
|
||||
// 2fs: -ti
|
||||
// 1s: -tu
|
||||
// 3mp: -uu
|
||||
// 3fp: -na
|
||||
// 2mp: -tum
|
||||
// 2fp: -tunna
|
||||
// 1p: -naa
|
||||
//
|
||||
// The base passed to ar_conjugate_form1 is the full 3ms form (ends in -a).
|
||||
// For suffixed forms we drop the final vowel mark (the trailing fatha) then
|
||||
// apply the suffix. In Arabic script the final short vowel (fatha ـَ) on the
|
||||
// last consonant of the base is part of the grapheme cluster of that consonant;
|
||||
// for our stored strings the form كَتَبَ is stored with the final fatha attached
|
||||
// to the ب. The suffix strings already include the vowel that replaces it, so
|
||||
// we drop 1 character from the base.
|
||||
//
|
||||
// For simplicity the suffixes below are given as Arabic transliteration that
|
||||
// the El string system handles as UTF-8. The actual Arabic forms are stored
|
||||
// as UTF-8 Arabic script literals.
|
||||
//
|
||||
// Returns the suffix string (including the vowel carried on the junction
|
||||
// consonant for suffixed forms). Returns "" for 3ms (base is the full form).
|
||||
|
||||
// ar_perfect_suffix: Form I perfect (past) suffix for a paradigm slot.
//
// The caller removes the final fatha of the 3ms base before appending, so each
// suffix starts with the mark that the last root consonant carries in that
// form: fatha for 3fs, damma for 3mp, sukun for every other suffixed form.
// This gives fully vocalized output in the same style as the irregular
// tables (e.g. كَانَتْ, كُنْتَ, كَانُوا).
//
//   slot: ar_slot result (0-9); any value outside 0-8 yields the 1p suffix
// Returns "" for slot 0 (3ms), where the base is already the complete form.
fn ar_perfect_suffix(slot: Int) -> String {
    if slot == 0 { return "" }        // 3ms: base is already complete
    if slot == 1 { return "َتْ" }      // 3fs: -at   (katab-at)
    if slot == 2 { return "ْتَ" }      // 2ms: -ta   (katab-ta)
    if slot == 3 { return "ْتِ" }      // 2fs: -ti   (katab-ti)
    if slot == 4 { return "ْتُ" }      // 1s:  -tu   (katab-tu)
    if slot == 5 { return "ُوا" }      // 3mp: -uu   (katab-uu, waw + alif farika)
    if slot == 6 { return "ْنَ" }      // 3fp: -na   (katab-na)
    if slot == 7 { return "ْتُمْ" }    // 2mp: -tum  (katab-tum)
    if slot == 8 { return "ْتُنَّ" }   // 2fp: -tunna (katab-tunna)
    return "ْنَا"                      // 1p:  -naa  (katab-naa)
}
|
||||
|
||||
// ── Imperfect (present) prefixes ──────────────────────────────────────────────
|
||||
//
|
||||
// Form I imperfect: prefix + middle vowel pattern + suffix.
|
||||
// Prefix depends on person (and for 1s the prefix is أَ).
|
||||
|
||||
// ar_imperfect_prefix: Form I imperfect (present) person prefix for a slot.
//
//   slot: ar_slot result (0-9); any value outside 0-8 yields the 1p prefix
fn ar_imperfect_prefix(slot: Int) -> String {
    // ya-: third person masculine singular and both third person plurals
    if slot == 0 { return "يَ" }
    if slot == 5 { return "يَ" }
    if slot == 6 { return "يَ" }

    // a-: first person singular
    if slot == 4 { return "أَ" }

    // ta-: third person feminine singular and every second person form
    if slot == 1 { return "تَ" }
    if slot == 2 { return "تَ" }
    if slot == 3 { return "تَ" }
    if slot == 7 { return "تَ" }
    if slot == 8 { return "تَ" }

    // na-: first person plural (slot 9)
    return "نَ"
}
|
||||
|
||||
// ── Imperfect (present) suffixes ──────────────────────────────────────────────
|
||||
//
|
||||
// Standard Form I imperfect — yaf'ulu / yaf'alu / yaf'ilu vowel class.
|
||||
// The stem vowel is encoded in the verb's imperfect stem (stored in the lookup
|
||||
// table or derived from the base). The suffix encodes number/gender/person.
|
||||
//
|
||||
// Suffix pattern (after the u-class stem: yaktubu):
|
||||
// 3ms: -u (yaktub-u)
|
||||
// 3fs: -u (taktub-u)
|
||||
// 2ms: -u (taktub-u)
|
||||
// 2fs: -iina (taktub-iina)
|
||||
// 1s: -u (aktub-u)
|
||||
// 3mp: -uuna (yaktub-uuna)
|
||||
// 3fp: -na (yaktub-na)
|
||||
// 2mp: -uuna (taktub-uuna)
|
||||
// 2fp: -na (taktub-na)
|
||||
// 1p: -u (naktub-u)
|
||||
|
||||
// ar_imperfect_suffix: Form I imperfect (present) suffix for a paradigm slot.
//
// The caller removes the final damma of the present stem before appending, so
// each suffix starts with the mark carried by the last root consonant: damma
// for the -u / -uuna forms, kasra for -iina, and sukun for the feminine
// plural -na (matching the irregular tables, e.g. يَقُلْنَ).
//
//   slot: ar_slot result (0-9); any value outside 0-8 yields the 1p suffix
fn ar_imperfect_suffix(slot: Int) -> String {
    if slot == 0 { return "ُ" }       // 3ms: -u
    if slot == 1 { return "ُ" }       // 3fs: -u
    if slot == 2 { return "ُ" }       // 2ms: -u
    if slot == 3 { return "ِينَ" }    // 2fs: -iina
    if slot == 4 { return "ُ" }       // 1s:  -u
    if slot == 5 { return "ُونَ" }    // 3mp: -uuna
    if slot == 6 { return "ْنَ" }     // 3fp: -na (sukun on the last root consonant)
    if slot == 7 { return "ُونَ" }    // 2mp: -uuna
    if slot == 8 { return "ْنَ" }     // 2fp: -na (sukun on the last root consonant)
    return "ُ"                        // 1p:  -u (9)
}
|
||||
|
||||
// ── Form I conjugation ────────────────────────────────────────────────────────
|
||||
//
|
||||
// ar_conjugate_form1: conjugate a regular Form I verb.
|
||||
//
|
||||
// past_base: the 3ms perfect form (e.g. "كَتَبَ")
|
||||
// present_stem: the imperfect stem without prefix (e.g. "كْتُبُ" for yaktubu)
|
||||
// This is the middle part after stripping the prefix: for يَكْتُبُ
|
||||
// the stem = "كْتُبُ". We strip the final -u vowel diacritic
|
||||
// (1 char) from the stem and re-add via the suffix.
|
||||
// tense: "past" | "present" | "future"
|
||||
// slot: ar_slot result
|
||||
|
||||
// ar_conjugate_form1: conjugate a regular Form I verb.
//
//   past_base:    the 3ms perfect form (e.g. "كَتَبَ")
//   present_stem: the imperfect without its prefix (e.g. "كْتُبُ" for yaktubu)
//   tense:        "past" | "present" | "future"
//   slot:         ar_slot result
//
// Returns the inflected form; an unrecognised tense returns past_base unchanged.
//
// The final short vowel (fatha on the base, damma on the stem) is removed only
// when it is actually present, and its width is taken from str_len of the mark
// itself. That keeps the cut on a character boundary whether the runtime
// measures strings in bytes or in code points, and leaves unvocalized input
// intact instead of chopping its last consonant.
fn ar_conjugate_form1(past_base: String, present_stem: String, tense: String, slot: Int) -> String {
    if str_eq(tense, "past") {
        // 3ms: the dictionary form is the answer
        if slot == 0 { return past_base }

        let suf: String = ar_perfect_suffix(slot)
        let fatha: String = "َ"
        if ar_str_ends(past_base, fatha) {
            let stem: String = ar_str_drop_last(past_base, str_len(fatha))
            return stem + suf
        }
        // No trailing fatha to replace: append the suffix to the base as given.
        return past_base + suf
    }

    if str_eq(tense, "present") {
        let pre: String = ar_imperfect_prefix(slot)
        let suf: String = ar_imperfect_suffix(slot)
        let damma: String = "ُ"
        if ar_str_ends(present_stem, damma) {
            let mid: String = ar_str_drop_last(present_stem, str_len(damma))
            return pre + mid + suf
        }
        // No trailing damma to replace: use the stem as given.
        return pre + present_stem + suf
    }

    if str_eq(tense, "future") {
        // Future = سَ (sa-) + the imperfect of the SAME slot, so that person,
        // gender and number are preserved (as the irregular tables do).
        let pres: String = ar_conjugate_form1(past_base, present_stem, "present", slot)
        return "سَ" + pres
    }

    // Unknown tense: return base form
    return past_base
}
|
||||
|
||||
// ── Irregular verb lookup table ───────────────────────────────────────────────
|
||||
//
|
||||
// Returns the inflected form for verbs that cannot be derived by Form I rules,
|
||||
// or "" if the verb is not in the table.
|
||||
//
|
||||
// Covered verbs (by their 3ms past / dictionary key):
|
||||
// كَانَ kaana — to be (hollow verb, waw-medial)
|
||||
// ذَهَبَ dhahaba — to go (Form I, regular; explicit table for certainty)
|
||||
// جَاءَ jaa'a — to come (hamzated + defective)
|
||||
// قَالَ qaala — to say (hollow verb, waw-medial)
|
||||
// رَأَى ra'aa — to see (hamzated + defective)
|
||||
// أَكَلَ akala — to eat (hamzated initial)
|
||||
// شَرِبَ shariba — to drink (Form I i-class)
|
||||
// عَرَفَ arafa — to know (Form I a-class)
|
||||
// أَرَادَ araada — to want (Form IV hollow)
|
||||
// اِسْتَطَاعَ istata'a — can/be able (Form X)
|
||||
// فَعَلَ fa'ala — to do/act (Form I; paradigm verb)
|
||||
// أَخَذَ akhadha — to take (hamzated initial)
|
||||
// عَمِلَ amila — to work (Form I i-class)
|
||||
//
|
||||
// For each verb: [past_3ms, past_3fs, past_2ms, past_2fs, past_1s,
|
||||
// past_3mp, past_3fp, past_2mp, past_2fp, past_1p,
|
||||
// pres_3ms, pres_3fs, pres_2ms, pres_2fs, pres_1s,
|
||||
// pres_3mp, pres_3fp, pres_2mp, pres_2fp, pres_1p]
|
||||
|
||||
// ar_irregular_kaana: full paradigm of كَانَ (to be).
//
//   slot:  ar_slot result (0-9); values outside 0-8 yield the 1p form
//   tense: "past" | "present" | "future"; any other value returns the 3ms past
fn ar_irregular_kaana(slot: Int, tense: String) -> String {
    // Future is sa- prefixed to the present form of the same slot.
    if str_eq(tense, "future") {
        return "سَ" + ar_irregular_kaana(slot, "present")
    }

    if str_eq(tense, "present") {
        if slot == 0 { return "يَكُونُ" }
        if slot == 1 { return "تَكُونُ" }
        if slot == 2 { return "تَكُونُ" }
        if slot == 3 { return "تَكُونِينَ" }
        if slot == 4 { return "أَكُونُ" }
        if slot == 5 { return "يَكُونُونَ" }
        if slot == 6 { return "يَكُنَّ" }
        if slot == 7 { return "تَكُونُونَ" }
        if slot == 8 { return "تَكُنَّ" }
        return "نَكُونُ"
    }

    if str_eq(tense, "past") {
        if slot == 0 { return "كَانَ" }
        if slot == 1 { return "كَانَتْ" }
        if slot == 2 { return "كُنْتَ" }
        if slot == 3 { return "كُنْتِ" }
        if slot == 4 { return "كُنْتُ" }
        if slot == 5 { return "كَانُوا" }
        if slot == 6 { return "كُنَّ" }
        if slot == 7 { return "كُنْتُمْ" }
        if slot == 8 { return "كُنْتُنَّ" }
        return "كُنَّا"
    }

    // Unrecognised tense: dictionary form
    return "كَانَ"
}
|
||||
|
||||
// ar_irregular_qaala: full paradigm of قَالَ (to say).
//
//   slot:  ar_slot result (0-9); values outside 0-8 yield the 1p form
//   tense: "past" | "present" | "future"; any other value returns the 3ms past
fn ar_irregular_qaala(slot: Int, tense: String) -> String {
    // Future is sa- prefixed to the present form of the same slot.
    if str_eq(tense, "future") {
        return "سَ" + ar_irregular_qaala(slot, "present")
    }

    if str_eq(tense, "present") {
        if slot == 0 { return "يَقُولُ" }
        if slot == 1 { return "تَقُولُ" }
        if slot == 2 { return "تَقُولُ" }
        if slot == 3 { return "تَقُولِينَ" }
        if slot == 4 { return "أَقُولُ" }
        if slot == 5 { return "يَقُولُونَ" }
        if slot == 6 { return "يَقُلْنَ" }
        if slot == 7 { return "تَقُولُونَ" }
        if slot == 8 { return "تَقُلْنَ" }
        return "نَقُولُ"
    }

    if str_eq(tense, "past") {
        if slot == 0 { return "قَالَ" }
        if slot == 1 { return "قَالَتْ" }
        if slot == 2 { return "قُلْتَ" }
        if slot == 3 { return "قُلْتِ" }
        if slot == 4 { return "قُلْتُ" }
        if slot == 5 { return "قَالُوا" }
        if slot == 6 { return "قُلْنَ" }
        if slot == 7 { return "قُلْتُمْ" }
        if slot == 8 { return "قُلْتُنَّ" }
        return "قُلْنَا"
    }

    // Unrecognised tense: dictionary form
    return "قَالَ"
}
|
||||
|
||||
// ar_irregular_jaa: full paradigm of جَاءَ (to come).
//
//   slot:  ar_slot result (0-9); values outside 0-8 yield the 1p form
//   tense: "past" | "present" | "future"; any other value returns the 3ms past
fn ar_irregular_jaa(slot: Int, tense: String) -> String {
    // Future is sa- prefixed to the present form of the same slot.
    if str_eq(tense, "future") {
        return "سَ" + ar_irregular_jaa(slot, "present")
    }

    if str_eq(tense, "present") {
        if slot == 0 { return "يَجِيءُ" }
        if slot == 1 { return "تَجِيءُ" }
        if slot == 2 { return "تَجِيءُ" }
        if slot == 3 { return "تَجِيئِينَ" }
        if slot == 4 { return "أَجِيءُ" }
        if slot == 5 { return "يَجِيئُونَ" }
        if slot == 6 { return "يَجِئْنَ" }
        if slot == 7 { return "تَجِيئُونَ" }
        if slot == 8 { return "تَجِئْنَ" }
        return "نَجِيءُ"
    }

    if str_eq(tense, "past") {
        if slot == 0 { return "جَاءَ" }
        if slot == 1 { return "جَاءَتْ" }
        if slot == 2 { return "جِئْتَ" }
        if slot == 3 { return "جِئْتِ" }
        if slot == 4 { return "جِئْتُ" }
        if slot == 5 { return "جَاءُوا" }
        if slot == 6 { return "جِئْنَ" }
        if slot == 7 { return "جِئْتُمْ" }
        if slot == 8 { return "جِئْتُنَّ" }
        return "جِئْنَا"
    }

    // Unrecognised tense: dictionary form
    return "جَاءَ"
}
|
||||
|
||||
// ar_irregular_raaa: full paradigm of رَأَى (to see).
//
//   slot:  ar_slot result (0-9); values outside 0-8 yield the 1p form
//   tense: "past" | "present" | "future"; any other value returns the 3ms past
fn ar_irregular_raaa(slot: Int, tense: String) -> String {
    // Future is sa- prefixed to the present form of the same slot.
    if str_eq(tense, "future") {
        return "سَ" + ar_irregular_raaa(slot, "present")
    }

    if str_eq(tense, "present") {
        if slot == 0 { return "يَرَى" }
        if slot == 1 { return "تَرَى" }
        if slot == 2 { return "تَرَى" }
        if slot == 3 { return "تَرَيْنَ" }
        if slot == 4 { return "أَرَى" }
        if slot == 5 { return "يَرَوْنَ" }
        if slot == 6 { return "يَرَيْنَ" }
        if slot == 7 { return "تَرَوْنَ" }
        if slot == 8 { return "تَرَيْنَ" }
        return "نَرَى"
    }

    if str_eq(tense, "past") {
        if slot == 0 { return "رَأَى" }
        if slot == 1 { return "رَأَتْ" }
        if slot == 2 { return "رَأَيْتَ" }
        if slot == 3 { return "رَأَيْتِ" }
        if slot == 4 { return "رَأَيْتُ" }
        if slot == 5 { return "رَأَوْا" }
        if slot == 6 { return "رَأَيْنَ" }
        if slot == 7 { return "رَأَيْتُمْ" }
        if slot == 8 { return "رَأَيْتُنَّ" }
        return "رَأَيْنَا"
    }

    // Unrecognised tense: dictionary form
    return "رَأَى"
}
|
||||
|
||||
// ar_irregular_araada: full paradigm of أَرَادَ (to want).
//
//   slot:  ar_slot result (0-9); values outside 0-8 yield the 1p form
//   tense: "past" | "present" | "future"; any other value returns the 3ms past
fn ar_irregular_araada(slot: Int, tense: String) -> String {
    // Future is sa- prefixed to the present form of the same slot.
    if str_eq(tense, "future") {
        return "سَ" + ar_irregular_araada(slot, "present")
    }

    if str_eq(tense, "present") {
        if slot == 0 { return "يُرِيدُ" }
        if slot == 1 { return "تُرِيدُ" }
        if slot == 2 { return "تُرِيدُ" }
        if slot == 3 { return "تُرِيدِينَ" }
        if slot == 4 { return "أُرِيدُ" }
        if slot == 5 { return "يُرِيدُونَ" }
        if slot == 6 { return "يُرِدْنَ" }
        if slot == 7 { return "تُرِيدُونَ" }
        if slot == 8 { return "تُرِدْنَ" }
        return "نُرِيدُ"
    }

    if str_eq(tense, "past") {
        if slot == 0 { return "أَرَادَ" }
        if slot == 1 { return "أَرَادَتْ" }
        if slot == 2 { return "أَرَدْتَ" }
        if slot == 3 { return "أَرَدْتِ" }
        if slot == 4 { return "أَرَدْتُ" }
        if slot == 5 { return "أَرَادُوا" }
        if slot == 6 { return "أَرَدْنَ" }
        if slot == 7 { return "أَرَدْتُمْ" }
        if slot == 8 { return "أَرَدْتُنَّ" }
        return "أَرَدْنَا"
    }

    // Unrecognised tense: dictionary form
    return "أَرَادَ"
}
|
||||
|
||||
// ar_irregular_istata: full paradigm of اِسْتَطَاعَ (can / be able).
//
//   slot:  ar_slot result (0-9); values outside 0-8 yield the 1p form
//   tense: "past" | "present" | "future"; any other value returns the 3ms past
fn ar_irregular_istata(slot: Int, tense: String) -> String {
    // Future is sa- prefixed to the present form of the same slot.
    if str_eq(tense, "future") {
        return "سَ" + ar_irregular_istata(slot, "present")
    }

    if str_eq(tense, "present") {
        if slot == 0 { return "يَسْتَطِيعُ" }
        if slot == 1 { return "تَسْتَطِيعُ" }
        if slot == 2 { return "تَسْتَطِيعُ" }
        if slot == 3 { return "تَسْتَطِيعِينَ" }
        if slot == 4 { return "أَسْتَطِيعُ" }
        if slot == 5 { return "يَسْتَطِيعُونَ" }
        if slot == 6 { return "يَسْتَطِعْنَ" }
        if slot == 7 { return "تَسْتَطِيعُونَ" }
        if slot == 8 { return "تَسْتَطِعْنَ" }
        return "نَسْتَطِيعُ"
    }

    if str_eq(tense, "past") {
        if slot == 0 { return "اِسْتَطَاعَ" }
        if slot == 1 { return "اِسْتَطَاعَتْ" }
        if slot == 2 { return "اِسْتَطَعْتَ" }
        if slot == 3 { return "اِسْتَطَعْتِ" }
        if slot == 4 { return "اِسْتَطَعْتُ" }
        if slot == 5 { return "اِسْتَطَاعُوا" }
        if slot == 6 { return "اِسْتَطَعْنَ" }
        if slot == 7 { return "اِسْتَطَعْتُمْ" }
        if slot == 8 { return "اِسْتَطَعْتُنَّ" }
        return "اِسْتَطَعْنَا"
    }

    // Unrecognised tense: dictionary form
    return "اِسْتَطَاعَ"
}
|
||||
|
||||
// ── Irregular verb dispatcher ─────────────────────────────────────────────────
|
||||
//
|
||||
// ar_irregular: returns the inflected form if verb is in the lookup table,
|
||||
// or "" if not found (caller should use Form I rules).
|
||||
//
|
||||
// verb: 3ms past form (dictionary key) as Arabic string
|
||||
// tense: "past" | "present" | "future"
|
||||
// slot: ar_slot result
|
||||
|
||||
// ar_irregular: route a verb to its hand-written paradigm table.
//
//   verb:  3ms past form (dictionary key), compared exactly with str_eq
//   tense: "past" | "present" | "future"
//   slot:  ar_slot result
//
// Returns the inflected form, or "" when the verb has no table so the caller
// can fall back to the regular Form I rules.
fn ar_irregular(verb: String, tense: String, slot: Int) -> String {
    // Longer (derived-form) keys first; the keys are distinct, so order does
    // not affect the result.
    if str_eq(verb, "اِسْتَطَاعَ") { return ar_irregular_istata(slot, tense) }
    if str_eq(verb, "أَرَادَ") { return ar_irregular_araada(slot, tense) }
    if str_eq(verb, "رَأَى") { return ar_irregular_raaa(slot, tense) }
    if str_eq(verb, "جَاءَ") { return ar_irregular_jaa(slot, tense) }
    if str_eq(verb, "قَالَ") { return ar_irregular_qaala(slot, tense) }
    if str_eq(verb, "كَانَ") { return ar_irregular_kaana(slot, tense) }

    let not_found: String = ""
    return not_found
}
|
||||
|
||||
// ── Regular Form I verb table ─────────────────────────────────────────────────
|
||||
//
|
||||
// For regular Form I verbs that would be correctly generated by ar_conjugate_form1
|
||||
// but whose imperfect stem must be looked up (Arabic verbs have three vowel
|
||||
// classes for the imperfect medial vowel: a, i, u — فَعَلَ/يَفْعَلُ,
|
||||
// فَعِلَ/يَفْعَلُ, فَعَلَ/يَفْعُلُ). We store the present stem for each.
|
||||
//
|
||||
// Returns present_stem (the imperfect without prefix, e.g. "كْتُبُ" for yaktubu),
|
||||
// or "" if not in table.
|
||||
|
||||
// ar_present_stem: look up the imperfect stem (imperfect without its person
// prefix) for a regular Form I verb.
//
//   verb: 3ms past form (dictionary key), compared exactly with str_eq
//
// Returns the stem, or "" when the verb is not in the table. Entries are
// grouped by the vowel class of the imperfect; keys are distinct so the
// grouping does not affect the result.
fn ar_present_stem(verb: String) -> String {
    // u-class imperfect
    if str_eq(verb, "كَتَبَ") { return "كْتُبُ" }   // kataba   -> yaktubu
    if str_eq(verb, "أَكَلَ") { return "أْكُلُ" }   // akala    -> ya'kulu
    if str_eq(verb, "أَخَذَ") { return "أْخُذُ" }   // akhadha  -> ya'khudhu
    if str_eq(verb, "دَرَسَ") { return "دْرُسُ" }   // darasa   -> yadrusu
    if str_eq(verb, "خَرَجَ") { return "خْرُجُ" }   // kharaja  -> yakhruju
    if str_eq(verb, "دَخَلَ") { return "دْخُلُ" }   // dakhala  -> yadkhulu

    // a-class imperfect
    if str_eq(verb, "ذَهَبَ") { return "ذْهَبُ" }   // dhahaba  -> yadhhabu
    if str_eq(verb, "شَرِبَ") { return "شْرَبُ" }   // shariba  -> yashrabu
    if str_eq(verb, "فَعَلَ") { return "فْعَلُ" }   // fa'ala   -> yaf'alu
    if str_eq(verb, "عَمِلَ") { return "عْمَلُ" }   // amila    -> ya'malu
    if str_eq(verb, "فَهِمَ") { return "فْهَمُ" }   // fahima   -> yafhamu
    if str_eq(verb, "سَمِعَ") { return "سْمَعُ" }   // sami'a   -> yasma'u
    if str_eq(verb, "فَتَحَ") { return "فْتَحُ" }   // fataha   -> yaftahu
    if str_eq(verb, "صَنَعَ") { return "صْنَعُ" }   // sana'a   -> yasna'u
    if str_eq(verb, "قَرَأَ") { return "قْرَأُ" }   // qara'a   -> yaqra'u

    // i-class imperfect (waw-initial verbs lose the waw in the stem)
    if str_eq(verb, "عَرَفَ") { return "عْرِفُ" }   // arafa    -> ya'rifu
    if str_eq(verb, "جَلَسَ") { return "جْلِسُ" }   // jalasa   -> yajlisu
    if str_eq(verb, "وَجَدَ") { return "جِدُ" }     // wajada   -> yajidu
    if str_eq(verb, "رَجَعَ") { return "رْجِعُ" }   // raja'a   -> yarji'u
    if str_eq(verb, "وَقَفَ") { return "قِفُ" }     // waqafa   -> yaqifu
    if str_eq(verb, "كَذَبَ") { return "كْذِبُ" }   // kadhaba  -> yakdhibu

    // Not in the table
    return ""
}
|
||||
|
||||
// ── Main conjugation dispatcher ───────────────────────────────────────────────
|
||||
//
|
||||
// ar_conjugate: conjugate an Arabic verb.
|
||||
//
|
||||
// verb: 3ms perfect form (dictionary key), e.g. "كَتَبَ"
|
||||
// tense: "past" | "present" | "future"
|
||||
// person: "first" | "second" | "third"
|
||||
// gender: "m" | "f"
|
||||
// number: "singular" | "plural"
|
||||
|
||||
// ar_conjugate: conjugate an Arabic verb.
//
//   verb:   3ms perfect form (dictionary key), e.g. "كَتَبَ"
//   tense:  "past" | "present" | "future"
//   person: "first" | "second" | "third"
//   gender: "m" | "f"
//   number: "singular" | "plural"
//
// Lookup order: irregular paradigm tables, then the regular Form I stem
// table. A verb found in neither is returned unchanged.
fn ar_conjugate(verb: String, tense: String, person: String, gender: String, number: String) -> String {
    let cell: Int = ar_slot(person, gender, number)

    let from_table: String = ar_irregular(verb, tense, cell)
    if str_eq(from_table, "") {
        // Not irregular: try the regular Form I rules.
        let stem: String = ar_present_stem(verb)
        if str_eq(stem, "") {
            // Unknown verb: hand back the dictionary form.
            return verb
        }
        return ar_conjugate_form1(verb, stem, tense, cell)
    }
    return from_table
}
|
||||
|
||||
// ── Definite article ──────────────────────────────────────────────────────────
|
||||
//
|
||||
// ar_definite_article: prefix ال (al-) to a noun with sun/moon letter handling.
|
||||
//
|
||||
// Sun letters (الحروف الشمسية) cause the lam of the article to assimilate to
|
||||
// the first letter of the noun. Moon letters (الحروف القمرية) do not.
|
||||
//
|
||||
// Sun letters (Unicode Arabic code points):
|
||||
// ت ث د ذ ر ز س ش ص ض ط ظ ل ن
|
||||
//
|
||||
// Moon letters (all others):
|
||||
// أ ب ج ح خ ع غ ف ق ك م ه و ي
|
||||
//
|
||||
// In Arabic orthography the assimilation is shown with a shadda on the sun letter.
|
||||
// Here we return "ال" (al-) for moon letters and the assimilated form for sun
|
||||
// letters. The noun is prefixed with the article; the article lam is replaced
|
||||
// by a shadda on the sun consonant.
|
||||
|
||||
// ar_is_sun_letter: true when `c` is exactly one of the fourteen sun letters
// (ت ث د ذ ر ز س ش ص ض ط ظ ل ن), compared with str_eq; false otherwise.
fn ar_is_sun_letter(c: String) -> Bool {
    // The checks are independent, so they are listed here in reverse
    // alphabetical order without changing the result.
    if str_eq(c, "ن") { return true }
    if str_eq(c, "ل") { return true }
    if str_eq(c, "ظ") { return true }
    if str_eq(c, "ط") { return true }
    if str_eq(c, "ض") { return true }
    if str_eq(c, "ص") { return true }
    if str_eq(c, "ش") { return true }
    if str_eq(c, "س") { return true }
    if str_eq(c, "ز") { return true }
    if str_eq(c, "ر") { return true }
    if str_eq(c, "ذ") { return true }
    if str_eq(c, "د") { return true }
    if str_eq(c, "ث") { return true }
    if str_eq(c, "ت") { return true }
    return false
}
|
||||
|
||||
// ar_definite_article: prefix ال (al-) to a noun, marking sun-letter
// assimilation with a shadda on the noun's first letter.
//
//   noun: the noun to make definite; "" is returned unchanged
//
// Returns "ال" + noun for moon letters, and "ال" + first letter + shadda +
// remainder for sun letters.
//
// The first letter is sliced using the length of a reference Arabic letter
// rather than the constant 1, so the slice is one whole letter whether the
// runtime measures strings in bytes or in code points. A noun that does not
// start with an Arabic letter simply fails the sun-letter test and takes the
// plain "ال" prefix.
fn ar_definite_article(noun: String) -> String {
    let n: Int = ar_str_len(noun)
    if n == 0 {
        return noun
    }

    // Width of one Arabic letter in str_len units.
    let letter_width: Int = ar_str_len("ل")

    if n >= letter_width {
        let first: String = str_slice(noun, 0, letter_width)
        if ar_is_sun_letter(first) {
            // Sun letter: the lam is still written, and the shadda diacritic
            // (U+0651) attaches to the sun letter.
            let shadda: String = "ّ"
            let rest: String = str_slice(noun, letter_width, n)
            return "ال" + first + shadda + rest
        }
    }

    // Moon letter (or no leading Arabic letter): simple al- prefix
    return "ال" + noun
}
|
||||
|
||||
// ── Case endings ──────────────────────────────────────────────────────────────
|
||||
//
|
||||
// ar_case_ending: return the short vowel ending for a noun given its gram_case
|
||||
// and definiteness.
|
||||
//
|
||||
// case: "nom" | "acc" | "gen"
|
||||
// definite: "true" | "false"
|
||||
//
|
||||
// Indefinite endings carry nunation (tanwin):
|
||||
// nom: -un (ٌ)
|
||||
// acc: -an (ً)
|
||||
// gen: -in (ٍ)
|
||||
//
|
||||
// Definite endings are single short vowels:
|
||||
// nom: -u (ُ)
|
||||
// acc: -a (َ)
|
||||
// gen: -i (ِ)
|
||||
|
||||
// ar_case_ending: short-vowel case ending for a noun.
//
//   kase:     "nom" | "acc" | "gen"; any other value yields ""
//   definite: "true" selects the plain short vowel; any other value selects
//             the nunated (tanwin) ending
fn ar_case_ending(kase: String, definite: String) -> String {
    if str_eq(definite, "true") {
        // Definite: single short vowel
        if str_eq(kase, "nom") { return "ُ" }
        if str_eq(kase, "acc") { return "َ" }
        if str_eq(kase, "gen") { return "ِ" }
        return ""
    }

    // Indefinite: tanwin
    if str_eq(kase, "nom") { return "ٌ" }
    if str_eq(kase, "acc") { return "ً" }
    if str_eq(kase, "gen") { return "ٍ" }
    return ""
}
|
||||
|
||||
// ── Gender inference ──────────────────────────────────────────────────────────
|
||||
//
|
||||
// ar_gender: infer gender from noun form.
|
||||
// Returns "f" for nouns ending in taa marbuta (ة or ـة), otherwise "m".
|
||||
// This covers the most reliable heuristic; broken plurals and loanwords may
|
||||
// vary but are handled by explicit lookup in the Engram.
|
||||
|
||||
// ar_gender: guess grammatical gender from the noun's ending.
// Returns "f" when the noun ends in taa marbuta (with or without a preceding
// tatweel), otherwise "m".
fn ar_gender(noun: String) -> String {
    let ends_in_taa_marbuta: Bool = ar_str_ends(noun, "ة")
    if ends_in_taa_marbuta { return "f" }

    let ends_in_joined_taa_marbuta: Bool = ar_str_ends(noun, "ـة")
    if ends_in_joined_taa_marbuta { return "f" }

    return "m"
}
|
||||
|
||||
// ── Sound plurals ─────────────────────────────────────────────────────────────
|
||||
//
|
||||
// ar_sound_plural: form the sound masculine or feminine plural.
|
||||
//
|
||||
// Sound masculine plural (جمع المذكر السالم):
|
||||
// nom: -uuna (ونَ)
|
||||
// acc/gen: -iina (ينَ)
|
||||
//
|
||||
// Sound feminine plural (جمع المؤنث السالم):
|
||||
// Remove final ة (taa marbuta) if present, then add -aat (اتٌ/اتُ).
|
||||
//
|
||||
// This function returns the base plural form (without case ending) suitable
|
||||
// for passing to ar_noun_form. For masculine plural case variation, callers
|
||||
// should use ar_masc_pl_ending.
|
||||
|
||||
// ar_masc_pl_ending: sound masculine plural ending for a case.
// "nom" yields -uuna; every other value (including "acc", "gen" and "")
// yields -iina.
fn ar_masc_pl_ending(kase: String) -> String {
    let nominative: Bool = str_eq(kase, "nom")
    if nominative {
        return "ونَ"
    }
    return "ينَ"
}
|
||||
|
||||
// ar_sound_plural: base sound plural of a noun, without a case ending.
//
//   noun:   singular noun
//   gender: "f" builds the feminine plural; any other value builds the
//           masculine (nominative-shaped) plural
//
// Feminine: a final taa marbuta (ة) is removed before adding ات.
// Masculine: ون is appended.
//
// The taa marbuta is removed by its own str_len rather than the constant 1,
// so the whole letter is dropped whether the runtime measures strings in
// bytes or in code points.
fn ar_sound_plural(noun: String, gender: String) -> String {
    if str_eq(gender, "f") {
        let taa_marbuta: String = "ة"
        if ar_str_ends(noun, taa_marbuta) {
            let base: String = ar_str_drop_last(noun, ar_str_len(taa_marbuta))
            return base + "ات"
        }
        return noun + "ات"
    }
    // Masculine sound plural (nominative form as default): -uuna
    return noun + "ون"
}
|
||||
|
||||
// ── Full noun inflection ──────────────────────────────────────────────────────
|
||||
//
|
||||
// ar_noun_form: produce the inflected noun form.
|
||||
//
|
||||
// noun: base (singular) noun string
|
||||
// gender: "m" | "f" (pass "" to infer from noun ending)
|
||||
// kase: "nom" | "acc" | "gen" | "" (no case ending added)
|
||||
// number: "singular" | "plural"
|
||||
// definite: "true" | "false"
|
||||
//
|
||||
// For plurals, the function applies the sound plural (broken plurals are
|
||||
// language-external and must be supplied via Engram vocabulary nodes).
|
||||
|
||||
// ar_noun_form: produce the inflected noun form.
//
//   noun:     base (singular) noun string
//   gender:   "m" | "f"; pass "" to infer it from the noun ending via ar_gender
//   kase:     "nom" | "acc" | "gen"; "" adds no short-vowel case ending
//   number:   "plural" builds the sound plural; any other value is singular
//   definite: "true" prefixes the definite article; any other value does not
//
// Plurals are always sound plurals; broken plurals must be supplied by the
// caller. The masculine plural takes its ending from ar_masc_pl_ending, which
// returns the -iina ending for every kase other than "nom".
fn ar_noun_form(noun: String, gender: String, kase: String, number: String, definite: String) -> String {
    // Resolve an unspecified gender first, then re-enter with the inferred
    // value. ar_gender only returns "m" or "f", so this recurses at most once.
    // (A nested `let g = ...` inside an `if` block would only shadow the
    // outer binding and leave the gender unresolved.)
    if str_eq(gender, "") {
        return ar_noun_form(noun, ar_gender(noun), kase, number, definite)
    }

    if str_eq(number, "plural") {
        if str_eq(gender, "m") {
            // Masculine sound plural: noun + case-dependent ending
            let pl_suf: String = ar_masc_pl_ending(kase)
            if str_eq(definite, "true") {
                let def_stem: String = ar_definite_article(noun)
                return def_stem + pl_suf
            }
            return noun + pl_suf
        }

        // Feminine plural: drop ة, add ات, then the case ending
        let fem_pl: String = ar_sound_plural(noun, "f")
        let case_end: String = ar_case_ending(kase, definite)
        if str_eq(definite, "true") {
            return ar_definite_article(fem_pl) + case_end
        }
        return fem_pl + case_end
    }

    // Singular
    let case_end: String = ar_case_ending(kase, definite)
    if str_eq(definite, "true") {
        let def_stem: String = ar_definite_article(noun)
        return def_stem + case_end
    }
    return noun + case_end
}
|
||||
|
||||
// ── Convenience: verb inflect entry point ─────────────────────────────────────
|
||||
//
|
||||
// ar_verb_form: thin wrapper matching the signature style of the main engine.
|
||||
// Takes person as "first" | "second" | "third"; there is no gender parameter and
|
||||
// masculine ("m") is always used. Call ar_conjugate directly for feminine forms.
|
||||
|
||||
// ar_verb_form: convenience wrapper around ar_conjugate that always
// conjugates with masculine gender.
//
//   verb:   3ms perfect form (dictionary key)
//   tense:  "past" | "present" | "future"
//   person: "first" | "second" | "third"
//   number: "singular" | "plural"
fn ar_verb_form(verb: String, tense: String, person: String, number: String) -> String {
    let default_gender: String = "m"
    return ar_conjugate(verb, tense, person, default_gender, number)
}
|
||||
@@ -1,27 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn ar_str_ends(s: String, suf: String) -> Bool
|
||||
extern fn ar_str_len(s: String) -> Int
|
||||
extern fn ar_str_drop_last(s: String, n: Int) -> String
|
||||
extern fn ar_str_last_char(s: String) -> String
|
||||
extern fn ar_slot(person: String, gender: String, number: String) -> Int
|
||||
extern fn ar_perfect_suffix(slot: Int) -> String
|
||||
extern fn ar_imperfect_prefix(slot: Int) -> String
|
||||
extern fn ar_imperfect_suffix(slot: Int) -> String
|
||||
extern fn ar_conjugate_form1(past_base: String, present_stem: String, tense: String, slot: Int) -> String
|
||||
extern fn ar_irregular_kaana(slot: Int, tense: String) -> String
|
||||
extern fn ar_irregular_qaala(slot: Int, tense: String) -> String
|
||||
extern fn ar_irregular_jaa(slot: Int, tense: String) -> String
|
||||
extern fn ar_irregular_raaa(slot: Int, tense: String) -> String
|
||||
extern fn ar_irregular_araada(slot: Int, tense: String) -> String
|
||||
extern fn ar_irregular_istata(slot: Int, tense: String) -> String
|
||||
extern fn ar_irregular(verb: String, tense: String, slot: Int) -> String
|
||||
extern fn ar_present_stem(verb: String) -> String
|
||||
extern fn ar_conjugate(verb: String, tense: String, person: String, gender: String, number: String) -> String
|
||||
extern fn ar_is_sun_letter(c: String) -> Bool
|
||||
extern fn ar_definite_article(noun: String) -> String
|
||||
extern fn ar_case_ending(kase: String, definite: String) -> String
|
||||
extern fn ar_gender(noun: String) -> String
|
||||
extern fn ar_masc_pl_ending(kase: String) -> String
|
||||
extern fn ar_sound_plural(noun: String, gender: String) -> String
|
||||
extern fn ar_noun_form(noun: String, gender: String, kase: String, number: String, definite: String) -> String
|
||||
extern fn ar_verb_form(verb: String, tense: String, person: String, number: String) -> String
|
||||
@@ -1,577 +0,0 @@
|
||||
// morphology-cop.el - Coptic (Sahidic dialect) morphology for the NLG engine.
|
||||
//
|
||||
// Implements Coptic verb conjugation (bipartite and tripartite patterns), noun
|
||||
// phrase assembly with definite and indefinite articles, and noun number marking.
|
||||
// Designed as a companion to morphology.el; called when language code is "cop".
|
||||
//
|
||||
// Language profile: code=cop, name=Coptic, morph_type=agglutinative,
|
||||
// word_order=SVO, question_strategy=particle, script=coptic, family=afro-asiatic-egyptian.
|
||||
//
|
||||
// Script: Coptic uses the Greek alphabet plus seven additional letters borrowed
|
||||
// from Demotic Egyptian. All Coptic-script characters in this file use their
|
||||
// correct Unicode code points (Coptic block U+2C80–U+2CFF; Coptic letters also
|
||||
// appear in the Greek block: ϣ U+03E3, ϥ U+03E5, ϩ U+03E9, ϫ U+03EB, ϭ U+03ED).
|
||||
//
|
||||
// The El runtime stores strings as byte arrays. String literals with Coptic
|
||||
// Unicode characters are encoded as UTF-8 and compared via str_eq byte equality.
|
||||
// The runtime limitation on non-ASCII *output display* does not affect internal
|
||||
// string logic — str_eq and concatenation work correctly.
|
||||
//
|
||||
// Grammatical notes (Sahidic Coptic, ca. 200–1000 CE):
|
||||
// - SVO word order (Greek influence; reversed from classical Egyptian)
|
||||
// - Definite articles prefixed directly to the noun (no space):
|
||||
// p- (masc sg), t- (fem sg), n- (plural) — definite
|
||||
// ou- (sg indefinite), hen- (pl indefinite)
|
||||
// - Grammatical gender: masculine / feminine (still active)
|
||||
// - No case endings — grammatical role expressed by word order + prepositions
|
||||
// - Verb tense/aspect expressed by conjugation base (bipartite pattern):
|
||||
// Present I: pronoun prefix + verb stem ("f-bwk" = he goes)
|
||||
// Perfect: a- + pronoun prefix + verb ("a-f-bwk" = he went)
|
||||
// Future: pronoun prefix + na- + verb ("f-na-bwk" = he will go)
|
||||
// - Pronoun prefixes (Sahidic — used as subject markers in bipartite conjugation):
|
||||
// 1sg: a-/t- (full: ⲁⲛⲟⲕ) 2sg m: k- 2sg f: te-
|
||||
// 3sg m: f- 3sg f: s-
|
||||
// 1pl: n- 2pl: teten- 3pl: se-
|
||||
// - Copula: "pe" (m sg), "te" (f sg), "ne" (pl); zero copula for adj predicates
|
||||
// - "to be/become": ϣωπε (Sahidic; present: fϣoop / sϣoop; past: afϣwpe)
|
||||
//
|
||||
// Verbs covered (Sahidic transliteration / Coptic script):
|
||||
// ϣωπε (shwpe) — to be / become bwk — to go
|
||||
// nau — to see jw — to say / speak
|
||||
// di — to give
|
||||
//
|
||||
// Canonical English → Coptic mapping:
|
||||
// "be" → ϣωπε / zero copula "go" → bwk
|
||||
// "see" → nau "say" → jw
|
||||
// "give" → di
|
||||
//
|
||||
// Persons/numbers covered:
|
||||
// person: "first" | "second" | "third"
|
||||
// gender: "m" | "f" (relevant for 2sg and 3sg pronoun prefix selection)
|
||||
// number: "singular" | "plural"
|
||||
//
|
||||
// Depends on: morphology.el (str_eq, str_len, str_slice, str_ends_with)
|
||||
|
||||
// ── String helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
import "morphology.el"
|
||||
// cop_str_ends: report whether `s` ends with `suf`.
// Thin wrapper that forwards both arguments to str_ends_with.
fn cop_str_ends(s: String, suf: String) -> Bool {
    let has_suffix: Bool = str_ends_with(s, suf)
    return has_suffix
}
|
||||
|
||||
// cop_str_len: length of `s` as reported by str_len.
// NOTE(review): the unit (bytes vs. code points) is whatever str_len uses — confirm
// before relying on it for Coptic-script text.
fn cop_str_len(s: String) -> Int {
    let total: Int = str_len(s)
    return total
}
|
||||
|
||||
// cop_drop: return `s` without its last `n` units.
//
//   s: input string
//   n: number of trailing units to remove, measured the way str_len measures
//
// Returns "" when `n` is at least the length of `s`.
fn cop_drop(s: String, n: Int) -> String {
    let total: Int = str_len(s)
    if n >= total { return "" }
    let keep: Int = total - n
    return str_slice(s, 0, keep)
}
|
||||
|
||||
// cop_last_char: return the final unit of `s`, or "" for the empty string.
// NOTE(review): this slices one str_len unit; if that unit is a byte, the result
// for Coptic-script input is only the last byte of the final letter — confirm.
fn cop_last_char(s: String) -> String {
    let total: Int = str_len(s)
    if total == 0 { return "" }
    let start: Int = total - 1
    return str_slice(s, start, total)
}
|
||||
|
||||
// ── Person/number slot ──────────────────────────────────────────────────────────
|
||||
//
|
||||
// Maps person × number to a 0-based index used in paradigm tables.
|
||||
// Gender is not encoded in the slot index here; it is passed separately to
|
||||
// cop_subject_prefix where it matters (2sg and 3sg distinction).
|
||||
//
|
||||
// Slot layout:
|
||||
// 0 = 1st singular (ⲁⲛⲟⲕ anok)
|
||||
// 1 = 2nd singular (ⲛⲧⲟⲕ/ⲛⲧⲟ ntok/nto) — gender resolved in cop_subject_prefix
|
||||
// 2 = 3rd singular (ⲛⲧⲟϥ/ⲛⲧⲟⲥ ntof/ntos) — gender resolved in cop_subject_prefix
|
||||
// 3 = 1st plural (ⲁⲛⲟⲛ anon)
|
||||
// 4 = 2nd plural (ⲛⲧⲱⲧⲉⲛ ntwten)
|
||||
// 5 = 3rd plural (ⲛⲧⲟⲩ ntou)
|
||||
|
||||
// cop_slot: map person × number to a 0-based paradigm index.
//
//   person: "first" | "second"; any other value is treated as third person
//   number: "singular"; any other value is treated as plural
//
// Returns 0/1/2 for 1st/2nd/3rd singular and 3/4/5 for 1st/2nd/3rd plural.
fn cop_slot(person: String, number: String) -> Int {
    if str_eq(number, "singular") {
        if str_eq(person, "first") { return 0 }
        if str_eq(person, "second") { return 1 }
        return 2
    }
    // Plural row
    if str_eq(person, "first") { return 3 }
    if str_eq(person, "second") { return 4 }
    return 5
}
|
||||
|
||||
// ── Subject pronoun prefixes ─────────────────────────────────────────────────────
|
||||
//
|
||||
// Coptic bipartite conjugation uses short pronoun prefixes attached directly to
|
||||
// the verb stem (or to the tense base in tripartite). These are the Sahidic
|
||||
// bound subject pronouns.
|
||||
//
|
||||
// Full independent pronouns (for reference):
|
||||
// 1sg: ⲁⲛⲟⲕ (anok) prefix: ⲁ- / ⲧ- (varies by tense base)
|
||||
// 2sg m: ⲛⲧⲟⲕ (ntok) prefix: ⲕ-
|
||||
// 2sg f: ⲛⲧⲟ (nto) prefix: ⲧⲉ-
|
||||
// 3sg m: ⲛⲧⲟϥ (ntof) prefix: ϥ-
|
||||
// 3sg f: ⲛⲧⲟⲥ (ntos) prefix: ⲥ-
|
||||
// 1pl: ⲁⲛⲟⲛ (anon) prefix: ⲛ-
|
||||
// 2pl: ⲛⲧⲱⲧⲉⲛ (ntwten) prefix: ⲧⲉⲧⲉⲛ-
|
||||
// 3pl: ⲛⲧⲟⲩ (ntou) prefix: ⲥⲉ-
|
||||
//
|
||||
// cop_subject_prefix returns the short bound prefix used in bipartite conjugation.
|
||||
// For the perfect (a-prefix tense base), the subject prefix follows "a-" directly.
|
||||
|
||||
// cop_subject_prefix: bound subject prefix without a gender distinction
// (the masculine form is used for 2nd and 3rd singular).
//
//   person: "first" | "second"; any other value is treated as third person
//   number: "singular"; any other value is treated as plural
fn cop_subject_prefix(person: String, number: String) -> String {
    if str_eq(number, "singular") {
        if str_eq(person, "first") { return "ⲁ" }
        if str_eq(person, "second") { return "ⲕ" }
        return "ϥ"
    }
    // Plural row
    if str_eq(person, "first") { return "ⲛ" }
    if str_eq(person, "second") { return "ⲧⲉⲧⲉⲛ" }
    return "ⲥⲉ"
}
|
||||
|
||||
// cop_subject_prefix_gendered: like cop_subject_prefix but handles the
|
||||
// 2sg feminine (ⲧⲉ-) and 3sg feminine (ⲥ-) distinction.
|
||||
|
||||
// cop_subject_prefix_gendered: bound subject prefix with the feminine forms
// for 2nd singular (ⲧⲉ) and 3rd singular (ⲥ).
//
//   person: "first" | "second"; any other value is treated as third person
//   gender: "f" selects the feminine form; any other value selects the masculine
//   number: "singular"; any other value is treated as plural
//
// Gender only affects the 2nd and 3rd person singular.
fn cop_subject_prefix_gendered(person: String, gender: String, number: String) -> String {
    // Plural forms carry no gender distinction.
    if !str_eq(number, "singular") {
        if str_eq(person, "first") { return "ⲛ" }
        if str_eq(person, "second") { return "ⲧⲉⲧⲉⲛ" }
        return "ⲥⲉ"
    }
    if str_eq(person, "first") { return "ⲁ" }
    if str_eq(person, "second") {
        if str_eq(gender, "f") { return "ⲧⲉ" }
        return "ⲕ"
    }
    // third person singular
    if str_eq(gender, "f") { return "ⲥ" }
    return "ϥ"
}
|
||||
|
||||
// ── Copula ──────────────────────────────────────────────────────────────────────
|
||||
//
|
||||
// The Coptic nominal/adjectival copula is a standalone particle that agrees with
|
||||
// the gender and number of the subject:
|
||||
// Masculine sg: ⲡⲉ (pe)
|
||||
// Feminine sg: ⲧⲉ (te)
|
||||
// Plural: ⲛⲉ (ne)
|
||||
//
|
||||
// For adjective predicates in the present tense, the copula is often zero
|
||||
// (following the inherited Egyptian zero-copula rule). cop_conjugate returns ""
|
||||
// for present-tense "be"; cop_copula_particle always returns the full particle.
|
||||
|
||||
// cop_copula_particle: copula particle agreeing with the subject.
//
//   gender: "f" selects ⲧⲉ; any other value selects ⲡⲉ (singular only)
//   number: "plural" selects ⲛⲉ regardless of gender
fn cop_copula_particle(gender: String, number: String) -> String {
    if !str_eq(number, "plural") {
        if str_eq(gender, "f") { return "ⲧⲉ" }
        return "ⲡⲉ"
    }
    return "ⲛⲉ"
}
|
||||
|
||||
// ── Verb: ϣωπε (to be / become) ────────────────────────────────────────────────
|
||||
//
|
||||
// ϣωπε is the Sahidic verb meaning "to be" or "to become". It is used as a
|
||||
// substantive/existential copula. For adjective predicate sentences the zero
|
||||
// copula is preferred (inherited from Egyptian).
|
||||
//
|
||||
// Sahidic forms:
|
||||
// Present I (bipartite): prefix + ϣⲟⲟⲡ (e.g. ϥϣⲟⲟⲡ "he is/exists")
|
||||
// Perfect (a- base): ⲁ + prefix + ϣⲱⲡⲉ (e.g. ⲁϥϣⲱⲡⲉ "he became")
|
||||
// Future (na- infix): prefix + ⲛⲁϣⲱⲡⲉ (e.g. ϥⲛⲁϣⲱⲡⲉ "he will become")
|
||||
//
|
||||
// Note: ϣⲟⲟⲡ (shoop) is the present stem; ϣⲱⲡⲉ (shwpe) is the infinitive/perfect stem.
|
||||
|
||||
// cop_shwpe_present: present form of "to be" — subject prefix followed by
// the present stem ϣⲟⲟⲡ.
fn cop_shwpe_present(prefix: String) -> String {
    let stem: String = "ϣⲟⲟⲡ"
    return prefix + stem
}
|
||||
|
||||
// cop_shwpe_perfect: perfect form of "to be/become" — ⲁ, then the subject
// prefix, then the stem ϣⲱⲡⲉ.
fn cop_shwpe_perfect(prefix: String) -> String {
    let base: String = "ⲁ" + prefix
    return base + "ϣⲱⲡⲉ"
}
|
||||
|
||||
// cop_shwpe_future: future form of "to be/become" — subject prefix followed
// by ⲛⲁ + ϣⲱⲡⲉ.
fn cop_shwpe_future(prefix: String) -> String {
    let tail: String = "ⲛⲁϣⲱⲡⲉ"
    return prefix + tail
}
|
||||
|
||||
// ── Verb: bwk (to go) — written ⲃⲱⲕ ───────────────────────────────────────────
|
||||
//
|
||||
// A common strong verb. The standard bipartite/tripartite pattern applies.
|
||||
// Present: prefix + ⲃⲱⲕ (e.g. ϥⲃⲱⲕ "he goes")
|
||||
// Perfect: ⲁ + prefix + ⲃⲱⲕ (e.g. ⲁϥⲃⲱⲕ "he went")
|
||||
// Future: prefix + ⲛⲁⲃⲱⲕ (e.g. ϥⲛⲁⲃⲱⲕ "he will go")
|
||||
|
||||
// cop_bwk_present: present form of ⲃⲱⲕ "to go" — subject prefix + stem.
fn cop_bwk_present(prefix: String) -> String {
    let stem: String = "ⲃⲱⲕ"
    return prefix + stem
}
|
||||
|
||||
// cop_bwk_perfect: perfect form of ⲃⲱⲕ "to go" — ⲁ + subject prefix + stem.
fn cop_bwk_perfect(prefix: String) -> String {
    let base: String = "ⲁ" + prefix
    return base + "ⲃⲱⲕ"
}
|
||||
|
||||
// cop_bwk_future: future form of ⲃⲱⲕ "to go" — subject prefix + ⲛⲁ + stem.
fn cop_bwk_future(prefix: String) -> String {
    let tail: String = "ⲛⲁⲃⲱⲕ"
    return prefix + tail
}
|
||||
|
||||
// ── Verb: nau (to see) — written ⲛⲁⲩ ──────────────────────────────────────────
|
||||
//
|
||||
// nau is a biconsonantal verb. Regular bipartite conjugation:
|
||||
// Present: prefix + ⲛⲁⲩ (e.g. ϥⲛⲁⲩ "he sees")
|
||||
// Perfect: ⲁ + prefix + ⲛⲁⲩ (e.g. ⲁϥⲛⲁⲩ "he saw")
|
||||
// Future: prefix + ⲛⲁⲛⲁⲩ (e.g. ϥⲛⲁⲛⲁⲩ "he will see")
|
||||
//
|
||||
// Note: the future prefix "na-" followed by "nau" produces "nanau" — standard.
|
||||
|
||||
// cop_nau_present: present form of ⲛⲁⲩ "to see" — subject prefix + stem.
fn cop_nau_present(prefix: String) -> String {
    let stem: String = "ⲛⲁⲩ"
    return prefix + stem
}
|
||||
|
||||
// cop_nau_perfect: perfect form of ⲛⲁⲩ "to see" — ⲁ + subject prefix + stem.
fn cop_nau_perfect(prefix: String) -> String {
    let base: String = "ⲁ" + prefix
    return base + "ⲛⲁⲩ"
}
|
||||
|
||||
// cop_nau_future: future form of ⲛⲁⲩ "to see" — subject prefix + ⲛⲁ + stem
// (the doubled ⲛⲁ is the future marker followed by the stem's own ⲛⲁ).
fn cop_nau_future(prefix: String) -> String {
    let tail: String = "ⲛⲁⲛⲁⲩ"
    return prefix + tail
}
|
||||
|
||||
// ── Verb: jw (to say / speak) — written ϫⲱ ────────────────────────────────────
|
||||
//
|
||||
// ϫⲱ is the Sahidic verb for "to say". Bipartite pattern:
|
||||
// Present: prefix + ϫⲱ (e.g. ϥϫⲱ "he says")
|
||||
// Perfect: ⲁ + prefix + ϫⲱ (e.g. ⲁϥϫⲱ "he said")
|
||||
// Future: prefix + ⲛⲁϫⲱ (e.g. ϥⲛⲁϫⲱ "he will say")
|
||||
|
||||
// cop_jw_present: present form of ϫⲱ "to say" — subject prefix + stem.
fn cop_jw_present(prefix: String) -> String {
    let stem: String = "ϫⲱ"
    return prefix + stem
}
|
||||
|
||||
// cop_jw_perfect: perfect form of ϫⲱ "to say" — ⲁ + subject prefix + stem.
fn cop_jw_perfect(prefix: String) -> String {
    let base: String = "ⲁ" + prefix
    return base + "ϫⲱ"
}
|
||||
|
||||
// cop_jw_future: future form of ϫⲱ "to say" — subject prefix + ⲛⲁ + stem.
fn cop_jw_future(prefix: String) -> String {
    let tail: String = "ⲛⲁϫⲱ"
    return prefix + tail
}
|
||||
|
||||
// ── Verb: di (to give) — written ϯ ─────────────────────────────────────────────
|
||||
//
|
||||
// ϯ (ti/di) is a monosyllabic verb meaning "to give". It is very common in
|
||||
// Coptic texts. Bipartite pattern:
|
||||
// Present: prefix + ϯ (e.g. ϥϯ "he gives")
|
||||
// Perfect: ⲁ + prefix + ϯ (e.g. ⲁϥϯ "he gave")
|
||||
// Future: prefix + ⲛⲁϯ (e.g. ϥⲛⲁϯ "he will give")
|
||||
|
||||
// cop_di_present: present form of ϯ "to give" — subject prefix + stem.
fn cop_di_present(prefix: String) -> String {
    let stem: String = "ϯ"
    return prefix + stem
}
|
||||
|
||||
// cop_di_perfect: perfect form of ϯ "to give" — ⲁ + subject prefix + stem.
fn cop_di_perfect(prefix: String) -> String {
    let base: String = "ⲁ" + prefix
    return base + "ϯ"
}
|
||||
|
||||
// cop_di_future: future form of ϯ "to give" — subject prefix + ⲛⲁ + stem.
fn cop_di_future(prefix: String) -> String {
    let tail: String = "ⲛⲁϯ"
    return prefix + tail
}
|
||||
|
||||
// ── Copula detection ─────────────────────────────────────────────────────────────
|
||||
|
||||
// cop_is_copula: report whether `verb` names the "to be / become" verb.
//
// Accepted spellings:
//   "be"     — English canonical label
//   "shwpe"  — transliteration
//   "ϣⲱⲡⲉ"   — Coptic script, the same letters the stem literals in this file use
//   "ϣωπε"   — legacy spelling whose last three letters are the Greek
//              look-alikes ω π ε; kept so existing callers keep working
//
// The file header describes str_eq as byte equality, so the Coptic and Greek
// spellings are different strings and both have to be listed.
fn cop_is_copula(verb: String) -> Bool {
    if str_eq(verb, "be") { return true }
    if str_eq(verb, "shwpe") { return true }
    if str_eq(verb, "ϣⲱⲡⲉ") { return true }
    if str_eq(verb, "ϣωπε") { return true }
    return false
}
|
||||
|
||||
// ── Known-verb dispatcher ────────────────────────────────────────────────────────
|
||||
//
|
||||
// Returns the inflected form for a known verb given the subject prefix string
|
||||
// and tense. Returns "" if the verb is not in the table.
|
||||
|
||||
// cop_known_verb_key: normalise any accepted spelling of a table verb to its
// transliterated key ("shwpe", "bwk", "nau", "jw", "di"). Returns "" when the
// verb is not in the table.
//
// ϣⲱⲡⲉ is accepted in two encodings: the all-Coptic spelling that the stem
// literals in this file use, and the legacy spelling whose last three letters
// are the Greek look-alikes ω π ε. The file header describes str_eq as byte
// equality, so the two are different strings and must be listed separately.
//
// The English label "be" is deliberately absent: cop_conjugate handles it
// before consulting the table (zero copula in the present).
fn cop_known_verb_key(verb: String) -> String {
    // ϣⲱⲡⲉ — to be / become
    if str_eq(verb, "shwpe") { return "shwpe" }
    if str_eq(verb, "ϣⲱⲡⲉ") { return "shwpe" }
    if str_eq(verb, "ϣωπε") { return "shwpe" }
    // ⲃⲱⲕ — to go
    if str_eq(verb, "bwk") { return "bwk" }
    if str_eq(verb, "ⲃⲱⲕ") { return "bwk" }
    if str_eq(verb, "go") { return "bwk" }
    // ⲛⲁⲩ — to see
    if str_eq(verb, "nau") { return "nau" }
    if str_eq(verb, "ⲛⲁⲩ") { return "nau" }
    if str_eq(verb, "see") { return "nau" }
    // ϫⲱ — to say
    if str_eq(verb, "jw") { return "jw" }
    if str_eq(verb, "ϫⲱ") { return "jw" }
    if str_eq(verb, "say") { return "jw" }
    // ϯ — to give
    if str_eq(verb, "di") { return "di" }
    if str_eq(verb, "ϯ") { return "di" }
    if str_eq(verb, "give") { return "di" }
    return ""
}

// cop_known_verb_prefixed: inflect a verb from the built-in table.
//
//   verb:   Coptic script, transliteration, or English label of a table verb
//   tense:  "past" selects the perfect, "future" the future; "present" and any
//           unrecognised value select the present form
//   prefix: subject prefix string to embed in the form
//
// Returns the inflected form, or "" if the verb is not in the table. Callers
// use the empty string as the "not found" signal.
fn cop_known_verb_prefixed(verb: String, tense: String, prefix: String) -> String {
    let key: String = cop_known_verb_key(verb)
    if str_eq(key, "") { return "" }

    if str_eq(key, "shwpe") {
        if str_eq(tense, "past") { return cop_shwpe_perfect(prefix) }
        if str_eq(tense, "future") { return cop_shwpe_future(prefix) }
        return cop_shwpe_present(prefix)
    }
    if str_eq(key, "bwk") {
        if str_eq(tense, "past") { return cop_bwk_perfect(prefix) }
        if str_eq(tense, "future") { return cop_bwk_future(prefix) }
        return cop_bwk_present(prefix)
    }
    if str_eq(key, "nau") {
        if str_eq(tense, "past") { return cop_nau_perfect(prefix) }
        if str_eq(tense, "future") { return cop_nau_future(prefix) }
        return cop_nau_present(prefix)
    }
    if str_eq(key, "jw") {
        if str_eq(tense, "past") { return cop_jw_perfect(prefix) }
        if str_eq(tense, "future") { return cop_jw_future(prefix) }
        return cop_jw_present(prefix)
    }
    // Only "di" remains: cop_known_verb_key returns one of the five keys or "".
    if str_eq(tense, "past") { return cop_di_perfect(prefix) }
    if str_eq(tense, "future") { return cop_di_future(prefix) }
    return cop_di_present(prefix)
}
|
||||
|
||||
// ── Regular verb conjugation ─────────────────────────────────────────────────────
|
||||
//
|
||||
// For verbs not in the explicit table, apply the productive bipartite pattern:
|
||||
// Present: prefix + stem
|
||||
// Perfect: ⲁ + prefix + stem
|
||||
// Future: prefix + ⲛⲁ + stem
|
||||
|
||||
// cop_regular_present: productive present pattern — subject prefix directly
// followed by the verb stem.
fn cop_regular_present(prefix: String, stem: String) -> String {
    let form: String = prefix + stem
    return form
}
|
||||
|
||||
// cop_regular_perfect: productive perfect pattern — ⲁ, then the subject
// prefix, then the verb stem.
fn cop_regular_perfect(prefix: String, stem: String) -> String {
    let base: String = "ⲁ" + prefix
    return base + stem
}
|
||||
|
||||
// cop_regular_future: productive future pattern — subject prefix, then the
// future marker ⲛⲁ, then the verb stem.
fn cop_regular_future(prefix: String, stem: String) -> String {
    let marked: String = prefix + "ⲛⲁ"
    return marked + stem
}
|
||||
|
||||
// ── cop_conjugate: main conjugation entry point ──────────────────────────────────
|
||||
//
|
||||
// verb: Coptic verb (Sahidic stem, transliterated, or English canonical label)
|
||||
// tense: "present" | "past" | "future"
|
||||
// person: "first" | "second" | "third"
|
||||
// number: "singular" | "plural"
|
||||
//
|
||||
// Returns the fully conjugated form with subject prefix embedded.
|
||||
// Zero copula ("") is returned for present "be" (adj predicate context).
|
||||
// For unknown verbs the regular bipartite pattern is applied as a productive fallback.
|
||||
|
||||
// cop_conjugate: main conjugation entry point.
//
//   verb:   Coptic stem, transliteration, or English canonical label
//   tense:  "present" | "past" | "future"
//   person: "first" | "second" | "third"
//   number: "singular" | "plural"
//
// Resolution order:
//   1. "be": zero copula ("") for every tense except "past" and "future",
//      which use the ϣⲱⲡⲉ perfect and future forms.
//   2. Verbs found by cop_known_verb_prefixed.
//   3. Any other verb: the productive pattern with `verb` used as the stem.
//      With an unrecognised tense the verb is returned unchanged.
//
// The subject prefix comes from cop_subject_prefix, so no gender distinction
// is made here.
fn cop_conjugate(verb: String, tense: String, person: String, number: String) -> String {
    let subj: String = cop_subject_prefix(person, number)

    if str_eq(verb, "be") {
        if str_eq(tense, "past") { return cop_shwpe_perfect(subj) }
        if str_eq(tense, "future") { return cop_shwpe_future(subj) }
        // present, and any unrecognised tense: zero copula
        return ""
    }

    // An empty result from the table lookup means "verb not in table".
    let tabled: String = cop_known_verb_prefixed(verb, tense, subj)
    if !str_eq(tabled, "") { return tabled }

    // Productive pattern, written out inline.
    if str_eq(tense, "present") { return subj + verb }
    if str_eq(tense, "past") { return "ⲁ" + subj + verb }
    if str_eq(tense, "future") { return subj + "ⲛⲁ" + verb }

    return verb
}
|
||||
|
||||
// ── Article system ────────────────────────────────────────────────────────────────
|
||||
//
|
||||
// cop_article: return the Coptic article string for the given gender/number/definiteness.
|
||||
//
|
||||
// Definite articles (prefixed directly to noun, no space):
|
||||
// Masculine singular: ⲡ- (p-)
|
||||
// Feminine singular: ⲧ- (t-)
|
||||
// Plural (both): ⲛ- (n-)
|
||||
//
|
||||
// Indefinite articles:
|
||||
// Singular (both genders): ⲟⲩ- (ou-)
|
||||
// Plural: ϩⲉⲛ- (hen-)
|
||||
//
|
||||
// gender: "m" | "f"
|
||||
// number: "singular" | "plural"
|
||||
// definite: "true" | "false"
|
||||
//
|
||||
// Returns the article prefix string (to be concatenated with the noun).
|
||||
|
||||
// cop_article: article prefix to concatenate in front of a noun.
//
//   gender:   "f" selects the feminine definite article; any other value the masculine
//   number:   "plural" selects the plural article; any other value the singular
//   definite: "true" selects a definite article; any other value an indefinite one
//
// Gender only matters for the definite singular.
fn cop_article(gender: String, number: String, definite: String) -> String {
    if !str_eq(definite, "true") {
        // Indefinite: ϩⲉⲛ (plural) / ⲟⲩ (singular)
        if str_eq(number, "plural") { return "ϩⲉⲛ" }
        return "ⲟⲩ"
    }
    // Definite: ⲛ (plural) / ⲧ (feminine singular) / ⲡ (masculine singular)
    if str_eq(number, "plural") { return "ⲛ" }
    if str_eq(gender, "f") { return "ⲧ" }
    return "ⲡ"
}
|
||||
|
||||
// ── Noun number ───────────────────────────────────────────────────────────────────
|
||||
//
|
||||
// cop_decline: return the noun in the appropriate number form.
|
||||
//
|
||||
// Coptic nouns have no case endings. Grammatical role is expressed entirely by
|
||||
// word order and prepositions. The gram_case parameter is accepted for API
|
||||
// symmetry with other morphology modules but has no effect.
|
||||
//
|
||||
// Plural formation:
|
||||
// Coptic plural morphology is highly irregular (inherited from Egyptian and
|
||||
// influenced by Greek loanwords). Common patterns:
|
||||
// - Many nouns show no suffix change — plurality is indicated only by the plural article ⲛ-.
|
||||
// - Some nouns take -ⲟⲟⲩⲉ (-ooue): e.g. ϩⲟ (face) → ϩⲟⲟⲩⲉ
|
||||
// - Greek loanwords often add -ⲟⲥ / -ⲟⲩ in Greek fashion
|
||||
//
|
||||
// This function implements:
|
||||
// - No suffix change (base form) as the productive default — the article carries number.
|
||||
// - Words ending in ⲉ (a common Coptic nominal ending) may take -ⲟⲟⲩⲉ in the plural;
|
||||
// this suffix is applied only when the caller explicitly requests plural and the
|
||||
// noun ends in ⲉ (productive pattern).
|
||||
// Vocabulary-layer irregular plurals should be stored in vocabulary-cop.el and
|
||||
// passed already inflected.
|
||||
|
||||
// cop_decline: return the noun in the requested number form.
//
//   noun:      base noun
//   gram_case: ignored — accepted only for API symmetry with other modules
//   number:    "singular" returns the noun unchanged; any other value is
//              treated as plural
//
// Plural rule: a noun ending in ⲉ has that ending replaced by ⲟⲟⲩⲉ; every
// other noun is returned unchanged (the article carries the plural signal).
fn cop_decline(noun: String, gram_case: String, number: String) -> String {
    if str_eq(number, "singular") { return noun }

    let ending: String = "ⲉ"
    if cop_str_ends(noun, ending) {
        // Drop the ending by its measured length, not a hard-coded 1. The file
        // header says strings are UTF-8 compared by bytes, and ⲉ is three bytes
        // in UTF-8; dropping a single unit would leave a truncated sequence if
        // the string helpers count bytes. If they count code points, this is
        // still 1 and nothing changes.
        let stem: String = cop_drop(noun, cop_str_len(ending))
        return stem + "ⲟⲟⲩⲉ"
    }

    return noun
}
|
||||
|
||||
// ── Noun phrase assembly ──────────────────────────────────────────────────────────
|
||||
//
|
||||
// cop_noun_phrase: build a complete Coptic noun phrase.
|
||||
//
|
||||
// noun: base noun (Coptic script or transliteration)
|
||||
// gram_case: accepted for API symmetry; has no effect (Coptic is caseless)
|
||||
// number: "singular" | "plural"
|
||||
// definite: "true" | "false"
|
||||
//
|
||||
// The article is prefixed directly to the noun with no intervening space,
|
||||
// following standard Coptic orthographic convention.
|
||||
// Gender defaults to masculine when not determinable from context; the caller
|
||||
// should supply the declined noun already in its correct form if gender-sensitive
|
||||
// plural forms are needed.
|
||||
|
||||
// cop_noun_phrase: article + noun with no intervening space, using the
// masculine article for the definite singular.
//
//   noun:      base noun
//   gram_case: passed through to cop_decline
//   number:    "singular" | "plural"
//   definite:  "true" | "false"; any other value returns the declined noun
//              with no article
fn cop_noun_phrase(noun: String, gram_case: String, number: String, definite: String) -> String {
    let base: String = cop_decline(noun, gram_case, number)
    // Gender is fixed to "m" here; use cop_noun_phrase_gendered to supply it.
    let article: String = cop_article("m", number, definite)
    let phrase: String = article + base
    if str_eq(definite, "true") { return phrase }
    if str_eq(definite, "false") { return phrase }
    return base
}
|
||||
|
||||
// cop_noun_phrase_gendered: noun phrase with explicit gender for correct article selection.
|
||||
//
|
||||
// gender: "m" | "f"
|
||||
|
||||
// cop_noun_phrase_gendered: article + noun with no intervening space, with
// the article chosen for the given gender.
//
//   noun:      base noun
//   gram_case: passed through to cop_decline
//   number:    "singular" | "plural"
//   definite:  "true" | "false"; any other value returns the declined noun
//              with no article
//   gender:    "m" | "f"
fn cop_noun_phrase_gendered(noun: String, gram_case: String, number: String, definite: String, gender: String) -> String {
    let base: String = cop_decline(noun, gram_case, number)
    let article: String = cop_article(gender, number, definite)
    let phrase: String = article + base
    if str_eq(definite, "true") { return phrase }
    if str_eq(definite, "false") { return phrase }
    return base
}
|
||||
|
||||
// ── Canonical verb mapping ────────────────────────────────────────────────────────
|
||||
//
|
||||
// cop_map_canonical: map cross-lingual English canonical verb labels to their
|
||||
// Sahidic Coptic equivalents before dispatching to cop_conjugate.
|
||||
|
||||
// cop_map_canonical: translate an English canonical verb label into the
// transliterated Coptic key.
//
//   "go" → "bwk"   "see" → "nau"   "say" / "speak" → "jw"   "give" → "di"
//
// "be" and every unrecognised label are returned as given.
fn cop_map_canonical(verb: String) -> String {
    if str_eq(verb, "give") { return "di" }
    if str_eq(verb, "speak") { return "jw" }
    if str_eq(verb, "say") { return "jw" }
    if str_eq(verb, "see") { return "nau" }
    if str_eq(verb, "go") { return "bwk" }
    // "be" stays "be"; unknown labels pass through for the regular pattern.
    return verb
}
|
||||
@@ -1,35 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn cop_str_ends(s: String, suf: String) -> Bool
|
||||
extern fn cop_str_len(s: String) -> Int
|
||||
extern fn cop_drop(s: String, n: Int) -> String
|
||||
extern fn cop_last_char(s: String) -> String
|
||||
extern fn cop_slot(person: String, number: String) -> Int
|
||||
extern fn cop_subject_prefix(person: String, number: String) -> String
|
||||
extern fn cop_subject_prefix_gendered(person: String, gender: String, number: String) -> String
|
||||
extern fn cop_copula_particle(gender: String, number: String) -> String
|
||||
extern fn cop_shwpe_present(prefix: String) -> String
|
||||
extern fn cop_shwpe_perfect(prefix: String) -> String
|
||||
extern fn cop_shwpe_future(prefix: String) -> String
|
||||
extern fn cop_bwk_present(prefix: String) -> String
|
||||
extern fn cop_bwk_perfect(prefix: String) -> String
|
||||
extern fn cop_bwk_future(prefix: String) -> String
|
||||
extern fn cop_nau_present(prefix: String) -> String
|
||||
extern fn cop_nau_perfect(prefix: String) -> String
|
||||
extern fn cop_nau_future(prefix: String) -> String
|
||||
extern fn cop_jw_present(prefix: String) -> String
|
||||
extern fn cop_jw_perfect(prefix: String) -> String
|
||||
extern fn cop_jw_future(prefix: String) -> String
|
||||
extern fn cop_di_present(prefix: String) -> String
|
||||
extern fn cop_di_perfect(prefix: String) -> String
|
||||
extern fn cop_di_future(prefix: String) -> String
|
||||
extern fn cop_is_copula(verb: String) -> Bool
|
||||
extern fn cop_known_verb_prefixed(verb: String, tense: String, prefix: String) -> String
|
||||
extern fn cop_regular_present(prefix: String, stem: String) -> String
|
||||
extern fn cop_regular_perfect(prefix: String, stem: String) -> String
|
||||
extern fn cop_regular_future(prefix: String, stem: String) -> String
|
||||
extern fn cop_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn cop_article(gender: String, number: String, definite: String) -> String
|
||||
extern fn cop_decline(noun: String, gram_case: String, number: String) -> String
|
||||
extern fn cop_noun_phrase(noun: String, gram_case: String, number: String, definite: String) -> String
|
||||
extern fn cop_noun_phrase_gendered(noun: String, gram_case: String, number: String, definite: String, gender: String) -> String
|
||||
extern fn cop_map_canonical(verb: String) -> String
|
||||
@@ -1,814 +0,0 @@
|
||||
// morphology-de.el - German morphology: articles, adjective endings, noun
|
||||
// plurals, and verb conjugation.
|
||||
//
|
||||
// German is a fusional language with:
|
||||
// - 4 grammatical cases: nominative, accusative, dative, genitive
|
||||
// - 3 genders: masculine (m), feminine (f), neuter (n)
|
||||
// - 2 numbers: singular, plural
|
||||
// - Strong and weak verb classes
|
||||
//
|
||||
// Conventions used throughout:
|
||||
// gender: "m" | "f" | "n"
|
||||
// case: "nom" | "acc" | "dat" | "gen"
|
||||
// number: "sg" | "pl"
|
||||
// person: "1" | "2" | "3"
|
||||
// tense: "present" | "past" | "future"
|
||||
// article_type: "def" | "indef" | "none"
|
||||
//
|
||||
// Depends on: language-profile (str_eq, str_len, str_slice, str_drop_last,
|
||||
// str_ends_with)
|
||||
|
||||
// ── Definite articles (der-words) ─────────────────────────────────────────────
|
||||
//
|
||||
// Masc Fem Neut Plural
|
||||
// Nom: der die das die
|
||||
// Acc: den die das die
|
||||
// Dat: dem der dem den
|
||||
// Gen: des der des der
|
||||
|
||||
import "morphology.el"
|
||||
// de_article_def: definite article for the given gender, case and number.
//
//   gender:    "m" | "f"; any other value is treated as neuter
//   gram_case: "nom" | "acc" | "dat" | "gen"; any other value yields the
//              nominative form
//   number:    "pl" selects the plural row (gender is then ignored); any
//              other value is treated as singular
fn de_article_def(gender: String, gram_case: String, number: String) -> String {
    if str_eq(number, "pl") {
        if str_eq(gram_case, "dat") { return "den" }
        if str_eq(gram_case, "gen") { return "der" }
        return "die"
    }
    if str_eq(gender, "m") {
        if str_eq(gram_case, "acc") { return "den" }
        if str_eq(gram_case, "dat") { return "dem" }
        if str_eq(gram_case, "gen") { return "des" }
        return "der"
    }
    if str_eq(gender, "f") {
        if str_eq(gram_case, "dat") { return "der" }
        if str_eq(gram_case, "gen") { return "der" }
        return "die"
    }
    // neuter
    if str_eq(gram_case, "dat") { return "dem" }
    if str_eq(gram_case, "gen") { return "des" }
    return "das"
}
|
||||
|
||||
// ── Indefinite articles (ein-words) ──────────────────────────────────────────
|
||||
//
|
||||
// Masc Fem Neut Plural
|
||||
// Nom: ein eine ein —
|
||||
// Acc: einen eine ein —
|
||||
// Dat: einem einer einem —
|
||||
// Gen: eines einer eines —
|
||||
|
||||
// de_article_indef: indefinite article for the given gender and case.
//
//   gender:    "m" | "f"; any other value is treated as neuter
//   gram_case: "nom" | "acc" | "dat" | "gen"; any other value yields the
//              nominative form
//   number:    "pl" returns "" (there is no plural indefinite article)
fn de_article_indef(gender: String, gram_case: String, number: String) -> String {
    if str_eq(number, "pl") { return "" }

    if str_eq(gender, "f") {
        if str_eq(gram_case, "dat") { return "einer" }
        if str_eq(gram_case, "gen") { return "einer" }
        return "eine"
    }
    if str_eq(gender, "m") {
        if str_eq(gram_case, "acc") { return "einen" }
        if str_eq(gram_case, "dat") { return "einem" }
        if str_eq(gram_case, "gen") { return "eines" }
        return "ein"
    }
    // neuter
    if str_eq(gram_case, "dat") { return "einem" }
    if str_eq(gram_case, "gen") { return "eines" }
    return "ein"
}
|
||||
|
||||
// de_article: unified article dispatch.
|
||||
// definite: "def" | "indef" | "none"
|
||||
// de_article: pick the article table by `definite` and look the form up.
//
//   definite: "def" → de_article_def, "indef" → de_article_indef;
//             any other value (e.g. "none") returns ""
fn de_article(gender: String, gram_case: String, number: String, definite: String) -> String {
    if str_eq(definite, "indef") {
        return de_article_indef(gender, gram_case, number)
    }
    if str_eq(definite, "def") {
        return de_article_def(gender, gram_case, number)
    }
    return ""
}
|
||||
|
||||
// ── Adjective endings ─────────────────────────────────────────────────────────
|
||||
//
|
||||
// Weak endings (after a definite article or der-word):
|
||||
//
|
||||
// Masc Fem Neut Plural
|
||||
// Nom: -e -e -e -en
|
||||
// Acc: -en -e -e -en
|
||||
// Dat: -en -en -en -en
|
||||
// Gen: -en -en -en -en
|
||||
//
|
||||
// Mixed endings (after ein-words with no marking, i.e. indef article):
|
||||
//
|
||||
// Masc Fem Neut Plural
|
||||
// Nom: -er -e -es -en
|
||||
// Acc: -en -e -es -en
|
||||
// Dat: -en -en -en -en
|
||||
// Gen: -en -en -en -en
|
||||
//
|
||||
// Strong endings (no preceding article):
|
||||
//
|
||||
// Masc Fem Neut Plural
|
||||
// Nom: -er -e -es -e
|
||||
// Acc: -en -e -es -e
|
||||
// Dat: -em -er -em -en
|
||||
// Gen: -en -er -en -er
|
||||
//
|
||||
// article_type: "def" | "indef" | "none"
|
||||
|
||||
// de_adj_ending: adjective ending (without a leading hyphen) for the given
// gender, case, number and preceding article type.
//
//   gender:       "m" | "f"; any other value is treated as neuter
//   gram_case:    "nom" | "acc" | "dat" | "gen"
//   number:       "pl" selects the plural column; any other value the singular
//   article_type: "def" → weak endings, "indef" → mixed endings,
//                 any other value → strong endings
//
// Unrecognised gram_case values: weak and mixed fall through to "en"; strong
// falls back to the nominative ending of the selected column.
fn de_adj_ending(gender: String, gram_case: String, number: String, article_type: String) -> String {
    // ── Weak (after a definite article) ─────────────────────────────────────
    if str_eq(article_type, "def") {
        if str_eq(number, "pl") { return "en" }
        // Nominative singular is -e for every gender.
        if str_eq(gram_case, "nom") { return "e" }
        // Outside the nominative, masculine is always -en.
        if str_eq(gender, "m") { return "en" }
        // Feminine and neuter keep -e in the accusative.
        if str_eq(gram_case, "acc") { return "e" }
        return "en"
    }

    // ── Mixed (after an indefinite article) ─────────────────────────────────
    if str_eq(article_type, "indef") {
        if str_eq(number, "pl") { return "en" }
        if str_eq(gram_case, "nom") {
            if str_eq(gender, "m") { return "er" }
            if str_eq(gender, "f") { return "e" }
            return "es"
        }
        if str_eq(gram_case, "acc") {
            if str_eq(gender, "m") { return "en" }
            if str_eq(gender, "f") { return "e" }
            return "es"
        }
        return "en"
    }

    // ── Strong (no article) ─────────────────────────────────────────────────
    if str_eq(number, "pl") {
        if str_eq(gram_case, "dat") { return "en" }
        if str_eq(gram_case, "gen") { return "er" }
        return "e"
    }
    if str_eq(gender, "m") {
        if str_eq(gram_case, "acc") { return "en" }
        if str_eq(gram_case, "dat") { return "em" }
        if str_eq(gram_case, "gen") { return "en" }
        return "er"
    }
    if str_eq(gender, "f") {
        if str_eq(gram_case, "dat") { return "er" }
        if str_eq(gram_case, "gen") { return "er" }
        return "e"
    }
    // neuter
    if str_eq(gram_case, "dat") { return "em" }
    if str_eq(gram_case, "gen") { return "en" }
    return "es"
}
|
||||
|
||||
// ── Noun plural formation ─────────────────────────────────────────────────────
|
||||
//
|
||||
// Major patterns, keyed on lemma. Where a noun is known irregular, the full
|
||||
// plural is returned. Otherwise a productive heuristic by gender and ending
|
||||
// is applied:
|
||||
//
|
||||
// Masculine hard nouns → +e (der Tag → Tage)
|
||||
// Feminine nouns in -e → +n (die Katze → Katzen)
|
||||
// Feminine nouns → +en (die Frau → Frauen)
|
||||
// Neuter nouns in -chen/-lein → ∅ (das Mädchen → Mädchen)
|
||||
// Neuter nouns in -um → -um +en (das Zentrum → Zentren)
|
||||
// Loanwords in -a,-o,-i → +s (das Auto → Autos)
|
||||
// Default → +e
|
||||
|
||||
// Returns the nominative plural of the singular lemma `noun`.
//
//   noun    singular lemma, spelled exactly as in the table below ("Haus")
//   gender  "f" selects the feminine suffix rules; every other value falls
//           through to the masculine/neuter rules
//
// Exact-lemma entries are consulted first. The suffix heuristics that follow
// never add an umlaut, so umlauting nouns have to be listed explicitly.
fn de_noun_plural(noun: String, gender: String) -> String {
    // ── Lexical irregulars ──────────────────────────────────────────────────
    if str_eq(noun, "Mann") { return "Männer" }
    if str_eq(noun, "Kind") { return "Kinder" }
    if str_eq(noun, "Haus") { return "Häuser" }
    if str_eq(noun, "Buch") { return "Bücher" }
    if str_eq(noun, "Mutter") { return "Mütter" }
    if str_eq(noun, "Vater") { return "Väter" }
    if str_eq(noun, "Bruder") { return "Brüder" }
    if str_eq(noun, "Tochter") { return "Töchter" }
    if str_eq(noun, "Nacht") { return "Nächte" }
    if str_eq(noun, "Stadt") { return "Städte" }
    if str_eq(noun, "Wort") { return "Wörter" }
    if str_eq(noun, "Gott") { return "Götter" }
    if str_eq(noun, "Wald") { return "Wälder" }
    if str_eq(noun, "Band") { return "Bände" }
    if str_eq(noun, "Hund") { return "Hunde" }
    if str_eq(noun, "Baum") { return "Bäume" }
    if str_eq(noun, "Raum") { return "Räume" }
    if str_eq(noun, "Traum") { return "Träume" }
    if str_eq(noun, "Zug") { return "Züge" }
    if str_eq(noun, "Flug") { return "Flüge" }
    if str_eq(noun, "Fuß") { return "Füße" }
    if str_eq(noun, "Gruß") { return "Grüße" }
    if str_eq(noun, "Geist") { return "Geister" }
    if str_eq(noun, "Schwanz") { return "Schwänze" }
    if str_eq(noun, "Stuhl") { return "Stühle" }
    if str_eq(noun, "Sohn") { return "Söhne" }
    if str_eq(noun, "Ton") { return "Töne" }
    if str_eq(noun, "Fluss") { return "Flüsse" }
    if str_eq(noun, "Frau") { return "Frauen" }
    if str_eq(noun, "Straße") { return "Straßen" }
    if str_eq(noun, "Schule") { return "Schulen" }
    if str_eq(noun, "Blume") { return "Blumen" }
    if str_eq(noun, "Katze") { return "Katzen" }
    if str_eq(noun, "Sprache") { return "Sprachen" }
    if str_eq(noun, "Kirche") { return "Kirchen" }
    if str_eq(noun, "Tür") { return "Türen" }
    if str_eq(noun, "Uhr") { return "Uhren" }
    if str_eq(noun, "Zahl") { return "Zahlen" }
    if str_eq(noun, "Wahl") { return "Wahlen" }
    if str_eq(noun, "Bahn") { return "Bahnen" }
    if str_eq(noun, "Zahn") { return "Zähne" }
    if str_eq(noun, "Nase") { return "Nasen" }
    if str_eq(noun, "Maus") { return "Mäuse" }
    if str_eq(noun, "Mädchen") { return "Mädchen" }
    if str_eq(noun, "Messer") { return "Messer" }
    if str_eq(noun, "Fenster") { return "Fenster" }
    if str_eq(noun, "Zimmer") { return "Zimmer" }
    if str_eq(noun, "Wasser") { return "Wasser" }
    if str_eq(noun, "Bett") { return "Betten" }
    if str_eq(noun, "Auto") { return "Autos" }
    if str_eq(noun, "Kino") { return "Kinos" }
    if str_eq(noun, "Radio") { return "Radios" }
    if str_eq(noun, "Foto") { return "Fotos" }
    if str_eq(noun, "Cafe") { return "Cafes" }
    if str_eq(noun, "Zentrum") { return "Zentren" }
    if str_eq(noun, "Museum") { return "Museen" }
    if str_eq(noun, "Gymnasium") { return "Gymnasien" }
    if str_eq(noun, "Studium") { return "Studien" }
    if str_eq(noun, "Datum") { return "Daten" }

    // ── Productive heuristics (gender-independent part) ─────────────────────

    // Diminutives in -chen / -lein: plural is identical to the singular
    if str_ends_with(noun, "chen") { return noun }
    if str_ends_with(noun, "lein") { return noun }

    // Nouns ending in -um: replace -um with -en
    if str_ends_with(noun, "um") {
        return str_drop_last(noun, 2) + "en"
    }

    // Loanwords ending in -a, -o, -i, -u or -y: add -s
    if str_ends_with(noun, "a") { return noun + "s" }
    if str_ends_with(noun, "o") { return noun + "s" }
    if str_ends_with(noun, "i") { return noun + "s" }
    if str_ends_with(noun, "u") { return noun + "s" }
    if str_ends_with(noun, "y") { return noun + "s" }

    // ── Feminine nouns ──────────────────────────────────────────────────────
    if str_eq(gender, "f") {
        // -e: add -n (Katze → Katzen)
        if str_ends_with(noun, "e") {
            return noun + "n"
        }
        // -in: add -nen (Lehrerin → Lehrerinnen)
        if str_ends_with(noun, "in") {
            return noun + "nen"
        }
        // -el / -er: add plain -n, never -en (Gabel → Gabeln, Schwester → Schwestern)
        if str_ends_with(noun, "el") { return noun + "n" }
        if str_ends_with(noun, "er") { return noun + "n" }
        // Most other feminines: add -en
        return noun + "en"
    }

    // ── Masculine / neuter nouns ────────────────────────────────────────────

    // -el, -en, -er: no ending (Lehrer → Lehrer, Wagen → Wagen, Segel → Segel)
    if str_ends_with(noun, "el") { return noun }
    if str_ends_with(noun, "en") { return noun }
    if str_ends_with(noun, "er") { return noun }

    // Already ending in -e: add -n rather than doubling the vowel
    // (Junge → Jungen, Auge → Augen)
    if str_ends_with(noun, "e") { return noun + "n" }

    // Default: +e (Tag → Tage)
    return noun + "e"
}
|
||||
|
||||
// ── Noun case endings ─────────────────────────────────────────────────────────
|
||||
//
|
||||
// In German, noun case inflection is mostly carried by the article and
|
||||
// adjective. The noun itself only changes in two regular situations:
|
||||
// - Genitive singular masculine/neuter: -(e)s
|
||||
// - Dative plural: -n (if not already ending in -n or -s)
|
||||
//
|
||||
// Irregular genitive forms (e.g. N-declension: Herr → Herrn) are
|
||||
// handled per-lemma in de_case_ending.
|
||||
|
||||
// Returns the fully inflected noun form for the requested case and number.
//
//   noun       singular lemma
//   gender     "m" | "f" | "n" (only "m" and "n" receive a genitive ending)
//   gram_case  "nom" | "acc" | "dat" | "gen"
//   number     "sg" or "singular" for singular; any other value is plural
//
// The return value is the whole word, not just the suffix.
fn de_case_ending(noun: String, gender: String, gram_case: String, number: String) -> String {
    // Accept the long-form label too. Without this guard "singular" does not
    // match "sg" below and the call silently yields a plural form.
    if str_eq(number, "singular") {
        return de_case_ending(noun, gender, gram_case, "sg")
    }

    // N-declension masculines (weak nouns): every form except the nominative
    // singular carries -(e)n.
    if str_eq(noun, "Herr") {
        if str_eq(number, "sg") {
            if str_eq(gram_case, "nom") { return "Herr" }
            return "Herrn"
        }
        return "Herren"
    }
    if str_eq(noun, "Mensch") {
        if str_eq(number, "sg") {
            if str_eq(gram_case, "nom") { return "Mensch" }
            return "Menschen"
        }
        return "Menschen"
    }
    if str_eq(noun, "Student") {
        if str_eq(number, "sg") {
            if str_eq(gram_case, "nom") { return "Student" }
            return "Studenten"
        }
        return "Studenten"
    }
    if str_eq(noun, "Kollege") {
        if str_eq(number, "sg") {
            if str_eq(gram_case, "nom") { return "Kollege" }
            return "Kollegen"
        }
        return "Kollegen"
    }
    // Name: like the weak nouns, but the genitive singular adds a further -s.
    if str_eq(noun, "Name") {
        if str_eq(number, "sg") {
            if str_eq(gram_case, "nom") { return "Name" }
            if str_eq(gram_case, "gen") { return "Namens" }
            return "Namen"
        }
        return "Namen"
    }

    // Regular singular: only the masculine/neuter genitive changes the noun.
    if str_eq(number, "sg") {
        if str_eq(gram_case, "gen") {
            // Masculine and neuter follow the same rule set:
            //   -nis            → -nisses (Ergebnis → Ergebnisses)
            //   -s -ß -x -z -sch → +es     (Haus → Hauses, Fuß → Fußes)
            //   otherwise       → +s
            if str_eq(gender, "m") {
                if str_ends_with(noun, "nis") { return noun + "ses" }
                if str_ends_with(noun, "s") { return noun + "es" }
                if str_ends_with(noun, "ß") { return noun + "es" }
                if str_ends_with(noun, "x") { return noun + "es" }
                if str_ends_with(noun, "z") { return noun + "es" }
                if str_ends_with(noun, "sch") { return noun + "es" }
                return noun + "s"
            }
            if str_eq(gender, "n") {
                if str_ends_with(noun, "nis") { return noun + "ses" }
                if str_ends_with(noun, "s") { return noun + "es" }
                if str_ends_with(noun, "ß") { return noun + "es" }
                if str_ends_with(noun, "x") { return noun + "es" }
                if str_ends_with(noun, "z") { return noun + "es" }
                if str_ends_with(noun, "sch") { return noun + "es" }
                return noun + "s"
            }
        }
        // Feminine genitive and all other singular cases: noun unchanged
        return noun
    }

    // Plural: start from the nominative plural, computed once.
    let pl: String = de_noun_plural(noun, gender)

    // Dative plural adds -n unless the plural already ends in -n or -s.
    if str_eq(gram_case, "dat") {
        if str_ends_with(pl, "n") { return pl }
        if str_ends_with(pl, "s") { return pl }
        return pl + "n"
    }

    // All other plural cases: the plain plural form
    return pl
}
|
||||
|
||||
// ── Weak verb conjugation ─────────────────────────────────────────────────────
|
||||
//
|
||||
// Model: machen (mach-)
|
||||
//
|
||||
// Present:
|
||||
// 1sg ich mache 2sg du machst 3sg er/sie/es macht
|
||||
// 1pl wir machen 2pl ihr macht 3pl sie machen
|
||||
//
|
||||
// Past (Präteritum):
|
||||
// 1sg ich machte 2sg du machtest 3sg er/sie/es machte
|
||||
// 1pl wir machten 2pl ihr machtet 3pl sie machten
|
||||
|
||||
// Conjugates a regular (weak) verb from its stem.
//
//   stem    infinitive without -en ("mach" for machen)
//   tense   "present" | "past"; any other value returns stem + "en"
//   person  "1" | "2"; any other value is treated as third person
//   number  "sg"; any other value is treated as plural
//
// Only the short labels are compared here; long-form labels are not
// normalised by this function.
fn de_conjugate_weak(stem: String, tense: String, person: String, number: String) -> String {
    if str_eq(tense, "present") {
        if str_eq(number, "sg") {
            if str_eq(person, "1") { return stem + "e" }
            if str_eq(person, "2") {
                // Stems in -t / -d insert -e- before -st (du arbeitest)
                if str_ends_with(stem, "t") { return stem + "est" }
                if str_ends_with(stem, "d") { return stem + "est" }
                // After a sibilant the s of -st is absorbed (du tanzt, du heißt,
                // du reist), so only -t is added.
                if str_ends_with(stem, "s") { return stem + "t" }
                if str_ends_with(stem, "ß") { return stem + "t" }
                if str_ends_with(stem, "x") { return stem + "t" }
                if str_ends_with(stem, "z") { return stem + "t" }
                return stem + "st"
            }
            // 3sg
            if str_ends_with(stem, "t") { return stem + "et" }
            if str_ends_with(stem, "d") { return stem + "et" }
            return stem + "t"
        }
        // plural
        if str_eq(person, "1") { return stem + "en" }
        if str_eq(person, "2") {
            if str_ends_with(stem, "t") { return stem + "et" }
            if str_ends_with(stem, "d") { return stem + "et" }
            return stem + "t"
        }
        // 3pl
        return stem + "en"
    }

    if str_eq(tense, "past") {
        // Stems in -t / -d need a linking -e- before the -te suffix
        // (arbeit- → arbeitete, red- → redete). Re-entering with stem + "e"
        // adds it once; the extended stem no longer ends in t/d.
        if str_ends_with(stem, "t") {
            return de_conjugate_weak(stem + "e", "past", person, number)
        }
        if str_ends_with(stem, "d") {
            return de_conjugate_weak(stem + "e", "past", person, number)
        }
        if str_eq(number, "sg") {
            if str_eq(person, "1") { return stem + "te" }
            if str_eq(person, "2") { return stem + "test" }
            return stem + "te"
        }
        if str_eq(person, "1") { return stem + "ten" }
        if str_eq(person, "2") { return stem + "tet" }
        return stem + "ten"
    }

    // Any other tense (e.g. future): return the infinitive; the caller must
    // prepend the auxiliary.
    return stem + "en"
}
|
||||
|
||||
// ── Strong / irregular verb present-tense forms ───────────────────────────────
|
||||
//
|
||||
// Returns the correct surface form if the verb is irregular, or "" if unknown.
|
||||
// Only present-tense irregulars are encoded here because past tense for strong
|
||||
// verbs is stored as a separate stem (Ablaut) — see de_conjugate.
|
||||
|
||||
// Picks one cell out of a six-form present-tense paradigm.
// number "sg" selects the singular row, anything else the plural row;
// person "1" / "2" select the first two columns, anything else the third.
fn de_present_cell(person: String, number: String, sg1: String, sg2: String, sg3: String, pl1: String, pl2: String, pl3: String) -> String {
    if str_eq(number, "sg") {
        if str_eq(person, "1") { return sg1 }
        if str_eq(person, "2") { return sg2 }
        return sg3
    }
    if str_eq(person, "1") { return pl1 }
    if str_eq(person, "2") { return pl2 }
    return pl3
}

// Looks up the present-tense form of a verb that has its own paradigm entry.
// Returns "" when the infinitive is not listed, which tells the caller to use
// the regular weak pattern instead.
// Each row reads: 1sg, 2sg, 3sg, 1pl, 2pl, 3pl.
fn de_irregular_present(verb: String, person: String, number: String) -> String {
    // Auxiliaries
    if str_eq(verb, "sein") {
        return de_present_cell(person, number, "bin", "bist", "ist", "sind", "seid", "sind")
    }
    if str_eq(verb, "haben") {
        return de_present_cell(person, number, "habe", "hast", "hat", "haben", "habt", "haben")
    }
    if str_eq(verb, "werden") {
        return de_present_cell(person, number, "werde", "wirst", "wird", "werden", "werdet", "werden")
    }

    // Listed verbs without a stem change in the present
    if str_eq(verb, "gehen") {
        return de_present_cell(person, number, "gehe", "gehst", "geht", "gehen", "geht", "gehen")
    }
    if str_eq(verb, "kommen") {
        return de_present_cell(person, number, "komme", "kommst", "kommt", "kommen", "kommt", "kommen")
    }

    // Stem-vowel change in 2sg / 3sg
    if str_eq(verb, "sehen") {
        return de_present_cell(person, number, "sehe", "siehst", "sieht", "sehen", "seht", "sehen")
    }
    if str_eq(verb, "essen") {
        return de_present_cell(person, number, "esse", "isst", "isst", "essen", "esst", "essen")
    }
    if str_eq(verb, "geben") {
        return de_present_cell(person, number, "gebe", "gibst", "gibt", "geben", "gebt", "geben")
    }
    if str_eq(verb, "nehmen") {
        return de_present_cell(person, number, "nehme", "nimmst", "nimmt", "nehmen", "nehmt", "nehmen")
    }
    if str_eq(verb, "fahren") {
        return de_present_cell(person, number, "fahre", "fährst", "fährt", "fahren", "fahrt", "fahren")
    }
    if str_eq(verb, "laufen") {
        return de_present_cell(person, number, "laufe", "läufst", "läuft", "laufen", "lauft", "laufen")
    }

    // wissen and the modals: 1sg and 3sg are identical and endingless
    if str_eq(verb, "wissen") {
        return de_present_cell(person, number, "weiß", "weißt", "weiß", "wissen", "wisst", "wissen")
    }
    if str_eq(verb, "können") {
        return de_present_cell(person, number, "kann", "kannst", "kann", "können", "könnt", "können")
    }
    if str_eq(verb, "müssen") {
        return de_present_cell(person, number, "muss", "musst", "muss", "müssen", "müsst", "müssen")
    }
    if str_eq(verb, "wollen") {
        return de_present_cell(person, number, "will", "willst", "will", "wollen", "wollt", "wollen")
    }

    // Not listed
    return ""
}
|
||||
|
||||
// ── Strong verb past-tense (Präteritum) Ablaut stems ─────────────────────────
|
||||
//
|
||||
// Returns the past stem for strong verbs (Ablaut form), or "" if unknown/weak.
|
||||
// The past-tense endings for strong verbs differ from weak:
|
||||
// 1sg/3sg: bare stem (no ending)
|
||||
// 2sg: stem + -st
|
||||
// 1pl/3pl: stem + -en
|
||||
// 2pl: stem + -t
|
||||
|
||||
// Maps an infinitive to its stored Präteritum base, or "" when the verb is
// not in the table. Entries are grouped by the vowel of the past form; the
// conditions are mutually exclusive, so the grouping does not affect results.
fn de_strong_past_stem(verb: String) -> String {
    // Past forms in -a-
    if str_eq(verb, "kommen") { return "kam" }
    if str_eq(verb, "sehen") { return "sah" }
    if str_eq(verb, "geben") { return "gab" }
    if str_eq(verb, "nehmen") { return "nahm" }
    if str_eq(verb, "lesen") { return "las" }
    if str_eq(verb, "sprechen") { return "sprach" }
    if str_eq(verb, "treffen") { return "traf" }
    if str_eq(verb, "essen") { return "aß" }
    if str_eq(verb, "trinken") { return "trank" }
    if str_eq(verb, "finden") { return "fand" }
    if str_eq(verb, "stehen") { return "stand" }
    if str_eq(verb, "liegen") { return "lag" }
    if str_eq(verb, "sitzen") { return "saß" }
    if str_eq(verb, "helfen") { return "half" }
    if str_eq(verb, "werfen") { return "warf" }

    // Past forms in -i- / -ie-
    if str_eq(verb, "gehen") { return "ging" }
    if str_eq(verb, "laufen") { return "lief" }
    if str_eq(verb, "schreiben") { return "schrieb" }
    if str_eq(verb, "bleiben") { return "blieb" }
    if str_eq(verb, "steigen") { return "stieg" }
    if str_eq(verb, "fallen") { return "fiel" }
    if str_eq(verb, "halten") { return "hielt" }
    if str_eq(verb, "rufen") { return "rief" }

    // Past forms in -u-
    if str_eq(verb, "fahren") { return "fuhr" }
    if str_eq(verb, "tragen") { return "trug" }
    if str_eq(verb, "schlagen") { return "schlug" }
    if str_eq(verb, "wachsen") { return "wuchs" }

    // Past form in -o-
    if str_eq(verb, "ziehen") { return "zog" }

    // denken / bringen: unlike the bare stems above, these two values already
    // include the -te ending.
    if str_eq(verb, "denken") { return "dachte" }
    if str_eq(verb, "bringen") { return "brachte" }

    return ""
}
|
||||
|
||||
// ── Normalization helpers ─────────────────────────────────────────────────────
|
||||
//
|
||||
// The realizer sends long-form labels ("singular", "first").
|
||||
// German morphology uses short forms ("sg", "1"). Normalize on entry.
|
||||
|
||||
// Converts the long-form number label to the short form used by the German
// tables. Any value other than "singular" / "plural" is passed back unchanged.
fn de_norm_number(number: String) -> String {
    if str_eq(number, "plural") { return "pl" }
    if str_eq(number, "singular") { return "sg" }
    return number
}
|
||||
|
||||
// Converts the long-form person label to "1" / "2" / "3".
// Any other value (including an already short label) is passed back unchanged.
fn de_norm_person(person: String) -> String {
    if str_eq(person, "third") { return "3" }
    if str_eq(person, "second") { return "2" }
    if str_eq(person, "first") { return "1" }
    return person
}
|
||||
|
||||
// ── Unified German verb conjugation ──────────────────────────────────────────
|
||||
//
|
||||
// tense: "present" | "past" | "future"
|
||||
// person: "1" | "2" | "3" (also accepts "first" | "second" | "third")
|
||||
// number: "sg" | "pl" (also accepts "singular" | "plural")
|
||||
|
||||
// Returns the stem a weak verb is conjugated from.
// Infinitives in -eln / -ern only drop the final -n (sammeln → sammel-,
// wandern → wander-); all others drop -en (machen → mach-).
fn de_weak_infinitive_stem(verb: String) -> String {
    if str_ends_with(verb, "eln") { return str_drop_last(verb, 1) }
    if str_ends_with(verb, "ern") { return str_drop_last(verb, 1) }
    return str_drop_last(verb, 2)
}

// Conjugates a German verb given its infinitive.
//
//   verb    infinitive ("machen", "sein")
//   tense   "present" | "past" | "future"; any other value returns the
//           infinitive unchanged, except for sein / haben / wissen / modals,
//           where every non-present tense yields the past form
//   person  "1" | "2" | "3" or "first" | "second" | "third"
//   number  "sg" | "pl" or "singular" | "plural"
//
// Future forms are returned as two words: conjugated werden + infinitive.
fn de_conjugate(verb: String, tense: String, person: String, number: String) -> String {
    let number = de_norm_number(number)
    let person = de_norm_person(person)

    // Future tense: werden (conjugated) + infinitive
    if str_eq(tense, "future") {
        let aux: String = de_irregular_present("werden", person, number)
        return aux + " " + verb
    }

    // sein — past is also fully irregular (war)
    if str_eq(verb, "sein") {
        if str_eq(tense, "present") {
            return de_irregular_present("sein", person, number)
        }
        if str_eq(number, "sg") {
            if str_eq(person, "2") { return "warst" }
            return "war"
        }
        if str_eq(person, "2") { return "wart" }
        return "waren"
    }

    // haben — past: hatte
    if str_eq(verb, "haben") {
        if str_eq(tense, "present") {
            return de_irregular_present("haben", person, number)
        }
        if str_eq(number, "sg") {
            if str_eq(person, "2") { return "hattest" }
            return "hatte"
        }
        if str_eq(person, "2") { return "hattet" }
        return "hatten"
    }

    // wissen — past: wusste (mixed/irregular)
    if str_eq(verb, "wissen") {
        if str_eq(tense, "present") {
            return de_irregular_present("wissen", person, number)
        }
        if str_eq(number, "sg") {
            if str_eq(person, "2") { return "wusstest" }
            return "wusste"
        }
        if str_eq(person, "2") { return "wusstet" }
        return "wussten"
    }

    // Modals: the past takes the weak -te endings on an umlaut-less stem.
    // The stem passed on must NOT already contain the t of -te, otherwise the
    // result is "konntte" instead of "konnte".
    if str_eq(verb, "können") {
        if str_eq(tense, "present") {
            return de_irregular_present("können", person, number)
        }
        return de_conjugate_weak("konn", "past", person, number)
    }
    if str_eq(verb, "müssen") {
        if str_eq(tense, "present") {
            return de_irregular_present("müssen", person, number)
        }
        return de_conjugate_weak("muss", "past", person, number)
    }
    if str_eq(verb, "wollen") {
        if str_eq(tense, "present") {
            return de_irregular_present("wollen", person, number)
        }
        return de_conjugate_weak("woll", "past", person, number)
    }

    // Present: try the irregular table first
    if str_eq(tense, "present") {
        let irr: String = de_irregular_present(verb, person, number)
        if !str_eq(irr, "") {
            return irr
        }
        // Regular verb: conjugate from the infinitive stem
        let stem: String = de_weak_infinitive_stem(verb)
        // For -eln / -ern verbs (stem + "n" is the infinitive) the 1pl and 3pl
        // forms equal the infinitive: wir wandern, not "wanderen".
        if str_eq(stem + "n", verb) {
            if !str_eq(number, "sg") {
                if !str_eq(person, "2") { return verb }
            }
        }
        return de_conjugate_weak(stem, "present", person, number)
    }

    // Past
    if str_eq(tense, "past") {
        // Mixed verbs: changed stem but weak endings (dachte, dachten).
        // Handled before the strong table, whose entries for these two verbs
        // already end in -te and must not receive strong endings.
        if str_eq(verb, "denken") {
            return de_conjugate_weak("dach", "past", person, number)
        }
        if str_eq(verb, "bringen") {
            return de_conjugate_weak("brach", "past", person, number)
        }

        let ps: String = de_strong_past_stem(verb)
        if !str_eq(ps, "") {
            // Strong past endings: 1sg/3sg bare, 2sg -(e)st, 1pl/3pl -en, 2pl -(e)t
            if str_eq(number, "sg") {
                if str_eq(person, "2") {
                    // Linking -e- after t/d and after sibilants
                    // (du fandest, du aßest)
                    if str_ends_with(ps, "t") { return ps + "est" }
                    if str_ends_with(ps, "d") { return ps + "est" }
                    if str_ends_with(ps, "s") { return ps + "est" }
                    if str_ends_with(ps, "ß") { return ps + "est" }
                    if str_ends_with(ps, "z") { return ps + "est" }
                    return ps + "st"
                }
                return ps
            }
            if str_eq(person, "2") {
                // Linking -e- after t/d (ihr fandet, ihr hieltet)
                if str_ends_with(ps, "t") { return ps + "et" }
                if str_ends_with(ps, "d") { return ps + "et" }
                return ps + "t"
            }
            return ps + "en"
        }

        // Weak past
        let stem: String = de_weak_infinitive_stem(verb)
        return de_conjugate_weak(stem, "past", person, number)
    }

    // Fallback: return infinitive
    return verb
}
|
||||
@@ -1,13 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn de_article_def(gender: String, gram_case: String, number: String) -> String
|
||||
extern fn de_article_indef(gender: String, gram_case: String, number: String) -> String
|
||||
extern fn de_article(gender: String, gram_case: String, number: String, definite: String) -> String
|
||||
extern fn de_adj_ending(gender: String, gram_case: String, number: String, article_type: String) -> String
|
||||
extern fn de_noun_plural(noun: String, gender: String) -> String
|
||||
extern fn de_case_ending(noun: String, gender: String, gram_case: String, number: String) -> String
|
||||
extern fn de_conjugate_weak(stem: String, tense: String, person: String, number: String) -> String
|
||||
extern fn de_irregular_present(verb: String, person: String, number: String) -> String
|
||||
extern fn de_strong_past_stem(verb: String) -> String
|
||||
extern fn de_norm_number(number: String) -> String
|
||||
extern fn de_norm_person(person: String) -> String
|
||||
extern fn de_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
@@ -1,571 +0,0 @@
|
||||
// morphology-egy.el - Ancient Egyptian (Middle Egyptian) morphology for the NLG engine.
|
||||
//
|
||||
// Implements Middle Egyptian verb conjugation (sdm=f / sdm.n=f paradigm),
|
||||
// noun number marking, suffix pronouns, and noun phrase assembly.
|
||||
// Designed as a companion to morphology.el; called when language code is "egy".
|
||||
//
|
||||
// Language profile: code=egy, name=Ancient Egyptian, morph_type=agglutinative,
|
||||
// word_order=SVO (Middle Egyptian nominal sentences), question_strategy=particle,
|
||||
// script=hieroglyphic (transliterated here as ASCII), family=afro-asiatic-egyptian.
|
||||
//
|
||||
// Script note: Classical transliteration uses special characters (ꜣ ꜥ ḥ ḫ ẖ š q ṯ ḏ).
|
||||
// This engine uses a safe ASCII mapping:
|
||||
// A = ꜣ (aleph/glottal stop) a = ꜥ (ayin)
|
||||
// H = ḥ (h with dot) x = ḫ (velar fricative)
|
||||
// X = ẖ (emphatic h) sh = š (sh sound; the verb tables below write it as S, e.g. Sm "to go")
|
||||
// q = q (emphatic k) T = ṯ (tj sound)
|
||||
// D = ḏ (dj sound)
|
||||
// This mapping keeps all string literals ASCII-safe for the El runtime.
|
||||
//
|
||||
// Grammatical notes (Middle Egyptian, ca. 2000–1300 BCE):
|
||||
// - Aspectual system: Imperfective (sdm=f), Perfective (sdm.n=f), Prospective
|
||||
// - "tense" labels used here: "present" (imperfective), "past" (perfective),
|
||||
// "future" (prospective/sdm.xr=f), "infinitive"
|
||||
// - Two grammatical genders: masculine (unmarked) and feminine (suffix -t)
|
||||
// - Number: singular (unmarked), dual (-wy masc / -ty fem), plural (-w masc / -wt fem)
|
||||
// - No case endings — syntactic role expressed by word order and prepositions
|
||||
// - No definite/indefinite article in Middle Egyptian (Late Egyptian introduced pꜣ/tꜣ/nꜣ)
|
||||
// - Zero copula: adjectival predicates need no verb "to be" ("nfr sw" = "he is good")
|
||||
// - Suffix pronouns attach directly to the verb stem with = (e.g. sdm=f "he hears")
|
||||
//
|
||||
// Persons/numbers covered (suffix pronoun paradigm):
|
||||
// person: "first" | "second" | "third"
|
||||
// gender: "m" | "f" (relevant for 2sg, 3sg; 1sg and plurals often unmarked)
|
||||
// number: "singular" | "dual" | "plural"
|
||||
//
|
||||
// Verbs covered (ASCII transliteration → gloss):
|
||||
// wnn — to be/exist (copular auxiliary)
|
||||
// rdi / di — to give
|
||||
// mAA — to see
|
||||
// Dd — to say
|
||||
// Sm — to go
|
||||
// iri — to do / make
|
||||
// sdm — to hear (the paradigm verb for the sdm=f construction)
|
||||
//
|
||||
// Canonical English → Egyptian mapping:
|
||||
// "be" → wnn / zero copula "give" → rdi
|
||||
// "see" → mAA "say" → Dd
|
||||
// "go" → Sm "do" → iri
|
||||
// "make" → iri "hear" → sdm
|
||||
//
|
||||
// Depends on: morphology.el (str_eq, str_len, str_slice, str_ends_with)
|
||||
|
||||
// ── String helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
import "morphology.el"
|
||||
// Reports whether `s` ends with `suf`; forwards to str_ends_with.
fn egy_str_ends(s: String, suf: String) -> Bool {
    let matched: Bool = str_ends_with(s, suf)
    return matched
}
|
||||
|
||||
// Returns the length of `s` as reported by str_len.
fn egy_str_len(s: String) -> Int {
    let size: Int = str_len(s)
    return size
}
|
||||
|
||||
// Returns `s` without its last `n` units (as counted by str_len).
//   n <= 0        → `s` unchanged
//   n >= length   → ""
fn egy_drop(s: String, n: Int) -> String {
    // Nothing to remove. This also keeps a negative n from producing a slice
    // end beyond the end of the string.
    if 0 >= n { return s }
    let len: Int = str_len(s)
    if n >= len { return "" }
    return str_slice(s, 0, len - n)
}
|
||||
|
||||
// Returns the final one-unit slice of `s`, or "" when `s` is empty.
fn egy_last_char(s: String) -> String {
    let size: Int = str_len(s)
    if size == 0 { return "" }
    let start: Int = size - 1
    return str_slice(s, start, size)
}
|
||||
|
||||
// ── Person/number slot ──────────────────────────────────────────────────────────
|
||||
//
|
||||
// Maps person × gender × number to a 0-based index used in paradigm tables.
|
||||
// Egyptian suffix pronouns distinguish gender in 2nd and 3rd person singular.
|
||||
//
|
||||
// Slot layout:
|
||||
// 0 = 1sg (=i)
|
||||
// 1 = 2sg masc (=k)
|
||||
// 2 = 2sg fem (=T)
|
||||
// 3 = 3sg masc (=f)
|
||||
// 4 = 3sg fem (=s)
|
||||
// 5 = 1pl (=n)
|
||||
// 6 = 2pl (=Tn)
|
||||
// 7 = 3pl (=sn)
|
||||
// 8 = 1du / 2du / 3du (=sny — simplified; dual pronouns are rare in sources)
|
||||
//
|
||||
// Dual falls through to slot 8 (a single dual pronoun slot for all persons).
|
||||
|
||||
// Maps person × number to a paradigm slot, ignoring gender.
// Possible results: 0 (1sg), 1 (2sg), 3 (3sg), 5 (1pl), 6 (2pl), 7 (3pl),
// 8 (dual, any person). Any person other than "first"/"second" counts as
// third; any number other than "dual"/"plural" counts as singular.
fn egy_slot(person: String, number: String) -> Int {
    // Dual: one shared slot for all persons
    if str_eq(number, "dual") { return 8 }

    // Plural row
    if str_eq(number, "plural") {
        if str_eq(person, "first") { return 5 }
        if str_eq(person, "second") { return 6 }
        return 7
    }

    // Singular row
    if str_eq(person, "first") { return 0 }
    if str_eq(person, "second") { return 1 }
    return 3
}
|
||||
|
||||
// egy_slot_with_gender: slot variant that factors in gender for 2sg and 3sg.
|
||||
|
||||
// Gender-aware variant of egy_slot.
// Starts from the gender-neutral slot and moves 2sg / 3sg to their feminine
// neighbours (1 → 2, 3 → 4) when gender is "f". All other slots are unaffected
// by gender.
fn egy_slot_with_gender(person: String, gender: String, number: String) -> Int {
    let base: Int = egy_slot(person, number)
    if str_eq(gender, "f") {
        if base == 1 { return 2 }
        if base == 3 { return 4 }
    }
    return base
}
|
||||
|
||||
// ── Suffix pronouns ─────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Egyptian suffix pronouns attach to verbs, nouns, and prepositions.
|
||||
// Written with = before the pronoun in transliteration (e.g. =f = "his / he").
|
||||
//
|
||||
// Standard Middle Egyptian paradigm:
|
||||
// 1sg: =i ("I / me / my")
|
||||
// 2sg m: =k ("you / your" masc)
|
||||
// 2sg f: =T ("you / your" fem, ṯ in classical)
|
||||
// 3sg m: =f ("he / him / his")
|
||||
// 3sg f: =s ("she / her")
|
||||
// 1pl: =n ("we / us / our")
|
||||
// 2pl: =Tn ("you all / your" plural)
|
||||
// 3pl: =sn ("they / them / their")
|
||||
// dual: =sny (simplified dual — rare in Middle Egyptian texts)
|
||||
|
||||
// Returns the suffix pronoun for person × number without gender information.
// egy_slot takes no gender, so the feminine singular slots are never produced
// here; whatever is not matched below gets the 3sg masculine "=f".
fn egy_conjugate_pronoun(person: String, number: String) -> String {
    let idx: Int = egy_slot(person, number)

    // Dual and plural forms
    if idx == 8 { return "=sny" }
    if idx == 7 { return "=sn" }
    if idx == 6 { return "=Tn" }
    if idx == 5 { return "=n" }

    // Singular forms
    if idx == 1 { return "=k" }
    if idx == 0 { return "=i" }
    return "=f"
}
|
||||
|
||||
// Returns the suffix pronoun (with its leading "=") for a paradigm slot.
// Slots 0–7 are listed explicitly; every other value, including the dual
// slot 8, yields "=sny".
fn egy_suffix_pronoun(slot: Int) -> String {
    // Plural
    if slot == 7 { return "=sn" }
    if slot == 6 { return "=Tn" }
    if slot == 5 { return "=n" }

    // Singular, feminine then masculine for 3rd and 2nd person
    if slot == 4 { return "=s" }
    if slot == 3 { return "=f" }
    if slot == 2 { return "=T" }
    if slot == 1 { return "=k" }
    if slot == 0 { return "=i" }

    return "=sny"
}
|
||||
|
||||
// ── Copula detection ────────────────────────────────────────────────────────────
|
||||
//
|
||||
// In Middle Egyptian the verb "to be" as predicate is often omitted in the
|
||||
// present (zero copula for adjective predicates). The auxiliary wnn is used
|
||||
// for existence / substantive "to be" and in subordinate clauses.
|
||||
// Canonical English label "be" maps to zero copula in the present.
|
||||
|
||||
// True when `verb` is the English label "be" or the Egyptian auxiliary "wnn".
fn egy_is_copula(verb: String) -> Bool {
    if str_eq(verb, "be") { return true }
    if str_eq(verb, "wnn") { return true }
    return false
}
|
||||
|
||||
// ── Copula conjugation ──────────────────────────────────────────────────────────
|
||||
//
|
||||
// Present ("imperfective"): zero copula for adjectival predicate — return "".
|
||||
// The auxiliary iw...wnn is used in certain syntactic environments but the
|
||||
// bare zero is the canonical Middle Egyptian form.
|
||||
// Past ("perfective"): wnn.n (with perfective suffix .n)
|
||||
// Future ("prospective"): wnn.xr (prospective form, simplified)
|
||||
|
||||
// Builds the copula form for a tense and pronoun slot.
//   "past"        → "wnn.n"  + suffix pronoun
//   "future"      → "wnn.xr" + suffix pronoun
//   "infinitive"  → "wnn"
//   anything else, including "present" → "" (zero copula)
fn egy_conjugate_copula(tense: String, slot: Int) -> String {
    if str_eq(tense, "past") {
        let pron: String = egy_suffix_pronoun(slot)
        return "wnn.n" + pron
    }
    if str_eq(tense, "future") {
        let pron: String = egy_suffix_pronoun(slot)
        return "wnn.xr" + pron
    }
    if str_eq(tense, "infinitive") { return "wnn" }

    // Present and every unrecognised tense: zero copula
    return ""
}
|
||||
|
||||
// ── Irregular verb: rdi / di (to give) ─────────────────────────────────────────
|
||||
//
|
||||
// rdi is the full form; di is the common abbreviated written form.
|
||||
// Imperfective (present): di=f (3sg m), with full pronoun for other persons.
|
||||
// Perfective (past): di.n=f
|
||||
// Prospective (future): di.xr=f
|
||||
// Infinitive: rdi
|
||||
|
||||
fn egy_rdi_present(slot: Int) -> String {
|
||||
return "di" + egy_suffix_pronoun(slot)
|
||||
}
|
||||
|
||||
fn egy_rdi_past(slot: Int) -> String {
|
||||
return "di.n" + egy_suffix_pronoun(slot)
|
||||
}
|
||||
|
||||
// Future form of rdi "to give": "di.xr" followed by the suffix pronoun for `slot`.
fn egy_rdi_future(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "di.xr" + pronoun
}
|
||||
|
||||
// ── Irregular verb: mAA (to see) ───────────────────────────────────────────────
|
||||
//
|
||||
// mAA is a geminated root (m-AA).
|
||||
// Present: mAA=f; Past: mAA.n=f; Future: mAA.xr=f
|
||||
|
||||
// Present form of mAA "to see": "mAA" followed by the suffix pronoun for `slot`.
fn egy_mAA_present(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "mAA" + pronoun
}
|
||||
|
||||
// Past form of mAA "to see": "mAA.n" followed by the suffix pronoun for `slot`.
fn egy_mAA_past(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "mAA.n" + pronoun
}
|
||||
|
||||
// Future form of mAA "to see": "mAA.xr" followed by the suffix pronoun for `slot`.
fn egy_mAA_future(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "mAA.xr" + pronoun
}
|
||||
|
||||
// ── Irregular verb: Dd (to say) ─────────────────────────────────────────────────
|
||||
//
|
||||
// Present: Dd=f; Past: Dd.n=f; Future: Dd.xr=f
|
||||
// Infinitive: Dd
|
||||
|
||||
// Present form of Dd "to say": "Dd" followed by the suffix pronoun for `slot`.
fn egy_Dd_present(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "Dd" + pronoun
}
|
||||
|
||||
// Past form of Dd "to say": "Dd.n" followed by the suffix pronoun for `slot`.
fn egy_Dd_past(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "Dd.n" + pronoun
}
|
||||
|
||||
// Future form of Dd "to say": "Dd.xr" followed by the suffix pronoun for `slot`.
fn egy_Dd_future(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "Dd.xr" + pronoun
}
|
||||
|
||||
// ── Irregular verb: Sm (to go) ─────────────────────────────────────────────────
|
||||
//
|
||||
// Present: Sm=f; Past: Sm.n=f; Future: Sm.xr=f
|
||||
// (Note: the verb Smt "to go" appears in texts; Sm is the most common short form.)
|
||||
|
||||
// Present form of Sm "to go": "Sm" followed by the suffix pronoun for `slot`.
fn egy_Sm_present(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "Sm" + pronoun
}
|
||||
|
||||
// Past form of Sm "to go": "Sm.n" followed by the suffix pronoun for `slot`.
fn egy_Sm_past(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "Sm.n" + pronoun
}
|
||||
|
||||
// Future form of Sm "to go": "Sm.xr" followed by the suffix pronoun for `slot`.
fn egy_Sm_future(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "Sm.xr" + pronoun
}
|
||||
|
||||
// ── Irregular verb: iri (to do / make) ─────────────────────────────────────────
|
||||
//
|
||||
// iri has a contracted 3-radical stem ir- before pronouns.
|
||||
// Present: ir=f; Past: ir.n=f; Future: ir.xr=f
|
||||
// Infinitive: iri
|
||||
|
||||
// Present form of iri "to do / make": stem "ir" followed by the suffix pronoun for `slot`.
fn egy_iri_present(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "ir" + pronoun
}
|
||||
|
||||
// Past form of iri "to do / make": "ir.n" followed by the suffix pronoun for `slot`.
fn egy_iri_past(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "ir.n" + pronoun
}
|
||||
|
||||
// Future form of iri "to do / make": "ir.xr" followed by the suffix pronoun for `slot`.
fn egy_iri_future(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "ir.xr" + pronoun
}
|
||||
|
||||
// ── Regular verb: sdm (to hear) ────────────────────────────────────────────────
|
||||
//
|
||||
// sdm (to hear) is the paradigm verb used in grammar textbooks to illustrate
|
||||
// all Egyptian verb forms. The sdm=f construction names the imperfective suffix
|
||||
// verb pattern itself.
|
||||
// Present: sdm=f; Past: sdm.n=f; Future: sdm.xr=f
|
||||
// Infinitive: sdm
|
||||
|
||||
// Present form of sdm "to hear": "sdm" followed by the suffix pronoun for `slot`.
fn egy_sdm_present(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "sdm" + pronoun
}
|
||||
|
||||
// Past form of sdm "to hear": "sdm.n" followed by the suffix pronoun for `slot`.
fn egy_sdm_past(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "sdm.n" + pronoun
}
|
||||
|
||||
// Future form of sdm "to hear": "sdm.xr" followed by the suffix pronoun for `slot`.
fn egy_sdm_future(slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return "sdm.xr" + pronoun
}
|
||||
|
||||
// ── Known-verb dispatcher ───────────────────────────────────────────────────────
//
// egy_known_verb returns the inflected form for a verb in the explicit table,
// or "" if the verb is not in the table. Both Egyptian transliterations and
// canonical English labels are accepted:
//
//   rdi / di / give      mAA / see      Dd / say
//   Sm / go              iri / do / make      sdm / hear
//
// Each lemma has one private helper that performs the tense switch, so every
// alias of that lemma shares a single copy of the logic. In every helper:
//   "past"       -> the lemma's past form for slot
//   "future"     -> the lemma's future form for slot
//   "infinitive" -> the bare infinitive string
//   "present" and any unrecognised tense -> the lemma's present form for slot

// Tense switch for rdi / di "to give"; the infinitive is always the full form "rdi".
fn egy_known_rdi(tense: String, slot: Int) -> String {
  if str_eq(tense, "past") { return egy_rdi_past(slot) }
  if str_eq(tense, "future") { return egy_rdi_future(slot) }
  if str_eq(tense, "infinitive") { return "rdi" }
  return egy_rdi_present(slot)
}

// Tense switch for mAA "to see".
fn egy_known_mAA(tense: String, slot: Int) -> String {
  if str_eq(tense, "past") { return egy_mAA_past(slot) }
  if str_eq(tense, "future") { return egy_mAA_future(slot) }
  if str_eq(tense, "infinitive") { return "mAA" }
  return egy_mAA_present(slot)
}

// Tense switch for Dd "to say".
fn egy_known_Dd(tense: String, slot: Int) -> String {
  if str_eq(tense, "past") { return egy_Dd_past(slot) }
  if str_eq(tense, "future") { return egy_Dd_future(slot) }
  if str_eq(tense, "infinitive") { return "Dd" }
  return egy_Dd_present(slot)
}

// Tense switch for Sm "to go".
fn egy_known_Sm(tense: String, slot: Int) -> String {
  if str_eq(tense, "past") { return egy_Sm_past(slot) }
  if str_eq(tense, "future") { return egy_Sm_future(slot) }
  if str_eq(tense, "infinitive") { return "Sm" }
  return egy_Sm_present(slot)
}

// Tense switch for iri "to do / make".
fn egy_known_iri(tense: String, slot: Int) -> String {
  if str_eq(tense, "past") { return egy_iri_past(slot) }
  if str_eq(tense, "future") { return egy_iri_future(slot) }
  if str_eq(tense, "infinitive") { return "iri" }
  return egy_iri_present(slot)
}

// Tense switch for sdm "to hear".
fn egy_known_sdm(tense: String, slot: Int) -> String {
  if str_eq(tense, "past") { return egy_sdm_past(slot) }
  if str_eq(tense, "future") { return egy_sdm_future(slot) }
  if str_eq(tense, "infinitive") { return "sdm" }
  return egy_sdm_present(slot)
}

fn egy_known_verb(verb: String, tense: String, slot: Int) -> String {
  // rdi / di — to give
  if str_eq(verb, "rdi") { return egy_known_rdi(tense, slot) }
  if str_eq(verb, "di") { return egy_known_rdi(tense, slot) }
  if str_eq(verb, "give") { return egy_known_rdi(tense, slot) }

  // mAA — to see
  if str_eq(verb, "mAA") { return egy_known_mAA(tense, slot) }
  if str_eq(verb, "see") { return egy_known_mAA(tense, slot) }

  // Dd — to say
  if str_eq(verb, "Dd") { return egy_known_Dd(tense, slot) }
  if str_eq(verb, "say") { return egy_known_Dd(tense, slot) }

  // Sm — to go
  if str_eq(verb, "Sm") { return egy_known_Sm(tense, slot) }
  if str_eq(verb, "go") { return egy_known_Sm(tense, slot) }

  // iri — to do / make
  if str_eq(verb, "iri") { return egy_known_iri(tense, slot) }
  if str_eq(verb, "do") { return egy_known_iri(tense, slot) }
  if str_eq(verb, "make") { return egy_known_iri(tense, slot) }

  // sdm — to hear
  if str_eq(verb, "sdm") { return egy_known_sdm(tense, slot) }
  if str_eq(verb, "hear") { return egy_known_sdm(tense, slot) }

  // Verb not in table
  return ""
}
|
||||
|
||||
// ── Regular verb conjugation ────────────────────────────────────────────────────
|
||||
//
|
||||
// For verbs not in the explicit table, apply the productive suffix-verb pattern:
|
||||
// Present (imperfective sdm=f): stem + pronoun suffix
|
||||
// Past (perfective sdm.n=f): stem + ".n" + pronoun suffix
|
||||
// Future (prospective sdm.xr=f): stem + ".xr" + pronoun suffix
|
||||
// Infinitive: stem unchanged
|
||||
//
|
||||
// This covers the vast majority of strong (sound) verb roots.
|
||||
|
||||
// Regular present: `stem` followed directly by the suffix pronoun for `slot`.
fn egy_regular_present(stem: String, slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return stem + pronoun
}
|
||||
|
||||
// Regular past: `stem`, then ".n", then the suffix pronoun for `slot`.
fn egy_regular_past(stem: String, slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return stem + ".n" + pronoun
}
|
||||
|
||||
// Regular future: `stem`, then ".xr", then the suffix pronoun for `slot`.
fn egy_regular_future(stem: String, slot: Int) -> String {
  let pronoun: String = egy_suffix_pronoun(slot)
  return stem + ".xr" + pronoun
}
|
||||
|
||||
// ── egy_conjugate: main conjugation entry point ─────────────────────────────────
|
||||
//
|
||||
// verb: Egyptian verb (ASCII transliteration) or English canonical label
|
||||
// tense: "present" | "past" | "future" | "infinitive"
|
||||
// person: "first" | "second" | "third"
|
||||
// number: "singular" | "dual" | "plural"
|
||||
//
|
||||
// Returns:
|
||||
// - "" for present copula (zero copula — caller omits the verb)
|
||||
// - inflected form (stem + .n + suffix, etc.) for all other cases
|
||||
// - verb + regular suffix for unknown verbs (productive fallback)
|
||||
|
||||
// Main conjugation entry point. Resolution order:
//   1. copula (wnn / "be")      -> egy_conjugate_copula
//   2. explicit verb table      -> egy_known_verb (used when it returns non-"")
//   3. productive suffix pattern for "present" / "past" / "future"
//   4. otherwise ("infinitive" or an unrecognised tense) the verb is returned as given
fn egy_conjugate(verb: String, tense: String, person: String, number: String) -> String {
  let slot: Int = egy_slot(person, number)

  if egy_is_copula(verb) { return egy_conjugate_copula(tense, slot) }

  // A non-empty table result wins over the productive pattern.
  let table_form: String = egy_known_verb(verb, tense, slot)
  if !str_eq(table_form, "") { return table_form }

  if str_eq(tense, "past") { return egy_regular_past(verb, slot) }
  if str_eq(tense, "future") { return egy_regular_future(verb, slot) }
  if str_eq(tense, "present") { return egy_regular_present(verb, slot) }

  // Infinitive and unknown tenses: leave the verb untouched.
  return verb
}
|
||||
|
||||
// ── Noun number marking ─────────────────────────────────────────────────────────
//
// Middle Egyptian nouns are invariant for case — syntactic role is expressed by
// word order and prepositions, not noun endings. Number is marked by suffix:
//
//   Singular: base form (no suffix)
//   Dual:     masculine base + wy   / feminine stem + ty
//   Plural:   masculine base + w    / feminine stem + wt
//
// A noun is treated as feminine when it ends in "t" (the feminine marker). The
// feminine stem is the noun with that final "t" removed, so the number ending
// replaces the marker rather than stacking on it: snt -> snty (dual),
// snt -> snwt (plural).
//
// Many common nouns have suppletive or irregular plurals (recorded in the
// vocabulary layer). This function implements the productive regular pattern.
//
// noun:      base (singular) form in ASCII transliteration
// gram_case: accepted for API symmetry but has no effect (Egyptian is caseless)
// number:    "singular" | "dual"; any other value is declined as plural

fn egy_decline(noun: String, gram_case: String, number: String) -> String {
  if str_eq(number, "singular") { return noun }

  if egy_str_ends(noun, "t") {
    // Feminine: strip the -t marker once, then attach the number ending.
    let stem: String = egy_drop(noun, 1)
    if str_eq(number, "dual") { return stem + "ty" }
    return stem + "wt"
  }

  // Masculine: endings attach directly to the base form.
  if str_eq(number, "dual") { return noun + "wy" }
  return noun + "w"
}
|
||||
|
||||
// ── Feminine derivation ─────────────────────────────────────────────────────────
|
||||
//
|
||||
// egy_fem: derive the feminine form of a noun or adjective by appending -t.
|
||||
//
|
||||
// In Middle Egyptian, the feminine gender marker is the suffix -t (written with
|
||||
// the bread-loaf hieroglyph, Gardiner X1). If the base already ends in -t the
|
||||
// form is returned unchanged to avoid double-suffixing.
|
||||
|
||||
// Derives the feminine form by appending "t"; a base that already ends in "t"
// is returned as-is so the marker is never doubled.
fn egy_fem(noun: String) -> String {
  if !egy_str_ends(noun, "t") { return noun + "t" }
  return noun
}
|
||||
|
||||
// ── Noun phrase assembly ────────────────────────────────────────────────────────
|
||||
//
|
||||
// egy_noun_phrase: return the surface form of a noun phrase.
|
||||
//
|
||||
// noun: base noun (ASCII transliteration)
|
||||
// gram_case: passed for API symmetry; has no effect (Egyptian is caseless)
|
||||
// number: "singular" | "dual" | "plural"
|
||||
// definite: "true" | "false" — Middle Egyptian has no article; parameter accepted
|
||||
// for API symmetry. Late Egyptian pꜣ/tꜣ/nꜣ articles are not implemented
|
||||
// here (they would require knowing the gender of each noun).
|
||||
//
|
||||
// Returns the noun in its correct number form.
|
||||
|
||||
// Builds the surface noun phrase, which is just the declined noun.
// `definite` is not read: no article is ever added.
fn egy_noun_phrase(noun: String, gram_case: String, number: String, definite: String) -> String {
  let surface: String = egy_decline(noun, gram_case, number)
  return surface
}
|
||||
|
||||
// ── Canonical verb mapping ──────────────────────────────────────────────────────
|
||||
//
|
||||
// egy_map_canonical: map cross-lingual English canonical verb labels to their
|
||||
// Middle Egyptian equivalents before dispatching to egy_conjugate.
|
||||
|
||||
// Translates a canonical English verb label into its Egyptian lemma.
// Labels without an entry are passed through unchanged.
fn egy_map_canonical(verb: String) -> String {
  // Two English labels share the lemma iri.
  if str_eq(verb, "make") { return "iri" }
  if str_eq(verb, "do") { return "iri" }

  if str_eq(verb, "hear") { return "sdm" }
  if str_eq(verb, "go") { return "Sm" }
  if str_eq(verb, "say") { return "Dd" }
  if str_eq(verb, "see") { return "mAA" }
  if str_eq(verb, "give") { return "rdi" }
  if str_eq(verb, "be") { return "wnn" }

  // No mapping: hand the input back untouched.
  return verb
}
|
||||
@@ -1,38 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn egy_str_ends(s: String, suf: String) -> Bool
|
||||
extern fn egy_str_len(s: String) -> Int
|
||||
extern fn egy_drop(s: String, n: Int) -> String
|
||||
extern fn egy_last_char(s: String) -> String
|
||||
extern fn egy_slot(person: String, number: String) -> Int
|
||||
extern fn egy_slot_with_gender(person: String, gender: String, number: String) -> Int
|
||||
extern fn egy_conjugate_pronoun(person: String, number: String) -> String
|
||||
extern fn egy_suffix_pronoun(slot: Int) -> String
|
||||
extern fn egy_is_copula(verb: String) -> Bool
|
||||
extern fn egy_conjugate_copula(tense: String, slot: Int) -> String
|
||||
extern fn egy_rdi_present(slot: Int) -> String
|
||||
extern fn egy_rdi_past(slot: Int) -> String
|
||||
extern fn egy_rdi_future(slot: Int) -> String
|
||||
extern fn egy_mAA_present(slot: Int) -> String
|
||||
extern fn egy_mAA_past(slot: Int) -> String
|
||||
extern fn egy_mAA_future(slot: Int) -> String
|
||||
extern fn egy_Dd_present(slot: Int) -> String
|
||||
extern fn egy_Dd_past(slot: Int) -> String
|
||||
extern fn egy_Dd_future(slot: Int) -> String
|
||||
extern fn egy_Sm_present(slot: Int) -> String
|
||||
extern fn egy_Sm_past(slot: Int) -> String
|
||||
extern fn egy_Sm_future(slot: Int) -> String
|
||||
extern fn egy_iri_present(slot: Int) -> String
|
||||
extern fn egy_iri_past(slot: Int) -> String
|
||||
extern fn egy_iri_future(slot: Int) -> String
|
||||
extern fn egy_sdm_present(slot: Int) -> String
|
||||
extern fn egy_sdm_past(slot: Int) -> String
|
||||
extern fn egy_sdm_future(slot: Int) -> String
|
||||
extern fn egy_known_verb(verb: String, tense: String, slot: Int) -> String
|
||||
extern fn egy_regular_present(stem: String, slot: Int) -> String
|
||||
extern fn egy_regular_past(stem: String, slot: Int) -> String
|
||||
extern fn egy_regular_future(stem: String, slot: Int) -> String
|
||||
extern fn egy_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn egy_decline(noun: String, gram_case: String, number: String) -> String
|
||||
extern fn egy_fem(noun: String) -> String
|
||||
extern fn egy_noun_phrase(noun: String, gram_case: String, number: String, definite: String) -> String
|
||||
extern fn egy_map_canonical(verb: String) -> String
|
||||
@@ -1,451 +0,0 @@
|
||||
// morphology-enm.el - Middle English morphology for the NLG engine.
|
||||
//
|
||||
// Implements Middle English verb conjugation and noun declension for the
|
||||
// ca. 1100-1500 CE period (Chaucerian English). Designed as a companion to
|
||||
// morphology.el and called by the engine when the language profile code is "enm".
|
||||
//
|
||||
// Language profile: code=enm, name=Middle English, morph_type=analytic,
|
||||
// word_order=SVO, question_strategy=inversion, script=latin, family=germanic.
|
||||
//
|
||||
// Verb conjugation covered:
|
||||
// Tenses: present, past
|
||||
// Persons: first/second/third x singular/plural (slots 0-5)
|
||||
// Classes: weak (productive: -est 2sg, -eth 3sg, -en pl; past: -ede/-de/-te)
|
||||
// Irregulars: been/ben (be), han/haven (have), goon (go), seen (see),
|
||||
// seyn/seyen (say), comen (come), maken (make)
|
||||
// Canonical map: "be" -> "been"
|
||||
//
|
||||
// Noun declension covered:
|
||||
// Middle English has largely lost case endings. This module handles:
|
||||
// - nominative (base form)
|
||||
// - genitive singular (+es)
|
||||
// - plural (+es default; irregular forms for common words)
|
||||
// Common irregulars: man->men, child->children, ox->oxen, foot->feet,
|
||||
// tooth->teeth
|
||||
//
|
||||
// Article formation:
|
||||
// Definite: "the" prepended
|
||||
// Indefinite: "a" or "an" based on the first letter of the noun phrase
|
||||
//
|
||||
// Depends on: morphology.el (str_ends_with, str_len, str_slice, str_eq)
|
||||
|
||||
// ── String helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
import "morphology.el"
|
||||
// Reports whether `s` ends with `suf`; thin wrapper over str_ends_with.
fn enm_str_ends(s: String, suf: String) -> Bool {
  if str_ends_with(s, suf) { return true }
  return false
}
|
||||
|
||||
// Returns `s` without its last `n` characters; "" when `n` is at least the
// length of `s`.
fn enm_drop(s: String, n: Int) -> String {
  let total: Int = str_len(s)
  if n >= total { return "" }
  let keep: Int = total - n
  return str_slice(s, 0, keep)
}
|
||||
|
||||
// Returns the first character of `s` as a string, or "" for an empty string.
fn enm_first_char(s: String) -> String {
  let length: Int = str_len(s)
  if length == 0 { return "" }
  let head: String = str_slice(s, 0, 1)
  return head
}
|
||||
|
||||
// ── Person/number slot ─────────────────────────────────────────────────────────
|
||||
//
|
||||
// Maps person x number to a 0-based paradigm slot.
|
||||
// 0 = 1st singular (I / ich)
|
||||
// 1 = 2nd singular (thou)
|
||||
// 2 = 3rd singular (he / she / it)
|
||||
// 3 = 1st plural (we)
|
||||
// 4 = 2nd plural (ye)
|
||||
// 5 = 3rd plural (they)
|
||||
|
||||
// Maps person x number onto a paradigm slot 0-5.
// Singular gives 0/1/2 and every other number value gives 3/4/5, for
// first/second/other person respectively. Any person that is neither "first"
// nor "second" is treated as third.
fn enm_slot(person: String, number: String) -> Int {
  if str_eq(number, "singular") {
    if str_eq(person, "first") { return 0 }
    if str_eq(person, "second") { return 1 }
    return 2
  }
  if str_eq(person, "first") { return 3 }
  if str_eq(person, "second") { return 4 }
  return 5
}
|
||||
|
||||
// ── Irregular verb tables ──────────────────────────────────────────────────────
|
||||
//
|
||||
// Each irregular verb has a present and past paradigm of six forms (slots 0-5).
|
||||
// Forms are in Middle English spelling, close to Chaucerian usage.
|
||||
|
||||
// Present paradigm of been "to be": am / art / is / aren, and "been" for every
// other slot.
fn enm_been_present(slot: Int) -> String {
  if slot == 3 { return "aren" }
  if slot == 2 { return "is" }
  if slot == 1 { return "art" }
  if slot == 0 { return "am" }
  return "been"
}
|
||||
|
||||
// Past paradigm of been "to be": "was" for slots 0 and 2, "were" everywhere else.
fn enm_been_past(slot: Int) -> String {
  if slot == 0 { return "was" }
  if slot == 2 { return "was" }
  return "were"
}
|
||||
|
||||
// Present paradigm of haven "to have": hast (slot 1), hath (slot 2), "have" otherwise.
fn enm_haven_present(slot: Int) -> String {
  if slot == 1 { return "hast" }
  if slot == 2 { return "hath" }
  return "have"
}
|
||||
|
||||
// Past paradigm of haven "to have": hadde (slots 0 and 2), haddest (slot 1),
// "hadden" otherwise.
fn enm_haven_past(slot: Int) -> String {
  if slot == 1 { return "haddest" }
  if slot == 0 { return "hadde" }
  if slot == 2 { return "hadde" }
  return "hadden"
}
|
||||
|
||||
// Present paradigm of goon "to go": go / goost / gooth, "goon" otherwise.
fn enm_goon_present(slot: Int) -> String {
  if slot == 2 { return "gooth" }
  if slot == 1 { return "goost" }
  if slot == 0 { return "go" }
  return "goon"
}
|
||||
|
||||
// Past paradigm of goon "to go": wente (slots 0 and 2), wentest (slot 1),
// "wenten" otherwise.
fn enm_goon_past(slot: Int) -> String {
  if slot == 1 { return "wentest" }
  if slot == 0 { return "wente" }
  if slot == 2 { return "wente" }
  return "wenten"
}
|
||||
|
||||
// Present paradigm of seen "to see": see / seest / seeth, "seen" otherwise.
fn enm_seen_present(slot: Int) -> String {
  if slot == 2 { return "seeth" }
  if slot == 1 { return "seest" }
  if slot == 0 { return "see" }
  return "seen"
}
|
||||
|
||||
// Past paradigm of seen "to see": saugh (slots 0 and 2), sawest (slot 1),
// "sawen" otherwise.
fn enm_seen_past(slot: Int) -> String {
  if slot == 1 { return "sawest" }
  if slot == 0 { return "saugh" }
  if slot == 2 { return "saugh" }
  return "sawen"
}
|
||||
|
||||
// Present paradigm of seyen "to say": seye / seyst / seith, "seyen" otherwise.
fn enm_seyen_present(slot: Int) -> String {
  if slot == 2 { return "seith" }
  if slot == 1 { return "seyst" }
  if slot == 0 { return "seye" }
  return "seyen"
}
|
||||
|
||||
// Past paradigm of seyen "to say": seide (slots 0 and 2), seidest (slot 1),
// "seiden" otherwise.
fn enm_seyen_past(slot: Int) -> String {
  if slot == 1 { return "seidest" }
  if slot == 0 { return "seide" }
  if slot == 2 { return "seide" }
  return "seiden"
}
|
||||
|
||||
// Present paradigm of comen "to come": come / comest / cometh, "comen" otherwise.
fn enm_comen_present(slot: Int) -> String {
  if slot == 2 { return "cometh" }
  if slot == 1 { return "comest" }
  if slot == 0 { return "come" }
  return "comen"
}
|
||||
|
||||
// Past paradigm of comen "to come": cam (slots 0 and 2), come (slot 1),
// "comen" otherwise.
fn enm_comen_past(slot: Int) -> String {
  if slot == 1 { return "come" }
  if slot == 0 { return "cam" }
  if slot == 2 { return "cam" }
  return "comen"
}
|
||||
|
||||
// Present paradigm of maken "to make": make / makest / maketh, "maken" otherwise.
fn enm_maken_present(slot: Int) -> String {
  if slot == 2 { return "maketh" }
  if slot == 1 { return "makest" }
  if slot == 0 { return "make" }
  return "maken"
}
|
||||
|
||||
// Past paradigm of maken "to make": made (slots 0 and 2), madest (slot 1),
// "maden" otherwise.
fn enm_maken_past(slot: Int) -> String {
  if slot == 1 { return "madest" }
  if slot == 0 { return "made" }
  if slot == 2 { return "made" }
  return "maden"
}
|
||||
|
||||
// ── Canonical verb mapping ─────────────────────────────────────────────────────
//
// Maps English semantic labels to Middle English infinitives so the semantic
// layer can request forms without knowing the target-language lexeme.
//
// The short infinitive variants named in the module header (ben, han, seyn) are
// also normalised to the spelling the irregular tables are keyed on. Without
// this they would be conjugated by the productive weak pattern instead of their
// irregular paradigm.
//
// Any other input is returned unchanged.

fn enm_map_canonical(verb: String) -> String {
  // English canonical labels
  if str_eq(verb, "be") { return "been" }
  if str_eq(verb, "have") { return "haven" }
  if str_eq(verb, "go") { return "goon" }
  if str_eq(verb, "see") { return "seen" }
  if str_eq(verb, "say") { return "seyen" }
  if str_eq(verb, "come") { return "comen" }
  if str_eq(verb, "make") { return "maken" }

  // Middle English variant infinitives of the irregular verbs
  if str_eq(verb, "ben") { return "been" }
  if str_eq(verb, "han") { return "haven" }
  if str_eq(verb, "seyn") { return "seyen" }

  return verb
}
|
||||
|
||||
// ── Weak verb stem derivation ──────────────────────────────────────────────────
|
||||
//
|
||||
// For weak verbs the present stem is the infinitive minus any trailing -en or -e.
|
||||
// Past tense suffix: most common is -ede (after unvoiced consonants often -te,
|
||||
// after voiced -de). This module uses -ede as the default productive past suffix.
|
||||
//
|
||||
// Present:
|
||||
//   slot 0: stem + e      (I love)
|
||||
// slot 1: stem + est (thou lovest)
|
||||
// slot 2: stem + eth (he loveth)
|
||||
// slot 3: stem + en (we loven)
|
||||
// slot 4: stem + en (ye loven)
|
||||
// slot 5: stem + en (they loven)
|
||||
//
|
||||
// Past:
|
||||
// slot 0: stem + ede (I lovede)
|
||||
// slot 1: stem + edest (thou lovedest)
|
||||
// slot 2: stem + ede (he lovede)
|
||||
// slot 3..5: stem + eden (we loveden)
|
||||
|
||||
// Derives the weak-verb stem: strips a trailing "en" if present, otherwise a
// trailing "e" if present, otherwise returns the verb unchanged.
fn enm_weak_stem(verb: String) -> String {
  if enm_str_ends(verb, "en") {
    let without_en: String = enm_drop(verb, 2)
    return without_en
  }
  if !enm_str_ends(verb, "e") { return verb }
  return enm_drop(verb, 1)
}
|
||||
|
||||
// Weak present endings: slot 0 -e, slot 1 -est, slot 2 -eth, every other slot -en.
fn enm_weak_present(stem: String, slot: Int) -> String {
  if slot == 2 { return stem + "eth" }
  if slot == 1 { return stem + "est" }
  if slot == 0 { return stem + "e" }
  return stem + "en"
}
|
||||
|
||||
// Weak past endings: slots 0 and 2 -ede, slot 1 -edest, every other slot -eden.
fn enm_weak_past(stem: String, slot: Int) -> String {
  if slot == 1 { return stem + "edest" }
  if slot == 0 { return stem + "ede" }
  if slot == 2 { return stem + "ede" }
  return stem + "eden"
}
|
||||
|
||||
// ── enm_conjugate: main conjugation entry point ───────────────────────────────
|
||||
//
|
||||
// verb: Middle English infinitive (e.g. "loven", "been") or English canonical
|
||||
// tense: "present" | "past"
|
||||
// person: "first" | "second" | "third"
|
||||
// number: "singular" | "plural"
|
||||
//
|
||||
// Returns the inflected form. Unknown tenses fall back to the infinitive rather
|
||||
// than crashing.
|
||||
|
||||
// Main conjugation entry point. The verb is first normalised through
// enm_map_canonical, then dispatched by tense: the irregular tables are tried
// first and the productive weak pattern is the fallback. Any tense other than
// "present" or "past" returns the normalised infinitive.
fn enm_conjugate(verb: String, tense: String, person: String, number: String) -> String {
  let lemma: String = enm_map_canonical(verb)
  let slot: Int = enm_slot(person, number)

  if str_eq(tense, "present") {
    if str_eq(lemma, "been") { return enm_been_present(slot) }
    if str_eq(lemma, "haven") { return enm_haven_present(slot) }
    if str_eq(lemma, "goon") { return enm_goon_present(slot) }
    if str_eq(lemma, "seen") { return enm_seen_present(slot) }
    if str_eq(lemma, "seyen") { return enm_seyen_present(slot) }
    if str_eq(lemma, "comen") { return enm_comen_present(slot) }
    if str_eq(lemma, "maken") { return enm_maken_present(slot) }
    // Not irregular: regular weak present
    let present_stem: String = enm_weak_stem(lemma)
    return enm_weak_present(present_stem, slot)
  }

  if str_eq(tense, "past") {
    if str_eq(lemma, "been") { return enm_been_past(slot) }
    if str_eq(lemma, "haven") { return enm_haven_past(slot) }
    if str_eq(lemma, "goon") { return enm_goon_past(slot) }
    if str_eq(lemma, "seen") { return enm_seen_past(slot) }
    if str_eq(lemma, "seyen") { return enm_seyen_past(slot) }
    if str_eq(lemma, "comen") { return enm_comen_past(slot) }
    if str_eq(lemma, "maken") { return enm_maken_past(slot) }
    // Not irregular: regular weak past
    let past_stem: String = enm_weak_stem(lemma)
    return enm_weak_past(past_stem, slot)
  }

  // Unknown tense: hand back the (normalised) infinitive.
  return lemma
}
|
||||
|
||||
// ── Noun plural irregulars ─────────────────────────────────────────────────────
|
||||
//
|
||||
// Returns the suppletive plural form for nouns with non-productive plurals, or ""
|
||||
// if the noun takes the regular -es plural.
|
||||
//
|
||||
// Covered: man, woman, child, ox, foot, tooth, goose, mouse, louse
|
||||
// These mirror patterns still visible in Modern English, present in ME too.
|
||||
|
||||
// Looks up the irregular plural of `noun`; returns "" when the noun has no
// entry and should take the regular plural instead.
fn enm_irregular_plural(noun: String) -> String {
  // vowel-change plurals
  if str_eq(noun, "foot") { return "feet" }
  if str_eq(noun, "tooth") { return "teeth" }
  if str_eq(noun, "goose") { return "gees" }
  if str_eq(noun, "mouse") { return "mees" }
  if str_eq(noun, "louse") { return "lees" }
  if str_eq(noun, "man") { return "men" }
  if str_eq(noun, "woman") { return "wommen" }

  // -en plurals
  if str_eq(noun, "ox") { return "oxen" }
  if str_eq(noun, "child") { return "children" }

  return ""
}
|
||||
|
||||
// ── Regular plural formation ───────────────────────────────────────────────────
|
||||
//
|
||||
// Default: append -es. For nouns already ending in -e, append just -s.
|
||||
// For nouns ending in -s, -x, -sh, -ch: the -es is still appropriate but
|
||||
// in ME spelling the forms vary; we use the simple +es rule uniformly.
|
||||
|
||||
// Forms the plural of `noun`: an irregular form if enm_irregular_plural has
// one, otherwise "s" after a final "e" and "es" in all other cases.
fn enm_make_plural(noun: String) -> String {
  let suppletive: String = enm_irregular_plural(noun)
  if !str_eq(suppletive, "") { return suppletive }

  // Regular plural: a final -e already supplies the vowel of the ending.
  if !enm_str_ends(noun, "e") { return noun + "es" }
  return noun + "s"
}
|
||||
|
||||
// ── enm_decline: main declension entry point ──────────────────────────────────
//
// Middle English has largely lost case morphology. This function handles the
// three practically relevant categories:
//   plural     — irregular form, or base + (e)s   (via enm_make_plural)
//   genitive   — singular possessive: base + es, or base + s when the base
//                already ends in -e ("king" -> "kinges", "name" -> "names")
//   everything else — base form (nominative, accusative, dative)
//
// noun:       base nominative form (e.g. "knyght", "man", "lond")
// gram_case:  only "genitive" changes the result; every other value yields
//             the base form. A gram_case of "plural" is NOT examined here —
//             the plural is selected by `number` alone.
// number:     "plural" selects the plural form and overrides gram_case;
//             any other value is treated as singular.
//
// Returns the inflected form.

fn enm_decline(noun: String, gram_case: String, number: String) -> String {
  // Plural number overrides gram_case for the plural form
  if str_eq(number, "plural") {
    return enm_make_plural(noun)
  }

  // Singular
  if str_eq(gram_case, "genitive") {
    // A final -e already supplies the vowel of the ending, so only -s is
    // added; this mirrors the regular plural rule and avoids "namees".
    if enm_str_ends(noun, "e") { return noun + "s" }
    return noun + "es"
  }

  // Nominative, accusative, dative — all the same in ME
  return noun
}
|
||||
|
||||
// ── Article selection ──────────────────────────────────────────────────────────
|
||||
//
|
||||
// Middle English uses "the" (definite) and "a" / "an" (indefinite).
|
||||
// The indefinite article is "an" before a vowel-initial word, "a" otherwise.
|
||||
// Vowel check is on the first character of the noun phrase word.
|
||||
|
||||
// enm_is_vowel_initial: true when the first character of s is one of the
// lowercase vowels a, e, i, o, u.
//
// Initial h is deliberately treated as a consonant, although some ME
// dialects treated it as silent.
fn enm_is_vowel_initial(s: String) -> Bool {
    let head: String = enm_first_char(s)
    if str_eq(head, "u") {
        return true
    }
    if str_eq(head, "o") {
        return true
    }
    if str_eq(head, "i") {
        return true
    }
    if str_eq(head, "e") {
        return true
    }
    if str_eq(head, "a") {
        return true
    }
    return false
}
|
||||
|
||||
// enm_indef_article: choose the indefinite article for a noun phrase:
// "an" before a vowel-initial phrase, "a" otherwise.
fn enm_indef_article(noun_phrase: String) -> String {
    let vowel_first: Bool = enm_is_vowel_initial(noun_phrase)
    if !vowel_first {
        return "a"
    }
    return "an"
}
|
||||
|
||||
// ── enm_noun_phrase: noun phrase builder ─────────────────────────────────────
|
||||
//
|
||||
// Constructs a full noun phrase with the appropriate article.
|
||||
//
|
||||
// noun: base nominative singular form (e.g. "knyght", "man", "lond")
|
||||
// gram_case: "nominative" | "accusative" | "dative" | "genitive" | "plural"
|
||||
// number: "singular" | "plural"
|
||||
// definite: "true" | "false" (string comparison)
|
||||
//
|
||||
// Returns the complete noun phrase string (article + declined noun).
|
||||
|
||||
// enm_noun_phrase: build article + declined noun.
//
//   noun:      base nominative singular form (e.g. "knyght", "man", "lond")
//   gram_case: "nominative" | "accusative" | "dative" | "genitive" | "plural"
//   number:    "singular" | "plural"
//   definite:  "true" | "false" (compared as a string)
//
// Definite phrases are prefixed with "the ". Indefinite singular phrases
// get "a"/"an" chosen from the declined form; indefinite plurals get no
// article.
fn enm_noun_phrase(noun: String, gram_case: String, number: String, definite: String) -> String {
    // gram_case "plural" asks for a plural phrase even if number was left
    // as "singular". Normalise to number = "plural" so the declension and
    // the article choice agree (otherwise the result would be an indefinite
    // article in front of a singular or plural form at random).
    if str_eq(gram_case, "plural") {
        if !str_eq(number, "plural") {
            return enm_noun_phrase(noun, "nominative", "plural", definite)
        }
    }

    let form: String = enm_decline(noun, gram_case, number)

    if str_eq(definite, "true") {
        return "the " + form
    }

    // The indefinite article only makes sense for singular nouns.
    if str_eq(number, "plural") {
        return form
    }

    let art: String = enm_indef_article(form)
    return art + " " + form
}
|
||||
@@ -1,30 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn enm_str_ends(s: String, suf: String) -> Bool
|
||||
extern fn enm_drop(s: String, n: Int) -> String
|
||||
extern fn enm_first_char(s: String) -> String
|
||||
extern fn enm_slot(person: String, number: String) -> Int
|
||||
extern fn enm_been_present(slot: Int) -> String
|
||||
extern fn enm_been_past(slot: Int) -> String
|
||||
extern fn enm_haven_present(slot: Int) -> String
|
||||
extern fn enm_haven_past(slot: Int) -> String
|
||||
extern fn enm_goon_present(slot: Int) -> String
|
||||
extern fn enm_goon_past(slot: Int) -> String
|
||||
extern fn enm_seen_present(slot: Int) -> String
|
||||
extern fn enm_seen_past(slot: Int) -> String
|
||||
extern fn enm_seyen_present(slot: Int) -> String
|
||||
extern fn enm_seyen_past(slot: Int) -> String
|
||||
extern fn enm_comen_present(slot: Int) -> String
|
||||
extern fn enm_comen_past(slot: Int) -> String
|
||||
extern fn enm_maken_present(slot: Int) -> String
|
||||
extern fn enm_maken_past(slot: Int) -> String
|
||||
extern fn enm_map_canonical(verb: String) -> String
|
||||
extern fn enm_weak_stem(verb: String) -> String
|
||||
extern fn enm_weak_present(stem: String, slot: Int) -> String
|
||||
extern fn enm_weak_past(stem: String, slot: Int) -> String
|
||||
extern fn enm_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn enm_irregular_plural(noun: String) -> String
|
||||
extern fn enm_make_plural(noun: String) -> String
|
||||
extern fn enm_decline(noun: String, gram_case: String, number: String) -> String
|
||||
extern fn enm_is_vowel_initial(s: String) -> Bool
|
||||
extern fn enm_indef_article(noun_phrase: String) -> String
|
||||
extern fn enm_noun_phrase(noun: String, gram_case: String, number: String, definite: String) -> String
|
||||
@@ -1,716 +0,0 @@
|
||||
// morphology-es.el - Spanish morphology for the NLG engine.
|
||||
//
|
||||
// Implements fusional Spanish verb conjugation, noun pluralization, gender
|
||||
// inference, and article agreement. Designed as a companion to morphology.el
|
||||
// and called by the engine when the language profile code is "es".
|
||||
//
|
||||
// Verb tenses covered: present, preterite (past), future, imperfect.
|
||||
// Persons: first/second/third × singular/plural (1s 2s 3s 1p 2p 3p).
|
||||
// Verb classes: -ar, -er, -ir (regular) + a core set of common irregulars.
|
||||
//
|
||||
// Depends on: morphology.el (str_ends, str_drop_last, str_last_char, str_last2, str_last3, is_vowel)
|
||||
|
||||
// ── String helpers (local, matching morphology.el conventions) ────────────────
|
||||
|
||||
import "morphology.el"
|
||||
// es_str_ends: true when s ends with suf (delegates to str_ends_with).
fn es_str_ends(s: String, suf: String) -> Bool {
    let matched: Bool = str_ends_with(s, suf)
    return matched
}
|
||||
|
||||
// es_str_drop_last: return s without its last n units (as counted by
// str_len). Yields "" when n is at least the length of s.
fn es_str_drop_last(s: String, n: Int) -> String {
    let total: Int = str_len(s)
    if n < total {
        let keep: Int = total - n
        return str_slice(s, 0, keep)
    }
    return ""
}
|
||||
|
||||
// es_str_last_char: return the final unit of s (one str_slice position),
// or "" for an empty string.
fn es_str_last_char(s: String) -> String {
    let size: Int = str_len(s)
    if size == 0 {
        return ""
    }
    let start: Int = size - 1
    return str_slice(s, start, size)
}
|
||||
|
||||
// es_str_last2: return the last two units of s, or s itself when it is
// shorter than two units.
fn es_str_last2(s: String) -> String {
    let size: Int = str_len(s)
    if size >= 2 {
        let start: Int = size - 2
        return str_slice(s, start, size)
    }
    return s
}
|
||||
|
||||
// es_str_last3: return the last three units of s, or s itself when it is
// shorter than three units.
fn es_str_last3(s: String) -> String {
    let size: Int = str_len(s)
    if size >= 3 {
        let start: Int = size - 3
        return str_slice(s, start, size)
    }
    return s
}
|
||||
|
||||
// ── Verb class detection ──────────────────────────────────────────────────────
|
||||
//
|
||||
// Spanish verbs fall into three conjugation classes defined by the infinitive
|
||||
// ending: -ar, -er, -ir. The stem is the infinitive minus those two characters.
|
||||
|
||||
// es_verb_class: classify an infinitive as "ar", "er" or "ir" by its ending.
// Anything that ends in neither -er nor -ir is reported as "ar"; that covers
// both real -ar verbs and the fallback for unrecognised input.
fn es_verb_class(base: String) -> String {
    let is_er: Bool = es_str_ends(base, "er")
    if is_er {
        return "er"
    }
    let is_ir: Bool = es_str_ends(base, "ir")
    if is_ir {
        return "ir"
    }
    return "ar"
}
|
||||
|
||||
// es_stem: strip the two-unit infinitive ending (-ar/-er/-ir) from base.
fn es_stem(base: String) -> String {
    let ending_len: Int = 2
    return es_str_drop_last(base, ending_len)
}
|
||||
|
||||
// ── Person/number index ───────────────────────────────────────────────────────
|
||||
//
|
||||
// Maps person × number to a 0-based slot index used inside paradigm tables.
|
||||
// 0 = 1s, 1 = 2s, 2 = 3s, 3 = 1p, 4 = 2p, 5 = 3p
|
||||
|
||||
// es_slot: map person × number to a paradigm slot.
//   0 = 1s, 1 = 2s, 2 = 3s, 3 = 1p, 4 = 2p, 5 = 3p
// Any person other than "first"/"second" counts as third; any number other
// than "singular" counts as plural.
fn es_slot(person: String, number: String) -> Int {
    if str_eq(number, "singular") {
        if str_eq(person, "first") {
            return 0
        }
        if str_eq(person, "second") {
            return 1
        }
        return 2
    }
    // plural (or unrecognised number)
    if str_eq(person, "first") {
        return 3
    }
    if str_eq(person, "second") {
        return 4
    }
    return 5
}
|
||||
|
||||
// ── Irregular present tense ───────────────────────────────────────────────────
|
||||
//
|
||||
// Returns the fully-inflected form if the verb is irregular in the present
|
||||
// tense for the given person/number slot, otherwise returns "".
|
||||
//
|
||||
// ser: soy, eres, es, somos, sois, son
|
||||
// estar: estoy, estás, está, estamos, estáis, están
|
||||
// tener: tengo, tienes, tiene, tenemos, tenéis, tienen
|
||||
// hacer: hago, haces, hace, hacemos, hacéis, hacen
|
||||
// ir: voy, vas, va, vamos, vais, van
|
||||
// ver: veo, ves, ve, vemos, veis, ven
|
||||
// dar: doy, das, da, damos, dais, dan
|
||||
// saber: sé, sabes, sabe, sabemos, sabéis, saben
|
||||
// poder: puedo, puedes, puede, podemos, podéis, pueden
|
||||
// querer: quiero, quieres, quiere, queremos, queréis, quieren
|
||||
// venir: vengo, vienes, viene, venimos, venís, vienen
|
||||
// decir: digo, dices, dice, decimos, decís, dicen
|
||||
// haber: he, has, ha, hemos, habéis, han
|
||||
|
||||
// Pick one of six present-tense forms by slot: slots 0..4 select f0..f4,
// any other slot selects f5.
fn es_present_pick(slot: Int, f0: String, f1: String, f2: String, f3: String, f4: String, f5: String) -> String {
    if slot == 0 { return f0 }
    if slot == 1 { return f1 }
    if slot == 2 { return f2 }
    if slot == 3 { return f3 }
    if slot == 4 { return f4 }
    return f5
}

// es_irregular_present: fully inflected present-tense form for the verbs
// listed below, or "" when the verb is not in the table.
//
//   verb:   infinitive
//   person: "first" | "second" | "third"
//   number: "singular" | "plural"
//
// Each row lists the forms in slot order 1s, 2s, 3s, 1p, 2p, 3p.
fn es_irregular_present(verb: String, person: String, number: String) -> String {
    let idx: Int = es_slot(person, number)

    if str_eq(verb, "ser") { return es_present_pick(idx, "soy", "eres", "es", "somos", "sois", "son") }
    if str_eq(verb, "estar") { return es_present_pick(idx, "estoy", "estás", "está", "estamos", "estáis", "están") }
    if str_eq(verb, "tener") { return es_present_pick(idx, "tengo", "tienes", "tiene", "tenemos", "tenéis", "tienen") }
    if str_eq(verb, "hacer") { return es_present_pick(idx, "hago", "haces", "hace", "hacemos", "hacéis", "hacen") }
    if str_eq(verb, "ir") { return es_present_pick(idx, "voy", "vas", "va", "vamos", "vais", "van") }
    if str_eq(verb, "ver") { return es_present_pick(idx, "veo", "ves", "ve", "vemos", "veis", "ven") }
    if str_eq(verb, "dar") { return es_present_pick(idx, "doy", "das", "da", "damos", "dais", "dan") }
    if str_eq(verb, "saber") { return es_present_pick(idx, "sé", "sabes", "sabe", "sabemos", "sabéis", "saben") }
    if str_eq(verb, "poder") { return es_present_pick(idx, "puedo", "puedes", "puede", "podemos", "podéis", "pueden") }
    if str_eq(verb, "querer") { return es_present_pick(idx, "quiero", "quieres", "quiere", "queremos", "queréis", "quieren") }
    if str_eq(verb, "venir") { return es_present_pick(idx, "vengo", "vienes", "viene", "venimos", "venís", "vienen") }
    if str_eq(verb, "decir") { return es_present_pick(idx, "digo", "dices", "dice", "decimos", "decís", "dicen") }
    if str_eq(verb, "haber") { return es_present_pick(idx, "he", "has", "ha", "hemos", "habéis", "han") }

    // Not an irregular present: caller falls back to the regular paradigm.
    return ""
}
|
||||
|
||||
// ── Irregular preterite tense ─────────────────────────────────────────────────
|
||||
//
|
||||
// Returns the inflected preterite form for irregular verbs, or "" if regular.
|
||||
//
|
||||
// ser/ir (same preterite): fui, fuiste, fue, fuimos, fuisteis, fueron
|
||||
// tener: tuve, tuviste, tuvo, tuvimos, tuvisteis, tuvieron
|
||||
// hacer: hice, hiciste, hizo, hicimos, hicisteis, hicieron
|
||||
// estar: estuve, estuviste, estuvo, estuvimos, estuvisteis, estuvieron
|
||||
// dar: di, diste, dio, dimos, disteis, dieron
|
||||
// saber: supe, supiste, supo, supimos, supisteis, supieron
|
||||
// poder: pude, pudiste, pudo, pudimos, pudisteis, pudieron
|
||||
// querer: quise, quisiste, quiso, quisimos, quisisteis, quisieron
|
||||
// venir: vine, viniste, vino, vinimos, vinisteis, vinieron
|
||||
// decir: dije, dijiste, dijo, dijimos, dijisteis, dijeron
|
||||
// haber: hube, hubiste, hubo, hubimos, hubisteis, hubieron
|
||||
// ver: vi, viste, vio, vimos, visteis, vieron
|
||||
|
||||
// Pick one of six preterite forms by slot: slots 0..4 select f0..f4,
// any other slot selects f5.
fn es_preterite_pick(slot: Int, f0: String, f1: String, f2: String, f3: String, f4: String, f5: String) -> String {
    if slot == 0 { return f0 }
    if slot == 1 { return f1 }
    if slot == 2 { return f2 }
    if slot == 3 { return f3 }
    if slot == 4 { return f4 }
    return f5
}

// es_irregular_preterite: fully inflected preterite form for the verbs
// listed below, or "" when the verb is not in the table.
//
//   verb:   infinitive
//   person: "first" | "second" | "third"
//   number: "singular" | "plural"
//
// Each row lists the forms in slot order 1s, 2s, 3s, 1p, 2p, 3p.
// "ser" and "ir" share one preterite paradigm.
fn es_irregular_preterite(verb: String, person: String, number: String) -> String {
    let idx: Int = es_slot(person, number)

    if str_eq(verb, "ser") { return es_preterite_pick(idx, "fui", "fuiste", "fue", "fuimos", "fuisteis", "fueron") }
    if str_eq(verb, "ir") { return es_preterite_pick(idx, "fui", "fuiste", "fue", "fuimos", "fuisteis", "fueron") }
    if str_eq(verb, "tener") { return es_preterite_pick(idx, "tuve", "tuviste", "tuvo", "tuvimos", "tuvisteis", "tuvieron") }
    if str_eq(verb, "hacer") { return es_preterite_pick(idx, "hice", "hiciste", "hizo", "hicimos", "hicisteis", "hicieron") }
    if str_eq(verb, "estar") { return es_preterite_pick(idx, "estuve", "estuviste", "estuvo", "estuvimos", "estuvisteis", "estuvieron") }
    if str_eq(verb, "dar") { return es_preterite_pick(idx, "di", "diste", "dio", "dimos", "disteis", "dieron") }
    if str_eq(verb, "saber") { return es_preterite_pick(idx, "supe", "supiste", "supo", "supimos", "supisteis", "supieron") }
    if str_eq(verb, "poder") { return es_preterite_pick(idx, "pude", "pudiste", "pudo", "pudimos", "pudisteis", "pudieron") }
    if str_eq(verb, "querer") { return es_preterite_pick(idx, "quise", "quisiste", "quiso", "quisimos", "quisisteis", "quisieron") }
    if str_eq(verb, "venir") { return es_preterite_pick(idx, "vine", "viniste", "vino", "vinimos", "vinisteis", "vinieron") }
    if str_eq(verb, "decir") { return es_preterite_pick(idx, "dije", "dijiste", "dijo", "dijimos", "dijisteis", "dijeron") }
    if str_eq(verb, "haber") { return es_preterite_pick(idx, "hube", "hubiste", "hubo", "hubimos", "hubisteis", "hubieron") }
    if str_eq(verb, "ver") { return es_preterite_pick(idx, "vi", "viste", "vio", "vimos", "visteis", "vieron") }

    // Not an irregular preterite: caller falls back to the regular paradigm.
    return ""
}
|
||||
|
||||
// ── Irregular imperfect tense ─────────────────────────────────────────────────
|
||||
//
|
||||
// Only three verbs are truly irregular in the imperfect:
|
||||
// ser: era, eras, era, éramos, erais, eran
|
||||
// ir: iba, ibas, iba, íbamos, ibais, iban
|
||||
// ver: veía, veías, veía, veíamos, veíais, veían
|
||||
|
||||
// Pick one of six imperfect forms by slot: slots 0..4 select f0..f4,
// any other slot selects f5.
fn es_imperfect_pick(slot: Int, f0: String, f1: String, f2: String, f3: String, f4: String, f5: String) -> String {
    if slot == 0 { return f0 }
    if slot == 1 { return f1 }
    if slot == 2 { return f2 }
    if slot == 3 { return f3 }
    if slot == 4 { return f4 }
    return f5
}

// es_irregular_imperfect: fully inflected imperfect form for "ser", "ir"
// and "ver", or "" for every other verb.
//
//   verb:   infinitive
//   person: "first" | "second" | "third"
//   number: "singular" | "plural"
//
// Each row lists the forms in slot order 1s, 2s, 3s, 1p, 2p, 3p.
fn es_irregular_imperfect(verb: String, person: String, number: String) -> String {
    let idx: Int = es_slot(person, number)

    if str_eq(verb, "ser") { return es_imperfect_pick(idx, "era", "eras", "era", "éramos", "erais", "eran") }
    if str_eq(verb, "ir") { return es_imperfect_pick(idx, "iba", "ibas", "iba", "íbamos", "ibais", "iban") }
    if str_eq(verb, "ver") { return es_imperfect_pick(idx, "veía", "veías", "veía", "veíamos", "veíais", "veían") }

    // Not an irregular imperfect: caller falls back to the regular paradigm.
    return ""
}
|
||||
|
||||
// ── Regular present conjugation ───────────────────────────────────────────────
|
||||
//
|
||||
// -ar: -o, -as, -a, -amos, -áis, -an
|
||||
// -er: -o, -es, -e, -emos, -éis, -en
|
||||
// -ir: -o, -es, -e, -imos, -ís, -en
|
||||
|
||||
// es_regular_present: attach the regular present-tense ending to stem.
//
//   stem:   infinitive without its -ar/-er/-ir ending
//   vclass: "ar" | "er" | anything else is treated as "ir"
//   slot:   0..5 (1s 2s 3s 1p 2p 3p); any other value is treated as 3p
//
//   -ar: -o, -as, -a, -amos, -áis, -an
//   -er: -o, -es, -e, -emos, -éis, -en
//   -ir: -o, -es, -e, -imos, -ís,  -en
fn es_regular_present(stem: String, vclass: String, slot: Int) -> String {
    // First person singular is -o in every class.
    if slot == 0 {
        return stem + "o"
    }

    if str_eq(vclass, "ar") {
        if slot == 1 { return stem + "as" }
        if slot == 2 { return stem + "a" }
        if slot == 3 { return stem + "amos" }
        if slot == 4 { return stem + "áis" }
        return stem + "an"
    }

    // -er and -ir differ only in the 1p and 2p endings.
    if slot == 1 { return stem + "es" }
    if slot == 2 { return stem + "e" }
    let is_er: Bool = str_eq(vclass, "er")
    if slot == 3 {
        if is_er { return stem + "emos" }
        return stem + "imos"
    }
    if slot == 4 {
        if is_er { return stem + "éis" }
        return stem + "ís"
    }
    return stem + "en"
}
|
||||
|
||||
// ── Regular preterite conjugation ─────────────────────────────────────────────
|
||||
//
|
||||
// -ar: -é, -aste, -ó, -amos, -asteis, -aron
|
||||
// -er: -í, -iste, -ió, -imos, -isteis, -ieron
|
||||
// -ir: -í, -iste, -ió, -imos, -isteis, -ieron
|
||||
|
||||
// es_regular_preterite: attach the regular preterite ending to stem.
//
//   stem:   infinitive without its -ar/-er/-ir ending
//   vclass: "ar" | anything else uses the shared -er/-ir endings
//   slot:   0..5 (1s 2s 3s 1p 2p 3p); any other value is treated as 3p
//
//   -ar:     -é, -aste, -ó,  -amos, -asteis, -aron
//   -er/-ir: -í, -iste, -ió, -imos, -isteis, -ieron
fn es_regular_preterite(stem: String, vclass: String, slot: Int) -> String {
    if !str_eq(vclass, "ar") {
        // -er and -ir share one set of preterite endings.
        if slot == 0 { return stem + "í" }
        if slot == 1 { return stem + "iste" }
        if slot == 2 { return stem + "ió" }
        if slot == 3 { return stem + "imos" }
        if slot == 4 { return stem + "isteis" }
        return stem + "ieron"
    }

    if slot == 0 { return stem + "é" }
    if slot == 1 { return stem + "aste" }
    if slot == 2 { return stem + "ó" }
    if slot == 3 { return stem + "amos" }
    if slot == 4 { return stem + "asteis" }
    return stem + "aron"
}
|
||||
|
||||
// ── Regular future conjugation ────────────────────────────────────────────────
|
||||
//
|
||||
// Future is formed from the full infinitive + endings (all classes):
|
||||
// -é, -ás, -á, -emos, -éis, -án
|
||||
//
|
||||
// No stem change; the infinitive is the future stem.
|
||||
|
||||
// es_regular_future: attach the future ending to a future stem.
//
//   base: future stem (the full infinitive for regular verbs)
//   slot: 0..5 (1s 2s 3s 1p 2p 3p); any other value is treated as 3p
//
// Endings, identical for all classes: -é, -ás, -á, -emos, -éis, -án
fn es_regular_future(base: String, slot: Int) -> String {
    // Plural slots with their own endings first, then the singular ones;
    // everything else falls through to 3p.
    if slot == 3 {
        return base + "emos"
    }
    if slot == 4 {
        return base + "éis"
    }
    if slot == 0 {
        return base + "é"
    }
    if slot == 1 {
        return base + "ás"
    }
    if slot == 2 {
        return base + "á"
    }
    return base + "án"
}
|
||||
|
||||
// ── Irregular future stems ────────────────────────────────────────────────────
|
||||
//
|
||||
// Some verbs contract or alter their infinitive for the future stem.
|
||||
// Returns the irregular future stem, or "" if the verb uses the regular stem.
|
||||
|
||||
// es_irregular_future_stem: return the contracted future stem for verbs
// that do not build the future on the plain infinitive, or "" when the
// verb is not in the table. Entries are in alphabetical order.
fn es_irregular_future_stem(verb: String) -> String {
    if str_eq(verb, "decir") {
        return "dir"
    }
    if str_eq(verb, "haber") {
        return "habr"
    }
    if str_eq(verb, "hacer") {
        return "har"
    }
    if str_eq(verb, "poder") {
        return "podr"
    }
    if str_eq(verb, "poner") {
        return "pondr"
    }
    if str_eq(verb, "querer") {
        return "querr"
    }
    if str_eq(verb, "saber") {
        return "sabr"
    }
    if str_eq(verb, "salir") {
        return "saldr"
    }
    if str_eq(verb, "tener") {
        return "tendr"
    }
    if str_eq(verb, "venir") {
        return "vendr"
    }
    return ""
}
|
||||
|
||||
// ── Regular imperfect conjugation ─────────────────────────────────────────────
|
||||
//
|
||||
// -ar: -aba, -abas, -aba, -ábamos, -abais, -aban
|
||||
// -er/-ir: -ía, -ías, -ía, -íamos, -íais, -ían
|
||||
|
||||
// es_regular_imperfect: attach the regular imperfect ending to stem.
//
//   stem:   infinitive without its -ar/-er/-ir ending
//   vclass: "ar" | anything else uses the shared -er/-ir endings
//   slot:   0..5 (1s 2s 3s 1p 2p 3p); any other value is treated as 3p
//
//   -ar:     -aba, -abas, -aba, -ábamos, -abais, -aban
//   -er/-ir: -ía,  -ías,  -ía,  -íamos,  -íais,  -ían
fn es_regular_imperfect(stem: String, vclass: String, slot: Int) -> String {
    if !str_eq(vclass, "ar") {
        // -er and -ir share one set of imperfect endings.
        if slot == 0 { return stem + "ía" }
        if slot == 1 { return stem + "ías" }
        if slot == 2 { return stem + "ía" }
        if slot == 3 { return stem + "íamos" }
        if slot == 4 { return stem + "íais" }
        return stem + "ían"
    }

    if slot == 0 { return stem + "aba" }
    if slot == 1 { return stem + "abas" }
    if slot == 2 { return stem + "aba" }
    if slot == 3 { return stem + "ábamos" }
    if slot == 4 { return stem + "abais" }
    return stem + "aban"
}
|
||||
|
||||
// ── Full conjugation entry point ──────────────────────────────────────────────
|
||||
//
|
||||
// es_conjugate: conjugate a Spanish verb.
|
||||
//
|
||||
// verb: Spanish infinitive (e.g. "hablar", "ser", "tener")
|
||||
// tense: "present" | "past" | "future" | "imperfect"
|
||||
// (note: "past" maps to the preterite/indefinite past)
|
||||
// person: "first" | "second" | "third"
|
||||
// number: "singular" | "plural"
|
||||
|
||||
// es_conjugate: conjugate a Spanish verb.
//
//   verb:   infinitive (e.g. "hablar", "ser", "tener")
//   tense:  "present" | "past" (preterite) | "future" | "imperfect"
//   person: "first" | "second" | "third"
//   number: "singular" | "plural"
//
// For each tense the irregular table is consulted first; an empty result
// means the regular paradigm applies. An unrecognised tense returns the
// infinitive unchanged.
fn es_conjugate(verb: String, tense: String, person: String, number: String) -> String {
    let idx: Int = es_slot(person, number)
    // Class and stem only depend on the infinitive, so derive them once.
    let conj_class: String = es_verb_class(verb)
    let root: String = es_stem(verb)

    if str_eq(tense, "present") {
        let special: String = es_irregular_present(verb, person, number)
        if str_eq(special, "") {
            return es_regular_present(root, conj_class, idx)
        }
        return special
    }

    if str_eq(tense, "past") {
        let special: String = es_irregular_preterite(verb, person, number)
        if str_eq(special, "") {
            return es_regular_preterite(root, conj_class, idx)
        }
        return special
    }

    if str_eq(tense, "future") {
        // Irregular futures change only the stem; the endings are shared.
        let fut_stem: String = es_irregular_future_stem(verb)
        if str_eq(fut_stem, "") {
            return es_regular_future(verb, idx)
        }
        return es_regular_future(fut_stem, idx)
    }

    if str_eq(tense, "imperfect") {
        let special: String = es_irregular_imperfect(verb, person, number)
        if str_eq(special, "") {
            return es_regular_imperfect(root, conj_class, idx)
        }
        return special
    }

    return verb
}
|
||||
|
||||
// ── Noun gender inference ─────────────────────────────────────────────────────
|
||||
//
|
||||
// Returns "m" (masculine), "f" (feminine), or "unknown".
|
||||
//
|
||||
// Heuristics (not exhaustive — cover most common patterns):
|
||||
// ends in -o -> masculine (libro, gato, niño)
|
||||
// ends in -a -> feminine (casa, mesa, niña)
|
||||
// ends in -ión -> feminine (canción, nación)
|
||||
// ends in -dad/-tad -> feminine (ciudad, libertad)
|
||||
// ends in -umbre -> feminine (costumbre)
|
||||
// ends in -sis -> feminine (crisis, tesis)
|
||||
// ends in -ema/-ama -> masculine (problema, programa, tema, idioma)
|
||||
// ends in -or -> masculine (color, amor, señor)
|
||||
// ends in -aje -> masculine (viaje, paisaje)
|
||||
// ends in -án/-ón -> masculine (avión → check -ión first)
|
||||
// otherwise -> unknown
|
||||
|
||||
// es_gender: infer the grammatical gender of a Spanish noun from its form.
//
//   noun: singular noun
//
// Returns "m", "f" or "unknown". The result is a heuristic: a short list
// of frequent exceptions is checked first, then suffix rules from most to
// least specific.
fn es_gender(noun: String) -> String {
    // Frequent nouns that contradict the suffix rules below.
    if str_eq(noun, "avión") { return "m" }
    if str_eq(noun, "camión") { return "m" }
    if str_eq(noun, "día") { return "m" }
    if str_eq(noun, "mapa") { return "m" }
    if str_eq(noun, "mano") { return "f" }

    // Feminine suffixes. -ión must be tested before -ón.
    if es_str_ends(noun, "ión") { return "f" }
    if es_str_ends(noun, "dad") { return "f" }
    if es_str_ends(noun, "tad") { return "f" }
    if es_str_ends(noun, "umbre") { return "f" }
    if es_str_ends(noun, "sis") { return "f" }

    // Masculine suffixes. -ema/-ama must be tested before the bare -a rule.
    if es_str_ends(noun, "ema") { return "m" }
    if es_str_ends(noun, "ama") { return "m" }
    if es_str_ends(noun, "aje") { return "m" }
    if es_str_ends(noun, "or") { return "m" }
    // -án / -ón (corazón, capitán); -ión was already handled above.
    if es_str_ends(noun, "án") { return "m" }
    if es_str_ends(noun, "ón") { return "m" }

    // Bare final vowel.
    if es_str_ends(noun, "o") { return "m" }
    if es_str_ends(noun, "a") { return "f" }

    return "unknown"
}
|
||||
|
||||
// ── Noun pluralization ────────────────────────────────────────────────────────
|
||||
//
|
||||
// Rules (applied in order):
|
||||
// ends in vowel (a e i o u) -> add -s
|
||||
// ends in consonant -> add -es
|
||||
// ends in -z -> replace -z with -ces
|
||||
// ends in -s (unstressed) -> unchanged (e.g. "el lunes" -> "los lunes")
|
||||
//
|
||||
// Note: nouns ending in stressed vowel + s (e.g. "el autobús" → "los autobuses")
|
||||
// are handled by the consonant rule since -s is a consonant ending for pluralization
|
||||
// purposes; but "el lunes" (days of week ending in -s) stay unchanged — this is
|
||||
// an irregular class. The table below handles common invariant nouns.
|
||||
|
||||
// es_invariant_plural: return the noun itself when its plural is identical
// to its singular (weekdays in -es, common nouns in -sis, "virus"), or ""
// when the noun is not in the table.
fn es_invariant_plural(noun: String) -> String {
    // Weekdays
    if str_eq(noun, "lunes") {
        return noun
    }
    if str_eq(noun, "martes") {
        return noun
    }
    if str_eq(noun, "miércoles") {
        return noun
    }
    if str_eq(noun, "jueves") {
        return noun
    }
    if str_eq(noun, "viernes") {
        return noun
    }
    // Other invariant nouns
    if str_eq(noun, "crisis") {
        return noun
    }
    if str_eq(noun, "tesis") {
        return noun
    }
    if str_eq(noun, "análisis") {
        return noun
    }
    if str_eq(noun, "dosis") {
        return noun
    }
    if str_eq(noun, "virus") {
        return noun
    }
    return ""
}
|
||||
|
||||
// Plural of a noun whose last syllable carries a written accent before a
// final -n or -s (canción -> canciones, autobús -> autobuses). Adding -es
// moves the stress off the final syllable, so the accent mark is dropped.
// Returns "" when the noun has no such ending.
//
// -ís is deliberately not listed: in words like "país" the accent marks a
// hiatus and is kept in the plural ("países").
// str_len(suffix) is used for the drop count so the code does not depend on
// whether string lengths are counted in bytes or in characters.
fn es_plural_drop_accent(noun: String) -> String {
    if es_str_ends(noun, "án") { return es_str_drop_last(noun, str_len("án")) + "anes" }
    if es_str_ends(noun, "én") { return es_str_drop_last(noun, str_len("én")) + "enes" }
    if es_str_ends(noun, "ín") { return es_str_drop_last(noun, str_len("ín")) + "ines" }
    if es_str_ends(noun, "ón") { return es_str_drop_last(noun, str_len("ón")) + "ones" }
    if es_str_ends(noun, "ún") { return es_str_drop_last(noun, str_len("ún")) + "unes" }
    if es_str_ends(noun, "ás") { return es_str_drop_last(noun, str_len("ás")) + "ases" }
    if es_str_ends(noun, "és") { return es_str_drop_last(noun, str_len("és")) + "eses" }
    if es_str_ends(noun, "ós") { return es_str_drop_last(noun, str_len("ós")) + "oses" }
    if es_str_ends(noun, "ús") { return es_str_drop_last(noun, str_len("ús")) + "uses" }
    return ""
}

// es_pluralize: form the plural of a Spanish noun.
//
//   noun: singular noun
//
// Rules, in the order they are applied:
//   1. empty input                          -> ""
//   2. invariant nouns (lunes, crisis, ...) -> unchanged
//   3. final stressed á / é / ó             -> add -s   (sofá -> sofás)
//   4. accented vowel + n/s                 -> drop accent, add -es
//   5. final -z                             -> replace with -ces
//   6. final unaccented vowel               -> add -s
//   7. anything else (consonant, í, ú)      -> add -es
fn es_pluralize(noun: String) -> String {
    if str_eq(noun, "") {
        return ""
    }

    let inv: String = es_invariant_plural(noun)
    if !str_eq(inv, "") {
        return inv
    }

    // Stressed final vowels are tested with a suffix match because the
    // last single unit of the string may be only part of an accented letter.
    if es_str_ends(noun, "á") { return noun + "s" }
    if es_str_ends(noun, "é") { return noun + "s" }
    if es_str_ends(noun, "ó") { return noun + "s" }

    let deaccented: String = es_plural_drop_accent(noun)
    if !str_eq(deaccented, "") {
        return deaccented
    }

    let last: String = es_str_last_char(noun)

    // Ends in -z: replace with -ces
    if str_eq(last, "z") {
        return es_str_drop_last(noun, 1) + "ces"
    }

    // Ends in an unaccented vowel: add -s
    if str_eq(last, "a") { return noun + "s" }
    if str_eq(last, "e") { return noun + "s" }
    if str_eq(last, "i") { return noun + "s" }
    if str_eq(last, "o") { return noun + "s" }
    if str_eq(last, "u") { return noun + "s" }

    // Consonant ending (also reached for final í / ú: rubí -> rubíes)
    return noun + "es"
}
|
||||
|
||||
// ── Article agreement ─────────────────────────────────────────────────────────
|
||||
//
|
||||
// es_agree_article: return the correct Spanish article for a noun.
|
||||
//
|
||||
// noun: the noun (used for gender and number inference)
|
||||
// definite: "true" for definite (el/la/los/las), "false" for indefinite (un/una/unos/unas)
|
||||
// number: "singular" | "plural"
|
||||
//
|
||||
// Special case: feminine nouns beginning with stressed "a-" or "ha-" take
|
||||
// masculine singular definite article: "el agua", "el hacha".
|
||||
// This is handled by checking the noun's first character when gender is feminine.
|
||||
|
||||
// es_starts_with_stressed_a: true when a noun begins with a stressed
// "a" / "ha", i.e. when a feminine singular noun takes "el" instead of "la"
// ("el agua", "el águila", "el hacha").
//
// Stress on the first syllable is only visible in the spelling when it is
// marked with an accent ("á…", "há…"). For unaccented words it cannot be
// derived from the first letter alone, so a list of common stressed-a nouns
// is used. Treating every a-initial noun as stressed would produce
// "el amiga", "el abuela", "el harina".
fn es_starts_with_stressed_a(noun: String) -> Bool {
    let n: Int = str_len(noun)
    if n == 0 {
        return false
    }

    // Written accent on the first syllable. The prefix width is taken from
    // str_len so the slice is correct whether lengths are bytes or characters.
    let acc_len: Int = str_len("á")
    if n >= acc_len {
        if str_eq(str_slice(noun, 0, acc_len), "á") { return true }
    }
    let h_acc_len: Int = str_len("há")
    if n >= h_acc_len {
        if str_eq(str_slice(noun, 0, h_acc_len), "há") { return true }
    }

    // Common unaccented nouns stressed on the initial a / ha.
    if str_eq(noun, "agua") { return true }
    if str_eq(noun, "ala") { return true }
    if str_eq(noun, "alma") { return true }
    if str_eq(noun, "ama") { return true }
    if str_eq(noun, "ancla") { return true }
    if str_eq(noun, "ansia") { return true }
    if str_eq(noun, "arca") { return true }
    if str_eq(noun, "arma") { return true }
    if str_eq(noun, "arpa") { return true }
    if str_eq(noun, "asa") { return true }
    if str_eq(noun, "asma") { return true }
    if str_eq(noun, "aula") { return true }
    if str_eq(noun, "aura") { return true }
    if str_eq(noun, "ave") { return true }
    if str_eq(noun, "acta") { return true }
    if str_eq(noun, "alba") { return true }
    if str_eq(noun, "alga") { return true }
    if str_eq(noun, "alza") { return true }
    if str_eq(noun, "ascua") { return true }
    if str_eq(noun, "haba") { return true }
    if str_eq(noun, "habla") { return true }
    if str_eq(noun, "hacha") { return true }
    if str_eq(noun, "hada") { return true }
    if str_eq(noun, "hambre") { return true }
    if str_eq(noun, "haya") { return true }

    return false
}
|
||||
|
||||
// es_agree_article: choose the Spanish article that agrees with a noun.
//
//   noun:     the noun; its gender is inferred with es_gender
//   definite: "true" for el/la/los/las, anything else for un/una/unos/unas
//   number:   "plural" for plural articles, anything else for singular
//
// Any gender other than "f" is treated as masculine. A feminine singular
// noun for which es_starts_with_stressed_a is true takes the definite
// article "el".
fn es_agree_article(noun: String, definite: String, number: String) -> String {
    let feminine: Bool = str_eq(es_gender(noun), "f")
    let wants_definite: Bool = str_eq(definite, "true")

    if str_eq(number, "plural") {
        if wants_definite {
            if feminine {
                return "las"
            }
            return "los"
        }
        if feminine {
            return "unas"
        }
        return "unos"
    }

    // singular
    if wants_definite {
        if feminine {
            // "el agua" rule
            if es_starts_with_stressed_a(noun) {
                return "el"
            }
            return "la"
        }
        return "el"
    }

    if feminine {
        return "una"
    }
    return "un"
}
|
||||
@@ -1,23 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn es_str_ends(s: String, suf: String) -> Bool
|
||||
extern fn es_str_drop_last(s: String, n: Int) -> String
|
||||
extern fn es_str_last_char(s: String) -> String
|
||||
extern fn es_str_last2(s: String) -> String
|
||||
extern fn es_str_last3(s: String) -> String
|
||||
extern fn es_verb_class(base: String) -> String
|
||||
extern fn es_stem(base: String) -> String
|
||||
extern fn es_slot(person: String, number: String) -> Int
|
||||
extern fn es_irregular_present(verb: String, person: String, number: String) -> String
|
||||
extern fn es_irregular_preterite(verb: String, person: String, number: String) -> String
|
||||
extern fn es_irregular_imperfect(verb: String, person: String, number: String) -> String
|
||||
extern fn es_regular_present(stem: String, vclass: String, slot: Int) -> String
|
||||
extern fn es_regular_preterite(stem: String, vclass: String, slot: Int) -> String
|
||||
extern fn es_regular_future(base: String, slot: Int) -> String
|
||||
extern fn es_irregular_future_stem(verb: String) -> String
|
||||
extern fn es_regular_imperfect(stem: String, vclass: String, slot: Int) -> String
|
||||
extern fn es_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn es_gender(noun: String) -> String
|
||||
extern fn es_invariant_plural(noun: String) -> String
|
||||
extern fn es_pluralize(noun: String) -> String
|
||||
extern fn es_starts_with_stressed_a(noun: String) -> Bool
|
||||
extern fn es_agree_article(noun: String, definite: String, number: String) -> String
|
||||
@@ -1,623 +0,0 @@
|
||||
// morphology-fi.el - Finnish morphology: noun case inflection and verb conjugation.
|
||||
//
|
||||
// Finnish is SOV, agglutinative, 15 grammatical cases, no grammatical gender,
|
||||
// no articles. Morphology is suffix-chain based.
|
||||
//
|
||||
// Key facts:
|
||||
// - Vowel harmony: suffixes harmonize with the stem's vowel class.
|
||||
// Back vowels (a, o, u) → back-harmony suffixes (-ssa, -sta, -an, etc.)
|
||||
// Front vowels (ä, ö, y) or neutral-only stems → front-harmony (-ssä, -stä, -än, etc.)
|
||||
// - Consonant gradation: alternation between strong and weak consonant grades.
|
||||
// Full gradation tables are large; this module implements the most common
|
||||
// patterns. Irregular/exceptional stems must be supplied in inflected form.
|
||||
// - Verbs conjugate for person (1/2/3) and number (singular/plural), plus tense
|
||||
// (present/past) and polarity (affirmative/negative).
|
||||
// - Question suffix: -ko (back harmony) / -kö (front harmony), appended to verb.
|
||||
//
|
||||
// Depends on: morphology.el (imported below)
|
||||
|
||||
// ── Vowel harmony ─────────────────────────────────────────────────────────────
|
||||
//
|
||||
// fi_harmony(word) -> "back" | "front"
|
||||
//
|
||||
// Scan the word right-to-left for the last unambiguously back (a, o, u) or
|
||||
// front (ä, ö, y) vowel. Neutral vowels (e, i) do not determine the class.
|
||||
// If only neutral vowels are found, default to "front" (the conservative choice
|
||||
// for borrowed words and those without clear back vowels).
|
||||
|
||||
import "morphology.el"
|
||||
fn fi_harmony(word: String) -> String {
    // Classify `word` as "back" or "front" vowel harmony.
    //
    // The word is scanned from its end towards its start; the first decisive
    // vowel met decides the class: a/o/u -> "back", ä/ö/y -> "front".
    // Neutral vowels and consonants are skipped. A word with no decisive
    // vowel (including the empty string) yields "front".
    //
    // ä and ö occupy more than one unit when string lengths are counted in
    // bytes. In addition to the one-unit slice, a window as wide as
    // str_len("ä") ending at the current position is therefore compared, so
    // the front vowels are recognised whichever unit str_slice counts in.
    let n: Int = str_len(word)
    let wide: Int = str_len("ä")
    let i: Int = n - 1
    while i >= 0 {
        let c: String = str_slice(word, i, i + 1)
        // Back vowels
        if str_eq(c, "a") { return "back" }
        if str_eq(c, "o") { return "back" }
        if str_eq(c, "u") { return "back" }
        // Front vowels, one-unit comparison
        if str_eq(c, "ä") { return "front" }
        if str_eq(c, "ö") { return "front" }
        if str_eq(c, "y") { return "front" }
        // Front vowels, full-width comparison (only when the window fits)
        if i + 1 >= wide {
            let tail: String = str_slice(word, i + 1 - wide, i + 1)
            if str_eq(tail, "ä") { return "front" }
            if str_eq(tail, "ö") { return "front" }
        }
        let i = i - 1
    }
    // Default: front (covers neutral-only and unknown stems)
    return "front"
}
|
||||
|
||||
// ── Suffix harmonization ──────────────────────────────────────────────────────
|
||||
//
|
||||
// fi_suffix(base, harmony) -> String
|
||||
//
|
||||
// Given a back-harmony base suffix, return the harmonized form.
|
||||
// Only suffixes listed in the lookup table are converted (a → ä, o → ö); any other suffix is returned unchanged.
|
||||
// E.g. fi_suffix("ssa", "front") -> "ssä"
|
||||
// fi_suffix("ssa", "back") -> "ssa"
|
||||
|
||||
fn fi_suffix(base: String, harmony: String) -> String {
    // Return the harmonized form of the back-harmony suffix `base`.
    //
    // harmony == "front": look `base` up in the table below and return its
    //                     front-vowel counterpart; a suffix that is not in
    //                     the table is returned unchanged.
    // any other harmony:  `base` is returned as-is.
    //
    // The table is a fixed list of the suffixes used by this module, not a
    // general character replacement.
    if str_eq(harmony, "front") {
        // Singular case endings
        if str_eq(base, "a") { return "ä" }
        if str_eq(base, "ssa") { return "ssä" }
        if str_eq(base, "sta") { return "stä" }
        if str_eq(base, "an") { return "än" }
        if str_eq(base, "aan") { return "ään" }
        if str_eq(base, "lla") { return "llä" }
        if str_eq(base, "lta") { return "ltä" }
        if str_eq(base, "lle") { return "lle" }
        if str_eq(base, "na") { return "nä" }
        if str_eq(base, "ksi") { return "ksi" }
        if str_eq(base, "tta") { return "ttä" }
        if str_eq(base, "ta") { return "tä" }
        // Plural case endings
        if str_eq(base, "ja") { return "jä" }
        if str_eq(base, "oja") { return "öjä" }
        if str_eq(base, "issa") { return "issä" }
        if str_eq(base, "ista") { return "istä" }
        if str_eq(base, "ihin") { return "ihin" }
        if str_eq(base, "illa") { return "illä" }
        if str_eq(base, "ilta") { return "iltä" }
        if str_eq(base, "ille") { return "ille" }
        if str_eq(base, "ina") { return "inä" }
        if str_eq(base, "itta") { return "ittä" }
        // Verb ending: 3rd person plural, requested by fi_present_ending and
        // fi_past_ending as fi_suffix("vat", harmony)
        if str_eq(base, "vat") { return "vät" }
        // Clitics and participle/infinitive markers
        if str_eq(base, "ko") { return "kö" }
        if str_eq(base, "pa") { return "pä" }
        if str_eq(base, "va") { return "vä" }
        if str_eq(base, "ma") { return "mä" }
        if str_eq(base, "han") { return "hän" }
        // Fall back: return the back-harmony form unchanged
        return base
    }
    // Back harmony: return as-is
    return base
}
|
||||
|
||||
// ── Noun case inflection ──────────────────────────────────────────────────────
|
||||
//
|
||||
// fi_noun_case(stem, gram_case, number, harmony) -> String
|
||||
//
|
||||
// Computes the inflected noun form from the oblique stem, case name, number
|
||||
// ("singular" | "plural"), and vowel harmony class.
|
||||
//
|
||||
// The "stem" is the oblique/genitive stem (e.g. talo for talo, puhu for puhua).
|
||||
// For most Type-1 nouns the oblique stem = dictionary form minus final vowel.
|
||||
// For the singular nominative the dictionary form itself is used (passed directly).
|
||||
//
|
||||
// Cases and their suffixes (talo → back, stem "talo"):
|
||||
// nominative sg : stem (no suffix) → talo
|
||||
// nominative pl : stem + t → talot
|
||||
// genitive sg : stem + n → talon
|
||||
// genitive pl : stem + jen → talojen
|
||||
// accusative sg : stem + n (= gen sg) → talon
|
||||
// accusative pl : stem + t (= nom pl) → talot
|
||||
// partitive sg : stem + a/ä → taloa
|
||||
// partitive pl : stem + ja/jä → taloja
|
||||
// inessive sg : stem + ssa/ssä → talossa
|
||||
// inessive pl : stem + issa/issä → taloissa
|
||||
// elative sg : stem + sta/stä → talosta
|
||||
// elative pl : stem + ista/istä → taloista
|
||||
// illative sg : stem + vowel + n → taloon (long vowel + n)
|
||||
// illative pl : stem + ihin → taloihin
|
||||
// adessive sg : stem + lla/llä → talolla
|
||||
// adessive pl : stem + illa/illä → taloilla
|
||||
// ablative sg : stem + lta/ltä → talolta
|
||||
// ablative pl : stem + ilta/iltä → taloilta
|
||||
// allative sg : stem + lle → talolle
|
||||
// allative pl : stem + ille → taloille
|
||||
// essive sg : stem + na/nä → talona
|
||||
// essive pl : stem + ina/inä → taloina
|
||||
// translative sg : stem + ksi → taloksi
|
||||
// translative pl : stem + iksi → taloiksi
|
||||
// instructive pl : stem + in → taloin (plural only)
|
||||
// abessive sg : stem + tta/ttä → talotta
|
||||
// abessive pl : stem + itta/ittä → taloitta
|
||||
// comitative pl : stem + ineen → taloineen (plural only)
|
||||
|
||||
fn fi_noun_case(stem: String, gram_case: String, number: String, harmony: String) -> String {
    // Attach the ending for `gram_case` to `stem`.
    //
    // number:  "singular" selects the singular ending; any other value is
    //          treated as plural.
    // harmony: passed to fi_suffix to pick the a/ä variant of the ending.
    //
    // "instructive" and "comitative" return the same (plural) form for both
    // numbers. An unrecognised case name returns `stem` unchanged. No
    // consonant gradation or stem alternation is applied here.
    let sg: Bool = str_eq(number, "singular")

    if str_eq(gram_case, "nominative") {
        if sg { return stem }
        return stem + "t"
    }

    if str_eq(gram_case, "genitive") {
        if sg { return stem + "n" }
        return stem + "jen"
    }

    // Accusative endings coincide with genitive sg / nominative pl.
    if str_eq(gram_case, "accusative") {
        if sg { return stem + "n" }
        return stem + "t"
    }

    if str_eq(gram_case, "partitive") {
        if sg { return stem + fi_suffix("a", harmony) }
        return stem + fi_suffix("ja", harmony)
    }

    if str_eq(gram_case, "inessive") {
        if sg { return stem + fi_suffix("ssa", harmony) }
        return stem + fi_suffix("issa", harmony)
    }

    if str_eq(gram_case, "elative") {
        if sg { return stem + fi_suffix("sta", harmony) }
        return stem + fi_suffix("ista", harmony)
    }

    if str_eq(gram_case, "illative") {
        if sg {
            // Singular illative: repeat the stem-final vowel and add n
            // (talo -> taloon). Stems that need -hVn or a changed stem
            // (puu, käsi) are NOT handled; callers must supply those forms.
            //
            // ä and ö are matched explicitly so the repeated vowel is a whole
            // letter even if fi_str_last_char returns a single byte.
            if str_ends_with(stem, "ä") { return stem + "än" }
            if str_ends_with(stem, "ö") { return stem + "ön" }
            let last: String = fi_str_last_char(stem)
            return stem + last + "n"
        }
        return stem + fi_suffix("ihin", harmony)
    }

    if str_eq(gram_case, "adessive") {
        if sg { return stem + fi_suffix("lla", harmony) }
        return stem + fi_suffix("illa", harmony)
    }

    if str_eq(gram_case, "ablative") {
        if sg { return stem + fi_suffix("lta", harmony) }
        return stem + fi_suffix("ilta", harmony)
    }

    if str_eq(gram_case, "allative") {
        // -lle has no a/ä alternation, so no fi_suffix call is needed
        if sg { return stem + "lle" }
        return stem + "ille"
    }

    if str_eq(gram_case, "essive") {
        if sg { return stem + fi_suffix("na", harmony) }
        return stem + fi_suffix("ina", harmony)
    }

    if str_eq(gram_case, "translative") {
        // -ksi has no a/ä alternation
        if sg { return stem + "ksi" }
        return stem + "iksi"
    }

    if str_eq(gram_case, "instructive") {
        // Plural-only case: `number` is ignored
        return stem + "in"
    }

    if str_eq(gram_case, "abessive") {
        if sg { return stem + fi_suffix("tta", harmony) }
        return stem + fi_suffix("itta", harmony)
    }

    if str_eq(gram_case, "comitative") {
        // Plural-only case: `number` is ignored
        return stem + "ineen"
    }

    // Unknown case: return stem unchanged
    return stem
}
|
||||
|
||||
// ── Noun helper: str_last_char ─────────────────────────────────────────────────
|
||||
//
|
||||
// Return the last Unicode character of a string.
|
||||
// Mirrors the helper in morphology.el; redefined here for standalone use.
|
||||
|
||||
fn fi_str_last_char(s: String) -> String {
    // Return the final one-unit slice of `s`, or "" when `s` is empty.
    // NOTE(review): "unit" is whatever str_len/str_slice count in; if that is
    // bytes, a trailing ä/ö comes back as half a letter - confirm.
    let total: Int = str_len(s)
    if total > 0 {
        return str_slice(s, total - 1, total)
    }
    return ""
}
|
||||
|
||||
// ── Full noun inflection (convenience wrapper) ────────────────────────────────
|
||||
//
|
||||
// fi_apply_case(noun, gram_case, number) -> String
|
||||
//
|
||||
// Accepts the nominative singular form (dictionary form), derives the harmony
|
||||
// class, and produces the requested case form.
|
||||
//
|
||||
// For most regular nouns the oblique stem equals the dictionary form. The
|
||||
// illative singular is handled by appending the last vowel + n.
|
||||
|
||||
fn fi_apply_case(noun: String, gram_case: String, number: String) -> String {
    // Inflect a dictionary-form noun, deriving its harmony class from the
    // noun itself and using the noun unchanged as the stem.
    let is_nominative: Bool = str_eq(gram_case, "nominative")
    if is_nominative {
        // Nominative needs no harmony lookup: bare noun, or noun + t.
        let is_singular: Bool = str_eq(number, "singular")
        if is_singular { return noun }
        let plural_form: String = noun + "t"
        return plural_form
    }
    // Every other case is delegated; callers needing a gradated stem
    // should call fi_noun_case themselves.
    let vowel_class: String = fi_harmony(noun)
    return fi_noun_case(noun, gram_case, number, vowel_class)
}
|
||||
|
||||
// ── Verb stem extraction ──────────────────────────────────────────────────────
|
||||
//
|
||||
// fi_verb_stem(dict_form) -> String
|
||||
//
|
||||
// Strip the infinitive ending to get the present-tense stem.
|
||||
//
|
||||
// Type 1 verbs (most common): infinitive ends in -a/-ä, stem = infinitive - a/ä
|
||||
// puhua → puhu, juosta → juos (irregular, handled separately)
|
||||
// Type 2 verbs: end in -da/-dä, stem = infinitive - da/dä
|
||||
// syödä → syö, juoda → juo
|
||||
// Type 3 verbs: end in -la/-lä, -ra/-rä, -na/-nä, -sta/-stä
|
||||
// tulla → tul (the final consonant + vowel are dropped; the stem ends in a single consonant)
|
||||
// Type 4 verbs: end in -ata/-ätä
|
||||
// tavata → tapaa (irregular lengthening, handled as irregular)
|
||||
//
|
||||
// For NLG purposes we handle Type 1 and Type 2 as the most frequent.
|
||||
|
||||
fn fi_verb_stem(dict_form: String) -> String {
    // Strip the infinitive ending from `dict_form`.
    //
    //   -da / -dä                  -> ending removed      (juoda -> juo)
    //   -lla / -llä / -rra / -nna  -> last consonant and vowel removed
    //                                 (a single consonant remains)
    //   -a / -ä                    -> final vowel removed (puhua -> puhu)
    //   anything else              -> returned unchanged
    //
    // Drop counts for endings that contain ä are taken from str_len of the
    // removed text rather than hard-coded, so the whole letter is removed
    // whether lengths are counted in bytes or in characters.

    // Type 2: -da/-dä
    if str_ends_with(dict_form, "da") {
        return str_drop_last(dict_form, 2)
    }
    if str_ends_with(dict_form, "dä") {
        return str_drop_last(dict_form, str_len("dä"))
    }
    // Type 3: -lla/-llä, -rra, -nna -> drop the final consonant + vowel
    if str_ends_with(dict_form, "lla") {
        return str_drop_last(dict_form, 2)
    }
    if str_ends_with(dict_form, "llä") {
        return str_drop_last(dict_form, str_len("lä"))
    }
    if str_ends_with(dict_form, "rra") {
        return str_drop_last(dict_form, 2)
    }
    if str_ends_with(dict_form, "nna") {
        return str_drop_last(dict_form, 2)
    }
    // Type 1 (and default): -a/-ä
    if str_ends_with(dict_form, "a") {
        return str_drop_last(dict_form, 1)
    }
    if str_ends_with(dict_form, "ä") {
        return str_drop_last(dict_form, str_len("ä"))
    }
    return dict_form
}
|
||||
|
||||
// ── Irregular verb table ──────────────────────────────────────────────────────
|
||||
//
|
||||
// Returns an 18-element list encoding essential irregular paradigm forms, or an
|
||||
// empty list if the verb is regular.
|
||||
//
|
||||
// Slot layout (0-indexed):
|
||||
// 0 inf infinitive (dictionary form)
|
||||
// 1 pres_1sg present 1sg
|
||||
// 2 pres_2sg present 2sg
|
||||
// 3 pres_3sg present 3sg
|
||||
// 4 pres_1pl present 1pl
|
||||
// 5 pres_2pl present 2pl
|
||||
// 6 pres_3pl present 3pl
|
||||
// 7 past_1sg past 1sg
|
||||
// 8 past_2sg past 2sg
|
||||
// 9 past_3sg past 3sg
|
||||
// 10 past_1pl past 1pl
|
||||
// 11 past_2pl past 2pl
|
||||
// 12 past_3pl past 3pl
|
||||
// 13 neg_stem negative stem (used with en/et/ei/emme/ette/eivät)
|
||||
// 14 cond_stem conditional stem (for future use)
|
||||
// 15 imp_2sg imperative 2sg
|
||||
// 16 part_pres present participle stem
|
||||
// 17 part_past past participle
|
||||
|
||||
fn fi_irregular_verb(dict_form: String) -> [String] {
    // Look up the stored 18-slot paradigm for `dict_form`.
    // Each table is laid out as: infinitive / six present forms /
    // six past forms / negative stem, conditional stem, imperative 2sg,
    // present participle, past participle.
    // A verb without a stored paradigm yields an empty list.

    // mennä — to go
    if str_eq(dict_form, "mennä") {
        let forms: [String] = ["mennä",
                               "menen", "menet", "menee", "menemme", "menette", "menevät",
                               "menin", "menit", "meni", "menimme", "menitte", "menivät",
                               "mene", "menis", "mene", "menevä", "mennyt"]
        return forms
    }

    // nähdä — to see
    if str_eq(dict_form, "nähdä") {
        let forms: [String] = ["nähdä",
                               "näen", "näet", "näkee", "näemme", "näette", "näkevät",
                               "näin", "näit", "näki", "näimme", "näitte", "näkivät",
                               "näe", "näkis", "näe", "näkevä", "nähnyt"]
        return forms
    }

    // olla — to be
    if str_eq(dict_form, "olla") {
        let forms: [String] = ["olla",
                               "olen", "olet", "on", "olemme", "olette", "ovat",
                               "olin", "olit", "oli", "olimme", "olitte", "olivat",
                               "ole", "olis", "ole", "oleva", "ollut"]
        return forms
    }

    // pitää — must / to like
    if str_eq(dict_form, "pitää") {
        let forms: [String] = ["pitää",
                               "pidän", "pidät", "pitää", "pidämme", "pidätte", "pitävät",
                               "pidin", "pidit", "piti", "pidimme", "piditte", "pitivät",
                               "pidä", "pitäis", "pidä", "pitävä", "pitänyt"]
        return forms
    }

    // saada — to get
    if str_eq(dict_form, "saada") {
        let forms: [String] = ["saada",
                               "saan", "saat", "saa", "saamme", "saatte", "saavat",
                               "sain", "sait", "sai", "saimme", "saitte", "saivat",
                               "saa", "sais", "saa", "saava", "saanut"]
        return forms
    }

    // tehdä — to do / make
    if str_eq(dict_form, "tehdä") {
        let forms: [String] = ["tehdä",
                               "teen", "teet", "tekee", "teemme", "teette", "tekevät",
                               "tein", "teit", "teki", "teimme", "teitte", "tekivät",
                               "tee", "tekis", "tee", "tekevä", "tehnyt"]
        return forms
    }

    // tietää — to know
    if str_eq(dict_form, "tietää") {
        let forms: [String] = ["tietää",
                               "tiedän", "tiedät", "tietää", "tiedämme", "tiedätte", "tietävät",
                               "tiesin", "tiesit", "tiesi", "tiesimme", "tiesitte", "tiesivät",
                               "tiedä", "tietäis", "tiedä", "tietävä", "tiennyt"]
        return forms
    }

    // tulla — to come
    if str_eq(dict_form, "tulla") {
        let forms: [String] = ["tulla",
                               "tulen", "tulet", "tulee", "tulemme", "tulette", "tulevat",
                               "tulin", "tulit", "tuli", "tulimme", "tulitte", "tulivat",
                               "tule", "tulis", "tule", "tuleva", "tullut"]
        return forms
    }

    // voida — can / to be able to
    if str_eq(dict_form, "voida") {
        let forms: [String] = ["voida",
                               "voin", "voit", "voi", "voimme", "voitte", "voivat",
                               "voin", "voit", "voi", "voimme", "voitte", "voivat",
                               "voi", "vois", "voi", "voiva", "voinut"]
        return forms
    }

    // Not in the table
    let none: [String] = []
    return none
}
|
||||
|
||||
// ── Present-tense personal endings ───────────────────────────────────────────
|
||||
//
|
||||
// Type-1 verb present tense endings (suffix onto stem):
|
||||
// 1sg: -n 2sg: -t 3sg: stem-final vowel lengthened (no suffix)
|
||||
// 1pl: -mme 2pl: -tte 3pl: -vat/-vät
|
||||
//
|
||||
// The 3sg form doubles the final vowel of the stem (puhu → puhuu, tule → tulee).
|
||||
// The 3pl uses the harmony-dependent -vat/-vät suffix.
|
||||
|
||||
fn fi_present_ending(stem: String, person: String, number: String, harmony: String) -> String {
    // Build a present-tense form by attaching a personal ending to `stem`.
    //
    //   singular: first -n, second -t, third = stem-final vowel repeated
    //   plural:   first -mme, second -tte, third -vat (harmonized by fi_suffix)
    //
    // An unrecognised person/number combination returns `stem` unchanged.
    // Stems that already end in a long vowel or diphthong are not treated
    // specially; the final vowel is repeated regardless.
    if str_eq(number, "singular") {
        if str_eq(person, "first") { return stem + "n" }
        if str_eq(person, "second") { return stem + "t" }
        if str_eq(person, "third") {
            // ä and ö are matched explicitly so the repeated vowel is a whole
            // letter even if fi_str_last_char returns a single byte.
            if str_ends_with(stem, "ä") { return stem + "ä" }
            if str_ends_with(stem, "ö") { return stem + "ö" }
            let last: String = fi_str_last_char(stem)
            return stem + last
        }
    }
    if str_eq(number, "plural") {
        if str_eq(person, "first") { return stem + "mme" }
        if str_eq(person, "second") { return stem + "tte" }
        if str_eq(person, "third") { return stem + fi_suffix("vat", harmony) }
    }
    return stem
}
|
||||
|
||||
// ── Past-tense forms ──────────────────────────────────────────────────────────
|
||||
//
|
||||
// Type-1 verbs form the past by inserting -i- between the stem and the personal
|
||||
// ending. The stem-final vowel may contract before -i-.
|
||||
//
|
||||
// puhua: puhu + i → puhui + n → puhuin (1sg past)
|
||||
// Common contraction: stem final -a/-ä drops before -i-
|
||||
// puhua stem puhu → puhu + i = puhui (no drop needed, -u not -a)
|
||||
// tavata → contraction gives tavasi (handled as irregular or Type 4)
|
||||
// For Type-1 verbs with -u/-y final stem the rule is simple concatenation.
|
||||
|
||||
fn fi_past_stem(stem: String) -> String {
    // Build the past-tense stem (stem + past marker i) from a present stem.
    //
    //   ...a  -> a replaced by oi   (heuristic kept from the original; some
    //                                a-stems drop the a instead)
    //   ...ä  -> ä dropped, + i     (the same pattern as piti in the
    //                                irregular table)
    //   ...e  -> e dropped, + i
    //   ...i  -> unchanged          (the marker merges with the stem vowel)
    //   other -> + i                (puhu -> puhui, sano -> sanoi)
    //
    // Long-vowel and diphthong stems of Type 2 verbs are not handled
    // specially. The ä drop count comes from str_len so the whole letter is
    // removed whether lengths are counted in bytes or in characters.
    if str_ends_with(stem, "a") {
        return str_drop_last(stem, 1) + "oi"
    }
    if str_ends_with(stem, "ä") {
        return str_drop_last(stem, str_len("ä")) + "i"
    }
    if str_ends_with(stem, "e") {
        return str_drop_last(stem, 1) + "i"
    }
    if str_ends_with(stem, "i") {
        return stem
    }
    return stem + "i"
}


fn fi_past_ending(stem: String, person: String, number: String, harmony: String) -> String {
    // Build a past-tense form: fi_past_stem(stem) plus a personal ending.
    //
    //   singular: first -n, second -t, third = no ending (the past stem)
    //   plural:   first -mme, second -tte, third -vat (harmonized by fi_suffix)
    //
    // An unrecognised person/number combination returns the bare past stem.
    let pstem: String = fi_past_stem(stem)
    if str_eq(number, "singular") {
        if str_eq(person, "first") { return pstem + "n" }
        if str_eq(person, "second") { return pstem + "t" }
        // 3sg keeps the past marker and takes no ending: puhui, not puhu.
        if str_eq(person, "third") { return pstem }
    }
    if str_eq(number, "plural") {
        if str_eq(person, "first") { return pstem + "mme" }
        if str_eq(person, "second") { return pstem + "tte" }
        if str_eq(person, "third") { return pstem + fi_suffix("vat", harmony) }
    }
    return pstem
}
|
||||
|
||||
// ── Negative forms ────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Finnish negation: negative auxiliary ei (conjugated for person/number) +
|
||||
// verb in the connective (negative) stem = infinitive stem without personal ending.
|
||||
//
|
||||
// Negative auxiliary conjugation:
|
||||
// 1sg: en 2sg: et 3sg: ei
|
||||
// 1pl: emme 2pl: ette 3pl: eivät
|
||||
//
|
||||
// The negative stem for most verbs = present stem (the form without any ending).
|
||||
|
||||
fn fi_neg_aux(person: String, number: String) -> String {
    // Pick the form of the negative auxiliary for a person/number pair.
    // Any combination not listed below yields "ei".
    let is_singular: Bool = str_eq(number, "singular")
    let is_plural: Bool = str_eq(number, "plural")

    if str_eq(person, "first") {
        if is_singular { return "en" }
        if is_plural { return "emme" }
    }
    if str_eq(person, "second") {
        if is_singular { return "et" }
        if is_plural { return "ette" }
    }
    if str_eq(person, "third") {
        if is_singular { return "ei" }
        if is_plural { return "eivät" }
    }
    return "ei"
}
|
||||
|
||||
fn fi_negative(verb: String, person: String, number: String) -> String {
    // Produce "<negative auxiliary> <verb stem>" for `verb`.
    // Verbs with a stored paradigm use slot 13 of that paradigm; all others
    // use the stem computed by fi_verb_stem.
    let auxiliary: String = fi_neg_aux(person, number)
    let prefix: String = auxiliary + " "
    let table: [String] = fi_irregular_verb(verb)
    if native_list_len(table) > 0 {
        let stored_stem: String = native_list_get(table, 13)
        return prefix + stored_stem
    }
    let derived_stem: String = fi_verb_stem(verb)
    return prefix + derived_stem
}
|
||||
|
||||
// ── Main conjugation entry point ──────────────────────────────────────────────
|
||||
//
|
||||
// fi_conjugate(verb, tense, person, number) -> String
|
||||
//
|
||||
// tense: "present" | "past"
|
||||
// person: "first" | "second" | "third"
|
||||
// number: "singular" | "plural"
|
||||
|
||||
fn fi_irregular_slot(tense: String, person: String, number: String) -> Int {
    // Map tense/person/number onto a slot of the irregular-verb table:
    // present forms occupy slots 1-6, past forms slots 7-12.
    // Returns 0 when the combination has no slot.
    let is_present: Bool = str_eq(tense, "present")
    let is_past: Bool = str_eq(tense, "past")
    let is_singular: Bool = str_eq(number, "singular")
    let is_plural: Bool = str_eq(number, "plural")

    if str_eq(person, "first") {
        if is_present {
            if is_singular { return 1 }
            if is_plural { return 4 }
        }
        if is_past {
            if is_singular { return 7 }
            if is_plural { return 10 }
        }
    }
    if str_eq(person, "second") {
        if is_present {
            if is_singular { return 2 }
            if is_plural { return 5 }
        }
        if is_past {
            if is_singular { return 8 }
            if is_plural { return 11 }
        }
    }
    if str_eq(person, "third") {
        if is_present {
            if is_singular { return 3 }
            if is_plural { return 6 }
        }
        if is_past {
            if is_singular { return 9 }
            if is_plural { return 12 }
        }
    }
    return 0
}


fn fi_conjugate(verb: String, tense: String, person: String, number: String) -> String {
    // Conjugate `verb` (dictionary form) for the given tense, person, number.
    //
    // A verb with a stored paradigm is answered from that table whenever the
    // requested combination has a slot; everything else goes through the
    // regular stem + ending rules. An unrecognised tense returns the stem.
    let table: [String] = fi_irregular_verb(verb)
    if native_list_len(table) > 0 {
        let slot: Int = fi_irregular_slot(tense, person, number)
        if slot > 0 {
            return native_list_get(table, slot)
        }
    }

    // Regular path
    let base: String = fi_verb_stem(verb)
    let vowel_class: String = fi_harmony(verb)

    if str_eq(tense, "present") {
        return fi_present_ending(base, person, number, vowel_class)
    }
    if str_eq(tense, "past") {
        return fi_past_ending(base, person, number, vowel_class)
    }
    return base
}
|
||||
|
||||
// ── Question suffix ───────────────────────────────────────────────────────────
|
||||
//
|
||||
// Finnish questions are formed by appending -ko (back harmony) or -kö (front
|
||||
// harmony) directly to the verb (or sometimes another focus word).
|
||||
|
||||
fn fi_question_suffix(harmony: String) -> String {
    // "kö" for front harmony; "ko" for every other harmony value.
    let wants_front: Bool = str_eq(harmony, "front")
    if wants_front {
        return "kö"
    }
    return "ko"
}
|
||||
|
||||
// ── Question formation ────────────────────────────────────────────────────────
|
||||
//
|
||||
// fi_make_question: append the appropriate question suffix to a verb form.
|
||||
|
||||
fn fi_make_question(verb_form: String, harmony: String) -> String {
    // Append the harmony-matched question clitic to `verb_form`.
    let clitic: String = fi_question_suffix(harmony)
    return verb_form + clitic
}
|
||||
|
||||
// ── Convenience: inflect a noun through all 15 cases ─────────────────────────
|
||||
//
|
||||
// Returns a 45-element list: [case_name, sg_form, pl_form, case_name, sg_form, pl_form, ...]
|
||||
// for all 15 cases. Plural-only cases (instructive, comitative) have an
|
||||
// empty string for the singular slot.
|
||||
|
||||
fn fi_full_paradigm(noun: String) -> [String] {
    // Inflect `noun` through every case in `case_names`.
    // For each case three entries are appended, in this order: the case
    // name, the singular form, the plural form. The singular entry is ""
    // for the plural-only cases (instructive, comitative).
    let case_names: [String] = ["nominative", "genitive", "accusative", "partitive",
                                "inessive", "elative", "illative", "adessive",
                                "ablative", "allative", "essive", "translative",
                                "instructive", "abessive", "comitative"]
    let harmony: String = fi_harmony(noun)
    let out: [String] = []
    let total: Int = native_list_len(case_names)
    let idx: Int = 0
    while idx < total {
        let name: String = native_list_get(case_names, idx)
        let plural_form: String = fi_noun_case(noun, name, "plural", harmony)

        // Entry 1: case name
        let out = native_list_append(out, name)

        // Entry 2: singular form, blank for plural-only cases
        if str_eq(name, "comitative") {
            let out = native_list_append(out, "")
        } else {
            if str_eq(name, "instructive") {
                let out = native_list_append(out, "")
            } else {
                let out = native_list_append(out, fi_noun_case(noun, name, "singular", harmony))
            }
        }

        // Entry 3: plural form
        let out = native_list_append(out, plural_form)
        let idx = idx + 1
    }
    return out
}
|
||||
@@ -1,17 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn fi_harmony(word: String) -> String
|
||||
extern fn fi_suffix(base: String, harmony: String) -> String
|
||||
extern fn fi_noun_case(stem: String, gram_case: String, number: String, harmony: String) -> String
|
||||
extern fn fi_str_last_char(s: String) -> String
|
||||
extern fn fi_apply_case(noun: String, gram_case: String, number: String) -> String
|
||||
extern fn fi_verb_stem(dict_form: String) -> String
|
||||
extern fn fi_irregular_verb(dict_form: String) -> [String]
|
||||
extern fn fi_present_ending(stem: String, person: String, number: String, harmony: String) -> String
|
||||
extern fn fi_past_stem(stem: String) -> String
|
||||
extern fn fi_past_ending(stem: String, person: String, number: String, harmony: String) -> String
|
||||
extern fn fi_neg_aux(person: String, number: String) -> String
|
||||
extern fn fi_negative(verb: String, person: String, number: String) -> String
|
||||
extern fn fi_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn fi_question_suffix(harmony: String) -> String
|
||||
extern fn fi_make_question(verb_form: String, harmony: String) -> String
|
||||
extern fn fi_full_paradigm(noun: String) -> [String]
|
||||
@@ -1,677 +0,0 @@
|
||||
// morphology-fr.el - French morphology for the NLG engine.
|
||||
//
|
||||
// Implements fusional French verb conjugation, noun pluralization, gender
|
||||
// inference, article agreement, and interrogative inversion. Designed as a
|
||||
// companion to morphology.el and called by the engine when the language
|
||||
// profile code is "fr".
|
||||
//
|
||||
// Verb tenses covered: present, future, imparfait, passé composé.
|
||||
// Persons: first/second/third × singular/plural.
|
||||
// Verb groups: -er (regular), -ir (regular finir-type), -re (regular vendre-type)
|
||||
// + a core set of common irregular verbs.
|
||||
//
|
||||
// Liaison / elision notes:
|
||||
// - le/la → l' before vowel-initial nouns (handled in fr_agree_article)
|
||||
// - est-ce que can precede any statement for yes/no questions (fr_question_inversion)
|
||||
// - Inversion inserts -t- between vowel-final verb form and 3s pronoun: parle-t-il
|
||||
//
|
||||
// Depends on: morphology.el (str_ends_with, str_slice, str_len helpers)
|
||||
|
||||
// ── String helpers (local, matching morphology.el conventions) ────────────────
|
||||
|
||||
import "morphology.el"
|
||||
fn fr_str_ends(s: String, suf: String) -> Bool {
    // Local alias for str_ends_with: does `s` end with `suf`?
    let matched: Bool = str_ends_with(s, suf)
    return matched
}
|
||||
|
||||
fn fr_str_drop_last(s: String, n: Int) -> String {
    // Return `s` without its last `n` units; "" when `n` covers the whole
    // string or more.
    let total: Int = str_len(s)
    if total > n {
        let keep: Int = total - n
        return str_slice(s, 0, keep)
    }
    return ""
}
|
||||
|
||||
fn fr_str_last_char(s: String) -> String {
    // Return the final one-unit slice of `s`, or "" for the empty string.
    let size: Int = str_len(s)
    if size == 0 { return "" }
    let start: Int = size - 1
    return str_slice(s, start, size)
}
|
||||
|
||||
fn fr_str_last2(s: String) -> String {
    // Return the final two-unit slice of `s`; a string shorter than two
    // units is returned whole.
    let size: Int = str_len(s)
    if size >= 2 {
        return str_slice(s, size - 2, size)
    }
    return s
}
|
||||
|
||||
fn fr_is_vowel_start(s: String) -> Bool {
    // True when `s` begins with a vowel letter or with h.
    //
    // Recognised initials: a e i o u h, and the accented/ligature letters
    // é è ê î ô û â œ. Every initial h counts as a match; no distinction is
    // made between mute and aspirated h. The empty string returns false.
    //
    // The accented letters are compared against a prefix as wide as
    // str_len("é") rather than a one-unit slice, so they match whether string
    // lengths are counted in bytes or in characters.
    let n: Int = str_len(s)
    if n == 0 {
        return false
    }

    // Plain one-unit initials
    let c: String = str_slice(s, 0, 1)
    if str_eq(c, "a") { return true }
    if str_eq(c, "e") { return true }
    if str_eq(c, "i") { return true }
    if str_eq(c, "o") { return true }
    if str_eq(c, "u") { return true }
    if str_eq(c, "h") { return true }

    // Accented initials (only when the string is long enough to hold one)
    let wide: Int = str_len("é")
    if n >= wide {
        let head: String = str_slice(s, 0, wide)
        if str_eq(head, "é") { return true }
        if str_eq(head, "è") { return true }
        if str_eq(head, "ê") { return true }
        if str_eq(head, "î") { return true }
        if str_eq(head, "ô") { return true }
        if str_eq(head, "û") { return true }
        if str_eq(head, "â") { return true }
        if str_eq(head, "œ") { return true }
    }
    return false
}
|
||||
|
||||
// ── Verb group detection ──────────────────────────────────────────────────────
|
||||
//
|
||||
// Returns "er" | "ir" | "re" | "irregular".
|
||||
// Irregular detection is done by checking against a known list first; all other
|
||||
// verbs are classified by ending.
|
||||
|
||||
fn fr_is_known_irregular(verb: String) -> Bool {
    // True when `verb` is one of the twelve infinitives listed here.
    // The comparison is exact; the list is in alphabetical order.
    if str_eq(verb, "aller") { return true }
    if str_eq(verb, "avoir") { return true }
    if str_eq(verb, "dire") { return true }
    if str_eq(verb, "faire") { return true }
    if str_eq(verb, "mettre") { return true }
    if str_eq(verb, "pouvoir") { return true }
    if str_eq(verb, "prendre") { return true }
    if str_eq(verb, "savoir") { return true }
    if str_eq(verb, "venir") { return true }
    if str_eq(verb, "voir") { return true }
    if str_eq(verb, "vouloir") { return true }
    if str_eq(verb, "être") { return true }
    return false
}
|
||||
|
||||
fn fr_verb_group(base: String) -> String {
    // Classify an infinitive as "irregular", "ir", "re" or "er".
    // Listed irregulars win over the ending. "er" is both the result for
    // -er infinitives and the fallback for anything unclassified, so it
    // needs no test of its own.
    let listed: Bool = fr_is_known_irregular(base)
    if listed { return "irregular" }
    if fr_str_ends(base, "ir") { return "ir" }
    if fr_str_ends(base, "re") { return "re" }
    return "er"
}
|
||||
|
||||
fn fr_stem(base: String) -> String {
    // Remove the final two units of `base` (the infinitive ending).
    let ending_len: Int = 2
    return fr_str_drop_last(base, ending_len)
}
|
||||
|
||||
// ── Person/number slot index ──────────────────────────────────────────────────
|
||||
//
|
||||
// 0 = 1s (je), 1 = 2s (tu), 2 = 3s (il/elle), 3 = 1p (nous), 4 = 2p (vous), 5 = 3p (ils/elles)
|
||||
|
||||
fn fr_slot(person: String, number: String) -> Int {
    // Map person/number onto a slot 0-5: singular forms take 0-2, all other
    // numbers take 3-5. Any person other than "first" or "second" is
    // treated as third.
    let is_first: Bool = str_eq(person, "first")
    let is_second: Bool = str_eq(person, "second")

    if str_eq(number, "singular") {
        if is_first { return 0 }
        if is_second { return 1 }
        return 2
    }
    if is_first { return 3 }
    if is_second { return 4 }
    return 5
}
|
||||
|
||||
// ── Irregular present tense ───────────────────────────────────────────────────
|
||||
//
|
||||
// être: suis, es, est, sommes, êtes, sont
|
||||
// avoir: ai, as, a, avons, avez, ont
|
||||
// aller: vais, vas, va, allons, allez, vont
|
||||
// faire: fais, fais, fait, faisons, faites, font
|
||||
// pouvoir: peux, peux, peut, pouvons, pouvez, peuvent
|
||||
// vouloir: veux, veux, veut, voulons, voulez, veulent
|
||||
// venir: viens, viens, vient, venons, venez, viennent
|
||||
// dire: dis, dis, dit, disons, dites, disent
|
||||
// voir: vois, vois, voit, voyons, voyez, voient
|
||||
// prendre: prends, prends, prend, prenons, prenez, prennent
|
||||
// mettre: mets, mets, met, mettons, mettez, mettent
|
||||
// savoir: sais, sais, sait, savons, savez, savent
|
||||
|
||||
// fr_irregular_present: present-indicative form of a hard-coded irregular verb.
//
//   verb:   infinitive; "etre" is accepted as an ASCII alias of "être"
//   person: "first" | "second" | "third"   (resolved through fr_slot)
//   number: "singular" | "plural"           (resolved through fr_slot)
//
// Returns the conjugated form, or "" when `verb` is not in the table so the
// caller can fall back to the regular rules.
fn fr_irregular_present(verb: String, person: String, number: String) -> String {
    // ASCII alias used by morph_map_canonical: conjugate exactly like "être"
    // so both spellings share a single paradigm and cannot drift apart.
    if str_eq(verb, "etre") {
        return fr_irregular_present("être", person, number)
    }

    let slot: Int = fr_slot(person, number)

    if str_eq(verb, "être") {
        if slot == 0 { return "suis" }
        if slot == 1 { return "es" }
        if slot == 2 { return "est" }
        if slot == 3 { return "sommes" }
        // 2p keeps its circumflex, matching fr_etre_present.
        if slot == 4 { return "êtes" }
        return "sont"
    }

    if str_eq(verb, "avoir") {
        if slot == 0 { return "ai" }
        if slot == 1 { return "as" }
        if slot == 2 { return "a" }
        if slot == 3 { return "avons" }
        if slot == 4 { return "avez" }
        return "ont"
    }

    if str_eq(verb, "aller") {
        if slot == 0 { return "vais" }
        if slot == 1 { return "vas" }
        if slot == 2 { return "va" }
        if slot == 3 { return "allons" }
        if slot == 4 { return "allez" }
        return "vont"
    }

    if str_eq(verb, "faire") {
        if slot == 0 { return "fais" }
        if slot == 1 { return "fais" }
        if slot == 2 { return "fait" }
        if slot == 3 { return "faisons" }
        if slot == 4 { return "faites" }
        return "font"
    }

    if str_eq(verb, "pouvoir") {
        if slot == 0 { return "peux" }
        if slot == 1 { return "peux" }
        if slot == 2 { return "peut" }
        if slot == 3 { return "pouvons" }
        if slot == 4 { return "pouvez" }
        return "peuvent"
    }

    if str_eq(verb, "vouloir") {
        if slot == 0 { return "veux" }
        if slot == 1 { return "veux" }
        if slot == 2 { return "veut" }
        if slot == 3 { return "voulons" }
        if slot == 4 { return "voulez" }
        return "veulent"
    }

    if str_eq(verb, "venir") {
        if slot == 0 { return "viens" }
        if slot == 1 { return "viens" }
        if slot == 2 { return "vient" }
        if slot == 3 { return "venons" }
        if slot == 4 { return "venez" }
        return "viennent"
    }

    if str_eq(verb, "dire") {
        if slot == 0 { return "dis" }
        if slot == 1 { return "dis" }
        if slot == 2 { return "dit" }
        if slot == 3 { return "disons" }
        if slot == 4 { return "dites" }
        return "disent"
    }

    if str_eq(verb, "voir") {
        if slot == 0 { return "vois" }
        if slot == 1 { return "vois" }
        if slot == 2 { return "voit" }
        if slot == 3 { return "voyons" }
        if slot == 4 { return "voyez" }
        return "voient"
    }

    if str_eq(verb, "prendre") {
        if slot == 0 { return "prends" }
        if slot == 1 { return "prends" }
        if slot == 2 { return "prend" }
        if slot == 3 { return "prenons" }
        if slot == 4 { return "prenez" }
        return "prennent"
    }

    if str_eq(verb, "mettre") {
        if slot == 0 { return "mets" }
        if slot == 1 { return "mets" }
        if slot == 2 { return "met" }
        if slot == 3 { return "mettons" }
        if slot == 4 { return "mettez" }
        return "mettent"
    }

    if str_eq(verb, "savoir") {
        if slot == 0 { return "sais" }
        if slot == 1 { return "sais" }
        if slot == 2 { return "sait" }
        if slot == 3 { return "savons" }
        if slot == 4 { return "savez" }
        return "savent"
    }

    // Not an irregular verb known to this table.
    return ""
}
|
||||
|
||||
// ── Regular present tense ─────────────────────────────────────────────────────
|
||||
//
|
||||
// -er: -e, -es, -e, -ons, -ez, -ent
|
||||
// -ir: -is, -is, -it, -issons, -issez, -issent (finir-type; stem gets -iss- in plural)
|
||||
// -re: -s, -s, -(nothing), -ons, -ez, -ent
|
||||
|
||||
// fr_regular_present: attach the regular present-tense ending to `stem`.
//
//   stem:   verb stem (infinitive without its ending)
//   vgroup: "er" | "ir"; any other value is conjugated as an -re verb
//   slot:   paradigm index 0-4; any other value yields the 3rd-plural form
fn fr_regular_present(stem: String, vgroup: String, slot: Int) -> String {
    if str_eq(vgroup, "ir") {
        // finir-type: plain endings in the singular, -iss- infix in the plural
        if slot == 0 {
            return stem + "is"
        }
        if slot == 1 {
            return stem + "is"
        }
        if slot == 2 {
            return stem + "it"
        }
        let plural_stem: String = stem + "iss"
        if slot == 3 {
            return plural_stem + "ons"
        }
        if slot == 4 {
            return plural_stem + "ez"
        }
        return plural_stem + "ent"
    }

    // -er and -re verbs share their 1st/2nd-plural endings.
    if slot == 3 {
        return stem + "ons"
    }
    if slot == 4 {
        return stem + "ez"
    }

    if str_eq(vgroup, "er") {
        if slot == 0 {
            return stem + "e"
        }
        if slot == 1 {
            return stem + "es"
        }
        if slot == 2 {
            return stem + "e"
        }
        return stem + "ent"
    }

    // -re (vendre-type): 3rd singular is the bare stem
    if slot == 0 {
        return stem + "s"
    }
    if slot == 1 {
        return stem + "s"
    }
    if slot == 2 {
        return stem
    }
    return stem + "ent"
}
|
||||
|
||||
// ── Regular future tense ──────────────────────────────────────────────────────
|
||||
//
|
||||
// Future is formed from the infinitive (minus silent -e for -re verbs) + endings:
|
||||
// -ai, -as, -a, -ons, -ez, -ont
|
||||
|
||||
// fr_future_stem: stem to which the future endings are attached.
//
// For the "re" group the last unit of the infinitive is dropped through
// fr_str_drop_last(base, 1); every other group uses the infinitive as-is.
fn fr_future_stem(base: String, vgroup: String) -> String {
    let is_re_verb: Bool = str_eq(vgroup, "re")
    if !is_re_verb {
        return base
    }
    return fr_str_drop_last(base, 1)
}
|
||||
|
||||
// fr_regular_future: append the future ending for `slot` to `fstem`.
//
// Endings by slot: 0 -ai, 1 -as, 2 -a, 3 -ons, 4 -ez; any other slot gets -ont.
fn fr_regular_future(fstem: String, slot: Int) -> String {
    // plural persons
    if slot == 3 {
        return fstem + "ons"
    }
    if slot == 4 {
        return fstem + "ez"
    }

    // singular persons
    if slot == 0 {
        return fstem + "ai"
    }
    if slot == 1 {
        return fstem + "as"
    }
    if slot == 2 {
        return fstem + "a"
    }

    // 3rd plural, also the fallback for out-of-range slots
    return fstem + "ont"
}
|
||||
|
||||
// ── Irregular future stems ────────────────────────────────────────────────────
|
||||
//
|
||||
// Returns the irregular future stem, or "" if regular.
|
||||
|
||||
// fr_irregular_future_stem: irregular future stem for `verb`.
//
// Returns the stem to which the regular future endings are attached, or ""
// when the verb is not listed and forms its future from the infinitive.
// "etre" is accepted as an ASCII alias of "être", mirroring
// fr_irregular_present, so the alias no longer produces "etrai".
fn fr_irregular_future_stem(verb: String) -> String {
    if str_eq(verb, "être") { return "ser" }
    if str_eq(verb, "etre") { return "ser" }
    if str_eq(verb, "avoir") { return "aur" }
    if str_eq(verb, "aller") { return "ir" }
    if str_eq(verb, "faire") { return "fer" }
    if str_eq(verb, "pouvoir") { return "pourr" }
    if str_eq(verb, "vouloir") { return "voudr" }
    if str_eq(verb, "venir") { return "viendr" }
    if str_eq(verb, "voir") { return "verr" }
    if str_eq(verb, "savoir") { return "saur" }
    // mourir is already handled as irregular by the passé composé helpers;
    // its future is mourrai, not "mourirai".
    if str_eq(verb, "mourir") { return "mourr" }
    return ""
}
|
||||
|
||||
// ── Regular imparfait ─────────────────────────────────────────────────────────
//
// The imparfait is built on the stem of the nous-form of the present tense
// (nous-form minus -ons) + endings:
//   -ais, -ais, -ait, -ions, -iez, -aient
//
// Examples: avoir -> avons -> av-;  faire -> faisons -> fais-;
//           voir -> voyons -> voy-; prendre -> prenons -> pren-.
// Exception: être uses ét- as the imparfait stem.
//
// fr_imperfect_stem:
//   base:   infinitive ("etre" is accepted as an ASCII alias of "être")
//   vgroup: verb group; not consulted, kept so existing callers still compile
//
// Verbs listed in fr_irregular_present derive the stem from their nous-form.
// All other verbs keep the previous behaviour: infinitive minus its ending
// (fr_stem).
// NOTE(review): finir-type -ir verbs take -iss- in standard French
// (finir -> finissais); the fr_stem fallback does not produce it, while
// partir-type verbs (partais) rely on the bare stem. Confirm which verbs
// callers pass before changing the fallback.
fn fr_imperfect_stem(base: String, vgroup: String) -> String {
    // être is the one verb whose nous-form (sommes) does not end in -ons.
    if str_eq(base, "être") { return "ét" }
    if str_eq(base, "etre") { return "ét" }

    // Every remaining entry of the irregular table has a nous-form ending in
    // -ons, so dropping the last three characters yields the imparfait stem.
    let nous_form: String = fr_irregular_present(base, "first", "plural")
    if !str_eq(nous_form, "") {
        return fr_str_drop_last(nous_form, 3)
    }

    return fr_stem(base)
}
|
||||
|
||||
// fr_regular_imperfect: append the imparfait ending for `slot` to `istem`.
//
// Endings by slot: 0 -ais, 1 -ais, 2 -ait, 3 -ions, 4 -iez; any other slot
// gets -aient.
fn fr_regular_imperfect(istem: String, slot: Int) -> String {
    // plural persons
    if slot == 3 {
        return istem + "ions"
    }
    if slot == 4 {
        return istem + "iez"
    }

    // singular persons
    if slot == 2 {
        return istem + "ait"
    }
    if slot == 0 {
        return istem + "ais"
    }
    if slot == 1 {
        return istem + "ais"
    }

    // 3rd plural, also the fallback for out-of-range slots
    return istem + "aient"
}
|
||||
|
||||
// ── Passé composé (past compound) ────────────────────────────────────────────
|
||||
//
|
||||
// Passé composé = auxiliary (avoir or être) + past participle.
|
||||
// Most verbs use avoir; a core set of motion/state verbs use être.
|
||||
//
|
||||
// This function returns a two-word string "auxiliary participle".
|
||||
// The caller is responsible for agreement of the past participle when être is
|
||||
// used (feminine adds -e, plural adds -s); this function returns the masculine
|
||||
// singular participle unconditionally.
|
||||
|
||||
// fr_uses_etre: true when `verb` is one of the listed infinitives whose
// passé composé is built with être; every other verb gets avoir.
fn fr_uses_etre(verb: String) -> Bool {
    // coming and going
    if str_eq(verb, "aller") {
        return true
    }
    if str_eq(verb, "venir") {
        return true
    }
    if str_eq(verb, "arriver") {
        return true
    }
    if str_eq(verb, "partir") {
        return true
    }
    if str_eq(verb, "entrer") {
        return true
    }
    if str_eq(verb, "rentrer") {
        return true
    }
    if str_eq(verb, "sortir") {
        return true
    }
    if str_eq(verb, "retourner") {
        return true
    }
    if str_eq(verb, "passer") {
        return true
    }

    // up, down, staying put
    if str_eq(verb, "monter") {
        return true
    }
    if str_eq(verb, "descendre") {
        return true
    }
    if str_eq(verb, "tomber") {
        return true
    }
    if str_eq(verb, "rester") {
        return true
    }

    // birth and death
    if str_eq(verb, "naître") {
        return true
    }
    if str_eq(verb, "mourir") {
        return true
    }

    return false
}
|
||||
|
||||
// Returns the past participle (masculine singular form).
|
||||
// fr_past_participle: masculine-singular past participle of `verb`.
//
//   verb: infinitive; "etre" is accepted as an ASCII alias of "être"
//         (it previously fell through to the -re rule and produced "etu")
//
// Listed irregular verbs return a fixed form. Any other verb has its
// infinitive ending removed with fr_str_drop_last(verb, 2) and receives
// -é (group "er"), -i (group "ir") or -u (everything else).
fn fr_past_participle(verb: String) -> String {
    // Irregular participles
    if str_eq(verb, "être") { return "été" }
    if str_eq(verb, "etre") { return "été" }
    if str_eq(verb, "avoir") { return "eu" }
    if str_eq(verb, "aller") { return "allé" }
    if str_eq(verb, "faire") { return "fait" }
    if str_eq(verb, "pouvoir") { return "pu" }
    if str_eq(verb, "vouloir") { return "voulu" }
    if str_eq(verb, "venir") { return "venu" }
    if str_eq(verb, "dire") { return "dit" }
    if str_eq(verb, "voir") { return "vu" }
    if str_eq(verb, "prendre") { return "pris" }
    if str_eq(verb, "mettre") { return "mis" }
    if str_eq(verb, "savoir") { return "su" }
    if str_eq(verb, "naître") { return "né" }
    if str_eq(verb, "mourir") { return "mort" }

    // Regular participles by group
    let vgroup: String = fr_verb_group(verb)
    if str_eq(vgroup, "er") {
        return fr_str_drop_last(verb, 2) + "é"
    }
    if str_eq(vgroup, "ir") {
        return fr_str_drop_last(verb, 2) + "i"
    }
    // -re verbs: drop -re, add -u
    return fr_str_drop_last(verb, 2) + "u"
}
|
||||
|
||||
// Conjugates the avoir auxiliary in the present (for passé composé).
|
||||
// fr_avoir_present: present tense of the auxiliary avoir for `slot`.
//
// Slots 0-4 give ai, as, a, avons, avez; any other slot gives ont.
fn fr_avoir_present(slot: Int) -> String {
    // plural persons
    if slot == 3 {
        return "avons"
    }
    if slot == 4 {
        return "avez"
    }

    // singular persons
    if slot == 0 {
        return "ai"
    }
    if slot == 1 {
        return "as"
    }
    if slot == 2 {
        return "a"
    }

    return "ont"
}
|
||||
|
||||
// Conjugates the être auxiliary in the present (for passé composé).
|
||||
// fr_etre_present: present tense of the auxiliary être for `slot`.
//
// Slots 0-4 give suis, es, est, sommes, êtes; any other slot gives sont.
fn fr_etre_present(slot: Int) -> String {
    // plural persons
    if slot == 3 {
        return "sommes"
    }
    if slot == 4 {
        return "êtes"
    }

    // singular persons
    if slot == 0 {
        return "suis"
    }
    if slot == 1 {
        return "es"
    }
    if slot == 2 {
        return "est"
    }

    return "sont"
}
|
||||
|
||||
// ── Full conjugation entry point ──────────────────────────────────────────────
|
||||
//
|
||||
// fr_conjugate: conjugate a French verb.
|
||||
//
|
||||
// verb: French infinitive (e.g. "parler", "être", "venir")
|
||||
// tense: "present" | "future" | "imperfect" | "past"
|
||||
// (note: "past" returns the passé composé as "aux participle")
|
||||
// person: "first" | "second" | "third"
|
||||
// number: "singular" | "plural"
|
||||
|
||||
// fr_canonical_infinitive: resolve the ASCII alias "etre" to "être"; every
// other infinitive is returned unchanged. Private helper of fr_conjugate.
fn fr_canonical_infinitive(verb: String) -> String {
    if str_eq(verb, "etre") {
        return "être"
    }
    return verb
}

// fr_conjugate: conjugate a French verb.
//
//   verb:   French infinitive (e.g. "parler", "être", "venir"); the ASCII
//           alias "etre" is conjugated as "être" in every tense
//   tense:  "present" | "future" | "imperfect" | "past"
//           ("past" returns the passé composé as "auxiliary participle")
//   person: "first" | "second" | "third"
//   number: "singular" | "plural"
//
// Returns the conjugated form. An unrecognised tense returns `verb` exactly
// as it was passed in.
fn fr_conjugate(verb: String, tense: String, person: String, number: String) -> String {
    let slot: Int = fr_slot(person, number)
    // The future, imperfect and participle helpers only know the accented
    // spelling, so the alias is resolved once here.
    let lemma: String = fr_canonical_infinitive(verb)

    if str_eq(tense, "present") {
        // Irregular table first; "" means the verb is regular.
        let irreg: String = fr_irregular_present(lemma, person, number)
        if !str_eq(irreg, "") {
            return irreg
        }
        let vgroup: String = fr_verb_group(lemma)
        let stem: String = fr_stem(lemma)
        return fr_regular_present(stem, vgroup, slot)
    }

    if str_eq(tense, "future") {
        // Irregular verbs swap the stem but keep the regular endings.
        let irreg_stem: String = fr_irregular_future_stem(lemma)
        if !str_eq(irreg_stem, "") {
            return fr_regular_future(irreg_stem, slot)
        }
        let vgroup: String = fr_verb_group(lemma)
        let fstem: String = fr_future_stem(lemma, vgroup)
        return fr_regular_future(fstem, slot)
    }

    if str_eq(tense, "imperfect") {
        let vgroup: String = fr_verb_group(lemma)
        let istem: String = fr_imperfect_stem(lemma, vgroup)
        return fr_regular_imperfect(istem, slot)
    }

    if str_eq(tense, "past") {
        // Passé composé: auxiliary + past participle
        let pp: String = fr_past_participle(lemma)
        if fr_uses_etre(lemma) {
            let aux: String = fr_etre_present(slot)
            return aux + " " + pp
        }
        let aux: String = fr_avoir_present(slot)
        return aux + " " + pp
    }

    // Unknown tense: return infinitive unchanged
    return verb
}
|
||||
|
||||
// ── Noun gender inference ─────────────────────────────────────────────────────
|
||||
//
|
||||
// Returns "m" (masculine), "f" (feminine), or "unknown".
|
||||
//
|
||||
// Heuristics (common French patterns):
|
||||
// ends in -tion/-sion/-xion -> feminine (nation, passion, connexion)
|
||||
// ends in -ure -> feminine (voiture, culture)
|
||||
// ends in -ette -> feminine (omelette, cigarette)
|
||||
// ends in -eur (abstract) -> feminine in French (couleur, peur, chaleur), but NOT detected here
|
||||
// ends in -eur (agent) -> masculine (acteur, serveur) — the two cannot be told apart by suffix
|
||||
// ends in -eur (as coded) -> always masculine (docteur, auteur); abstract -eur nouns are misclassified
|
||||
// ends in -ment -> masculine (sentiment, gouvernement)
|
||||
// ends in -age -> masculine (voyage, fromage)
|
||||
// ends in -isme -> masculine (socialisme)
|
||||
// ends in -eau -> masculine (tableau, gâteau)
|
||||
// ends in -er/-é -> masculine (boucher, café)
|
||||
// ends in -ée -> feminine (journée, idée)
|
||||
// ends in -ie -> feminine (philosophie, géographie)
|
||||
// ends in -ance/-ence -> feminine (chance, science)
|
||||
// ends in -té/-tié -> feminine (beauté, amitié)
|
||||
// ends in -ude -> feminine (attitude, solitude)
|
||||
// ends in -ade -> feminine (salade, promenade)
|
||||
// ends in -ette -> feminine (already covered)
|
||||
// ends in -e (generic) -> often feminine but not reliable; return "unknown"
|
||||
|
||||
// fr_gender_has_feminine_suffix: true when `noun` ends in one of the suffixes
// this module treats as feminine. Private helper of fr_gender.
fn fr_gender_has_feminine_suffix(noun: String) -> Bool {
    // -ion family
    if fr_str_ends(noun, "tion") {
        return true
    }
    if fr_str_ends(noun, "sion") {
        return true
    }
    if fr_str_ends(noun, "xion") {
        return true
    }
    // -é family
    if fr_str_ends(noun, "ité") {
        return true
    }
    if fr_str_ends(noun, "té") {
        return true
    }
    if fr_str_ends(noun, "tié") {
        return true
    }
    if fr_str_ends(noun, "ée") {
        return true
    }
    // remaining feminine endings
    if fr_str_ends(noun, "ure") {
        return true
    }
    if fr_str_ends(noun, "ette") {
        return true
    }
    if fr_str_ends(noun, "ance") {
        return true
    }
    if fr_str_ends(noun, "ence") {
        return true
    }
    if fr_str_ends(noun, "ude") {
        return true
    }
    if fr_str_ends(noun, "ade") {
        return true
    }
    if fr_str_ends(noun, "ie") {
        return true
    }
    return false
}

// fr_gender_has_masculine_suffix: true when `noun` ends in one of the suffixes
// this module treats as masculine. Private helper of fr_gender.
fn fr_gender_has_masculine_suffix(noun: String) -> Bool {
    if fr_str_ends(noun, "ment") {
        return true
    }
    if fr_str_ends(noun, "isme") {
        return true
    }
    if fr_str_ends(noun, "age") {
        return true
    }
    if fr_str_ends(noun, "eau") {
        return true
    }
    if fr_str_ends(noun, "eur") {
        return true
    }
    if fr_str_ends(noun, "er") {
        return true
    }
    if fr_str_ends(noun, "é") {
        return true
    }
    return false
}

// fr_gender: guess the grammatical gender of `noun` from its ending.
//
// Returns "f", "m", or "unknown" when no listed suffix matches. Feminine
// suffixes are tested first so that -té / -ité / -tié / -ée win over the
// bare masculine -é.
fn fr_gender(noun: String) -> String {
    if fr_gender_has_feminine_suffix(noun) {
        return "f"
    }
    if fr_gender_has_masculine_suffix(noun) {
        return "m"
    }
    return "unknown"
}
|
||||
|
||||
// ── Noun pluralization ────────────────────────────────────────────────────────
//
// French plural rules, in the order they are applied:
//   already ends in -s, -x, or -z      -> unchanged
//   bleu, pneu, landau, sarrau,
//   bal, carnaval, chacal, festival,
//   récital, régal                     -> add -s (exceptions to the rules below)
//   ends in -au (includes -eau)        -> add -x  (bateau → bateaux, tuyau → tuyaux)
//   ends in -eu                        -> add -x  (jeu → jeux)
//   ends in -al                        -> replace -al with -aux (animal → animaux)
//   bail, corail, émail, soupirail,
//   travail, vantail, vitrail          -> replace -ail with -aux (travail → travaux)
//   bijou, caillou, chou, genou,
//   hibou, joujou, pou                 -> add -x  (bijou → bijoux)
//   otherwise                          -> add -s  (détail → détails, clou → clous)

// fr_invariant_plural: return `noun` itself when its last character, as
// reported by fr_str_last_char, is s, x or z (such nouns do not change in the
// plural); return "" otherwise so the caller applies the other rules.
fn fr_invariant_plural(noun: String) -> String {
    let last: String = fr_str_last_char(noun)
    if str_eq(last, "s") { return noun }
    if str_eq(last, "x") { return noun }
    if str_eq(last, "z") { return noun }
    return ""
}

// fr_plural_takes_plain_s: nouns in -eu / -au / -al that nevertheless form a
// regular plural in -s. Private helper of fr_pluralize.
fn fr_plural_takes_plain_s(noun: String) -> Bool {
    if str_eq(noun, "bleu") { return true }
    if str_eq(noun, "pneu") { return true }
    if str_eq(noun, "landau") { return true }
    if str_eq(noun, "sarrau") { return true }
    if str_eq(noun, "bal") { return true }
    if str_eq(noun, "carnaval") { return true }
    if str_eq(noun, "chacal") { return true }
    if str_eq(noun, "festival") { return true }
    if str_eq(noun, "récital") { return true }
    if str_eq(noun, "régal") { return true }
    return false
}

// fr_plural_ail_takes_aux: the seven -ail nouns whose plural is -aux; every
// other -ail noun simply adds -s. Private helper of fr_pluralize.
fn fr_plural_ail_takes_aux(noun: String) -> Bool {
    if str_eq(noun, "bail") { return true }
    if str_eq(noun, "corail") { return true }
    if str_eq(noun, "émail") { return true }
    if str_eq(noun, "soupirail") { return true }
    if str_eq(noun, "travail") { return true }
    if str_eq(noun, "vantail") { return true }
    if str_eq(noun, "vitrail") { return true }
    return false
}

// fr_plural_ou_takes_x: the seven -ou nouns whose plural adds -x instead of
// -s. Private helper of fr_pluralize.
fn fr_plural_ou_takes_x(noun: String) -> Bool {
    if str_eq(noun, "bijou") { return true }
    if str_eq(noun, "caillou") { return true }
    if str_eq(noun, "chou") { return true }
    if str_eq(noun, "genou") { return true }
    if str_eq(noun, "hibou") { return true }
    if str_eq(noun, "joujou") { return true }
    if str_eq(noun, "pou") { return true }
    return false
}

// fr_pluralize: return the plural of the singular noun `noun`, following the
// rule table at the top of this section.
fn fr_pluralize(noun: String) -> String {
    let inv: String = fr_invariant_plural(noun)
    if !str_eq(inv, "") {
        return inv
    }

    // Lexical exceptions must be caught before the suffix rules fire.
    if fr_plural_takes_plain_s(noun) {
        return noun + "s"
    }

    // -au also matches -eau.
    if fr_str_ends(noun, "au") {
        return noun + "x"
    }
    if fr_str_ends(noun, "eu") {
        return noun + "x"
    }
    if fr_str_ends(noun, "al") {
        return fr_str_drop_last(noun, 2) + "aux"
    }

    // Only a closed list of -ail nouns changes to -aux; the rest fall through
    // to the default -s below.
    if fr_plural_ail_takes_aux(noun) {
        return fr_str_drop_last(noun, 3) + "aux"
    }
    if fr_plural_ou_takes_x(noun) {
        return noun + "x"
    }

    return noun + "s"
}
|
||||
|
||||
// ── Article agreement ─────────────────────────────────────────────────────────
|
||||
//
|
||||
// fr_agree_article: return the correct French article for a noun.
|
||||
//
|
||||
// noun: the noun (used for gender inference and elision check)
|
||||
// definite: "true" for definite (le/la/l'/les), "false" for indefinite (un/une/des)
|
||||
// number: "singular" | "plural"
|
||||
//
|
||||
// Elision: le/la → l' before a vowel- or h-initial noun (handled here).
|
||||
|
||||
// fr_agree_article: pick the French article that agrees with `noun`.
//
//   noun:     noun used for gender inference (fr_gender) and for the elision
//             check (fr_is_vowel_start)
//   definite: "true" selects le/la/l'/les; any other value selects un/une/des
//   number:   "plural" selects the plural article; any other value is singular
//
// A noun whose gender is not "f" (including "unknown") gets the masculine
// article. Elision to "l'" applies to the definite singular only.
fn fr_agree_article(noun: String, definite: String, number: String) -> String {
    let gender: String = fr_gender(noun)
    let is_plural: Bool = str_eq(number, "plural")
    let is_def: Bool = str_eq(definite, "true")
    let vowel_start: Bool = fr_is_vowel_start(noun)
    let is_feminine: Bool = str_eq(gender, "f")

    // Plural articles do not vary with gender or elision.
    if is_plural {
        if is_def {
            return "les"
        }
        return "des"
    }

    // indefinite singular
    if !is_def {
        if is_feminine {
            return "une"
        }
        return "un"
    }

    // definite singular: elision takes precedence over gender
    if vowel_start {
        return "l'"
    }
    if is_feminine {
        return "la"
    }
    return "le"
}
|
||||
|
||||
// ── Question inversion ────────────────────────────────────────────────────────
|
||||
//
|
||||
// fr_question_inversion: form a yes/no question using subject-verb inversion.
|
||||
//
|
||||
// subject: pronoun string: "je" | "tu" | "il" | "elle" | "nous" | "vous" | "ils" | "elles"
|
||||
// verb_form: the conjugated verb form (e.g. "parle", "mange", "est")
|
||||
//
|
||||
// Rules:
|
||||
// - Verb and subject are joined with a hyphen: "parlez-vous ?"
|
||||
// - When the verb form ends in a vowel and the subject starts with a vowel
|
||||
// (il, elle, ils, elles), insert euphonic -t-: "parle-t-il ?"
|
||||
// - Je inversion is archaic; "est-ce que je ...?" is preferred — this function
|
||||
// generates "est-ce que je <verb_form> ?" for first-person singular.
|
||||
// - The result ends with " ?"
|
||||
|
||||
// fr_subject_starts_vowel: true when the subject pronoun begins with a vowel,
// i.e. when an inverted verb ending in a vowel needs the euphonic -t-.
//
// Recognised pronouns: il, elle, on, ils, elles. "on" is included so that
// inversion yields "parle-t-on ?" rather than "parle-on ?". Any other
// subject returns false.
fn fr_subject_starts_vowel(subject: String) -> Bool {
    if str_eq(subject, "il") { return true }
    if str_eq(subject, "elle") { return true }
    if str_eq(subject, "on") { return true }
    if str_eq(subject, "ils") { return true }
    if str_eq(subject, "elles") { return true }
    return false
}
|
||||
|
||||
// fr_verb_ends_vowel: true when the last character of `verb_form`, as
// reported by fr_str_last_char, is one of a, e, é, i, o, u.
//
// NOTE(review): whether "é" can ever match depends on fr_str_last_char
// returning a whole character rather than a single byte — confirm.
fn fr_verb_ends_vowel(verb_form: String) -> Bool {
    let final_char: String = fr_str_last_char(verb_form)

    if str_eq(final_char, "e") {
        return true
    }
    if str_eq(final_char, "a") {
        return true
    }
    if str_eq(final_char, "i") {
        return true
    }
    if str_eq(final_char, "o") {
        return true
    }
    if str_eq(final_char, "u") {
        return true
    }
    if str_eq(final_char, "é") {
        return true
    }
    return false
}
|
||||
|
||||
// fr_question_inversion: build a yes/no question by subject-verb inversion.
//
//   subject:   subject pronoun ("je", "tu", "il", "elle", "nous", ...)
//   verb_form: already-conjugated verb form (e.g. "parle", "est")
//
// Returns:
//   "est-ce que je <verb_form> ?"   when subject is "je"
//   "<verb_form>-t-<subject> ?"     when the verb form ends in a vowel and the
//                                   subject starts with one (euphonic -t-)
//   "<verb_form>-<subject> ?"       otherwise
fn fr_question_inversion(subject: String, verb_form: String) -> String {
    // First-person singular: use the est-ce que construction.
    if str_eq(subject, "je") {
        return "est-ce que je " + verb_form + " ?"
    }

    // Return directly from the nested test instead of setting a flag: the
    // earlier `let need_t = true` inside the inner block re-declared the flag
    // rather than visibly updating the outer one, so the -t- branch could be
    // skipped.
    if fr_verb_ends_vowel(verb_form) {
        if fr_subject_starts_vowel(subject) {
            return verb_form + "-t-" + subject + " ?"
        }
    }

    return verb_form + "-" + subject + " ?"
}
|
||||
@@ -1,29 +0,0 @@
|
||||
// auto-generated by elc --emit-header — do not edit
|
||||
extern fn fr_str_ends(s: String, suf: String) -> Bool
|
||||
extern fn fr_str_drop_last(s: String, n: Int) -> String
|
||||
extern fn fr_str_last_char(s: String) -> String
|
||||
extern fn fr_str_last2(s: String) -> String
|
||||
extern fn fr_is_vowel_start(s: String) -> Bool
|
||||
extern fn fr_is_known_irregular(verb: String) -> Bool
|
||||
extern fn fr_verb_group(base: String) -> String
|
||||
extern fn fr_stem(base: String) -> String
|
||||
extern fn fr_slot(person: String, number: String) -> Int
|
||||
extern fn fr_irregular_present(verb: String, person: String, number: String) -> String
|
||||
extern fn fr_regular_present(stem: String, vgroup: String, slot: Int) -> String
|
||||
extern fn fr_future_stem(base: String, vgroup: String) -> String
|
||||
extern fn fr_regular_future(fstem: String, slot: Int) -> String
|
||||
extern fn fr_irregular_future_stem(verb: String) -> String
|
||||
extern fn fr_imperfect_stem(base: String, vgroup: String) -> String
|
||||
extern fn fr_regular_imperfect(istem: String, slot: Int) -> String
|
||||
extern fn fr_uses_etre(verb: String) -> Bool
|
||||
extern fn fr_past_participle(verb: String) -> String
|
||||
extern fn fr_avoir_present(slot: Int) -> String
|
||||
extern fn fr_etre_present(slot: Int) -> String
|
||||
extern fn fr_conjugate(verb: String, tense: String, person: String, number: String) -> String
|
||||
extern fn fr_gender(noun: String) -> String
|
||||
extern fn fr_invariant_plural(noun: String) -> String
|
||||
extern fn fr_pluralize(noun: String) -> String
|
||||
extern fn fr_agree_article(noun: String, definite: String, number: String) -> String
|
||||
extern fn fr_subject_starts_vowel(subject: String) -> Bool
|
||||
extern fn fr_verb_ends_vowel(verb_form: String) -> Bool
|
||||
extern fn fr_question_inversion(subject: String, verb_form: String) -> String
|
||||
@@ -1,668 +0,0 @@
|
||||
// morphology-fro.el - Old French morphology for the NLG engine.
|
||||
//
|
||||
// Implements Old French verb conjugation, noun declension, and the definite
|
||||
// article. Designed as a companion to morphology.el and called by the engine
|
||||
// when the language profile code is "fro".
|
||||
//
|
||||
// Language profile: code=fro, name=Old French, morph_type=fusional,
|
||||
// word_order=V2, question_strategy=inversion, script=latin,
|
||||
// family=romance.
|
||||
//
|
||||
// Historical note: Old French (ca. 900–1400 CE) is the ancestor of Modern
|
||||
// French. It diverged from Vulgar Latin and retained a two-case system —
|
||||
// nominative (cas sujet) and oblique (cas régime) — inherited ultimately from
|
||||
// Latin. By around 1300 CE the case distinction had largely collapsed in
|
||||
// spoken usage, surviving mainly in formal written registers until it
|
||||
// disappeared altogether. This file targets the core Old French period
|
||||
// (ca. 1000–1300).
|
||||
//
|
||||
// Two-case system (masculine nouns):
|
||||
// Singular: nominative stem + -s (li murs = the wall [subject])
|
||||
// oblique stem (le mur = the wall [object])
|
||||
// Plural: nominative stem (li mur = the walls [subject])
|
||||
// oblique stem + -s (les murs = the walls [object])
|
||||
// Feminine nouns show no case distinction throughout.
|
||||
//
|
||||
// Verb conjugation covered:
|
||||
// Tenses: present indicative, passé simple (past), future
|
||||
// Persons: first/second/third × singular/plural (slots 0-5)
|
||||
// Conjugations:
|
||||
// 1st (-er): present -e/-es/-e/-ons/-ez/-ent
|
||||
// passé simple -ai/-as/-a/-ames/-astes/-erent
|
||||
// future stem+rai/ras/ra/rons/rez/ront
|
||||
// 2nd (-ir): present -is/-is/-it/-issons/-issiez/-issent
|
||||
// passé simple -is/-is/-it/-imes/-istes/-irent
|
||||
// future stem+rai series
|
||||
// 3rd (-re): present stem/s/t/-ons/-ez/-ent
|
||||
// passé simple -is series (like 2nd)
|
||||
// future stem+rai series
|
||||
// Irregulars: estre (be), avoir (have), aler (go), venir (come),
|
||||
// faire (do/make)
|
||||
// Canonical map: "be" -> "estre"
|
||||
//
|
||||
// Noun declension covered:
|
||||
// Masculine: two-case (nom/obl) × sg/pl as above
|
||||
// Feminine: case-neutral, sg base / pl base + -s
|
||||
// Gender detection: -e ending -> feminine (heuristic), else masculine
|
||||
//
|
||||
// Article:
|
||||
// Definite masculine nom sg: li; obl sg: le; nom pl: li; obl pl: les
|
||||
// Definite feminine sg: la; pl: les
|
||||
//
|
||||
// Depends on: morphology.el (str_eq, str_len, str_slice, str_ends_with)
|
||||
|
||||
// ── String helpers ─────────────────────────────────────────────────────────────
|
||||
|
||||
import "morphology.el"
|
||||
// fro_str_ends: module-local alias for str_ends_with(s, suf).
fn fro_str_ends(s: String, suf: String) -> Bool {
    let has_suffix: Bool = str_ends_with(s, suf)
    return has_suffix
}
|
||||
|
||||
// fro_drop: return `s` without its last `n` units (as counted by str_len).
//
//   n <= 0          -> `s` unchanged (a negative count would otherwise ask
//                      str_slice for an end index beyond the string)
//   n >= str_len(s) -> ""
//   otherwise       -> str_slice(s, 0, str_len(s) - n)
fn fro_drop(s: String, n: Int) -> String {
    // Nothing to remove; also guards against negative counts.
    if 0 >= n { return s }

    let len: Int = str_len(s)
    if n >= len { return "" }
    return str_slice(s, 0, len - n)
}
|
||||
|
||||
// ── Person/number slot ─────────────────────────────────────────────────────────
|
||||
//
|
||||
// Maps person × number to a 0-based paradigm index.
|
||||
// 0 = 1st singular (je)
|
||||
// 1 = 2nd singular (tu)
|
||||
// 2 = 3rd singular (il/ele)
|
||||
// 3 = 1st plural (nos)
|
||||
// 4 = 2nd plural (vos)
|
||||
// 5 = 3rd plural (il/eles)
|
||||
|
||||
// fro_slot: map a person/number pair onto a 0-based paradigm index.
//
//   person: "first" | "second"; any other value is treated as third person
//   number: "singular"; any other value is treated as plural
//
// Returns 0, 1, 2 for the singular persons and 3, 4, 5 for the plural ones.
fn fro_slot(person: String, number: String) -> Int {
    let singular: Bool = str_eq(number, "singular")

    if singular {
        if str_eq(person, "first") {
            return 0
        }
        if str_eq(person, "second") {
            return 1
        }
        // third person
        return 2
    }

    if str_eq(person, "first") {
        return 3
    }
    if str_eq(person, "second") {
        return 4
    }
    // third person
    return 5
}
|
||||
|
||||
// ── Canonical verb mapping ─────────────────────────────────────────────────────
|
||||
//
|
||||
// English semantic-layer labels are resolved to Old French dictionary infinitives
|
||||
// before conjugation.
|
||||
|
||||
// fro_map_canonical: translate an English semantic-layer verb label into the
// Old French infinitive used by the conjugation tables. Labels that are not
// listed are returned unchanged.
fn fro_map_canonical(verb: String) -> String {
    // verbs with a dedicated irregular paradigm in this file
    if str_eq(verb, "be") {
        return "estre"
    }
    if str_eq(verb, "have") {
        return "avoir"
    }
    if str_eq(verb, "go") {
        return "aler"
    }
    if str_eq(verb, "come") {
        return "venir"
    }
    // "do" and "make" share one infinitive
    if str_eq(verb, "make") {
        return "faire"
    }
    if str_eq(verb, "do") {
        return "faire"
    }

    // remaining mapped labels
    if str_eq(verb, "say") {
        return "dire"
    }
    if str_eq(verb, "see") {
        return "veoir"
    }
    if str_eq(verb, "want") {
        return "vouloir"
    }
    if str_eq(verb, "can") {
        return "pooir"
    }

    return verb
}
|
||||
|
||||
// ── Irregular verb: estre (to be) ─────────────────────────────────────────────
|
||||
//
|
||||
// Suppletive paradigm — one of the most irregular verbs in Old French.
|
||||
//
|
||||
// Present indicative:
|
||||
// 1sg sui 2sg es 3sg est
|
||||
// 1pl somes 2pl estes 3pl sont
|
||||
//
|
||||
// Passé simple (past):
|
||||
// 1sg fui 2sg fus 3sg fu
|
||||
// 1pl fumes 2pl fustes 3pl furent
|
||||
//
|
||||
// Future (periphrastic, based on ester- stem):
|
||||
// 1sg esterai 2sg esteras 3sg estera
|
||||
// 1pl esterons 2pl esterez 3pl esteront
|
||||
|
||||
// fro_estre_present: present indicative of estre for `slot`.
//
// Slots 0-4 give sui, es, est, somes, estes; any other slot gives sont.
fn fro_estre_present(slot: Int) -> String {
    // plural persons
    if slot == 3 {
        return "somes"
    }
    if slot == 4 {
        return "estes"
    }

    // singular persons
    if slot == 0 {
        return "sui"
    }
    if slot == 1 {
        return "es"
    }
    if slot == 2 {
        return "est"
    }

    return "sont"
}
|
||||
|
||||
// fro_estre_past: passé simple of estre for `slot`.
//
// Slots 0-4 give fui, fus, fu, fumes, fustes; any other slot gives furent.
fn fro_estre_past(slot: Int) -> String {
    // Every form is built on the same fu- base.
    let base: String = "fu"

    if slot == 0 {
        return base + "i"
    }
    if slot == 1 {
        return base + "s"
    }
    if slot == 2 {
        return base
    }
    if slot == 3 {
        return base + "mes"
    }
    if slot == 4 {
        return base + "stes"
    }
    return base + "rent"
}
|
||||
|
||||
// fro_estre_future: future of estre for `slot`, built on the ester- stem.
//
// Slots 0-4 give esterai, esteras, estera, esterons, esterez; any other slot
// gives esteront.
fn fro_estre_future(slot: Int) -> String {
    let stem: String = "ester"

    if slot == 0 {
        return stem + "ai"
    }
    if slot == 1 {
        return stem + "as"
    }
    if slot == 2 {
        return stem + "a"
    }
    if slot == 3 {
        return stem + "ons"
    }
    if slot == 4 {
        return stem + "ez"
    }
    return stem + "ont"
}
|
||||
|
||||
// ── Irregular verb: avoir (to have) ───────────────────────────────────────────
|
||||
//
|
||||
// Present indicative:
|
||||
// 1sg ai 2sg as 3sg a
|
||||
// 1pl avons 2pl avez 3pl ont
|
||||
//
|
||||
// Passé simple:
|
||||
// 1sg oi 2sg os 3sg ot
|
||||
// 1pl eumes 2pl eustes 3pl orent
|
||||
//
|
||||
// Future:
|
||||
// 1sg avrai 2sg avras 3sg avra
|
||||
// 1pl avrons 2pl avrez 3pl avront
|
||||
|
||||
// fro_avoir_present: present indicative of avoir for `slot`.
//
// Slots 0-4 give ai, as, a, avons, avez; any other slot gives ont.
fn fro_avoir_present(slot: Int) -> String {
    // plural persons
    if slot == 3 {
        return "avons"
    }
    if slot == 4 {
        return "avez"
    }

    // singular persons
    if slot == 0 {
        return "ai"
    }
    if slot == 1 {
        return "as"
    }
    if slot == 2 {
        return "a"
    }

    return "ont"
}
|
||||
|
||||
// fro_avoir_past: passé simple of avoir for `slot`.
//
// Slots 0-4 give oi, os, ot, eumes, eustes; any other slot gives orent.
fn fro_avoir_past(slot: Int) -> String {
    // 1st/2nd plural use the eu- base; all other persons use o-.
    if slot == 3 {
        return "eu" + "mes"
    }
    if slot == 4 {
        return "eu" + "stes"
    }

    let base: String = "o"
    if slot == 0 {
        return base + "i"
    }
    if slot == 1 {
        return base + "s"
    }
    if slot == 2 {
        return base + "t"
    }
    return base + "rent"
}
|
||||
|
||||
// fro_avoir_future: future of avoir for `slot`, built on the avr- stem.
//
// Slots 0-4 give avrai, avras, avra, avrons, avrez; any other slot gives
// avront.
fn fro_avoir_future(slot: Int) -> String {
    let stem: String = "avr"

    if slot == 0 {
        return stem + "ai"
    }
    if slot == 1 {
        return stem + "as"
    }
    if slot == 2 {
        return stem + "a"
    }
    if slot == 3 {
        return stem + "ons"
    }
    if slot == 4 {
        return stem + "ez"
    }
    return stem + "ont"
}
|
||||
|
||||
// ── Irregular verb: aler (to go) ──────────────────────────────────────────────
|
||||
//
|
||||
// Highly suppletive — present draws on Latin *vadere (vois- stem).
|
||||
//
|
||||
// Present indicative:
|
||||
// 1sg vois 2sg vas 3sg va
|
||||
// 1pl alons 2pl alez 3pl vont
|
||||
//
|
||||
// Passé simple (regular -er pattern on al-):
|
||||
// 1sg alai 2sg alas 3sg ala
|
||||
// 1pl alames 2pl alastes 3pl alerent
|
||||
//
|
||||
// Future (ir- stem, archaic):
|
||||
// 1sg irai 2sg iras 3sg ira
|
||||
// 1pl irons 2pl irez 3pl iront
|
||||
|
||||
// fro_aler_present: present indicative of aler for `slot`.
//
// Slots 0-4 give vois, vas, va, alons, alez; any other slot gives vont.
fn fro_aler_present(slot: Int) -> String {
    // 1st/2nd plural are the only forms built on al-
    if slot == 3 {
        return "alons"
    }
    if slot == 4 {
        return "alez"
    }

    // suppletive v- forms
    if slot == 0 {
        return "vois"
    }
    if slot == 1 {
        return "vas"
    }
    if slot == 2 {
        return "va"
    }
    return "vont"
}
|
||||
|
||||
// fro_aler_past: passé simple of aler for `slot`, built on the al- stem.
//
// Slots 0-4 give alai, alas, ala, alames, alastes; any other slot gives
// alerent.
fn fro_aler_past(slot: Int) -> String {
    let stem: String = "al"

    if slot == 0 {
        return stem + "ai"
    }
    if slot == 1 {
        return stem + "as"
    }
    if slot == 2 {
        return stem + "a"
    }
    if slot == 3 {
        return stem + "ames"
    }
    if slot == 4 {
        return stem + "astes"
    }
    return stem + "erent"
}
|
||||
|
||||
// fro_aler_future: future of aler for `slot`, built on the ir- stem.
//
// Slots 0-4 give irai, iras, ira, irons, irez; any other slot gives iront.
fn fro_aler_future(slot: Int) -> String {
    let stem: String = "ir"

    if slot == 0 {
        return stem + "ai"
    }
    if slot == 1 {
        return stem + "as"
    }
    if slot == 2 {
        return stem + "a"
    }
    if slot == 3 {
        return stem + "ons"
    }
    if slot == 4 {
        return stem + "ez"
    }
    return stem + "ont"
}
|
||||
|
||||
// ── Irregular verb: venir (to come) ───────────────────────────────────────────
|
||||
//
|
||||
// Present indicative (vien-/ven- alternation):
|
||||
// 1sg vieng 2sg viens 3sg vient
|
||||
// 1pl venons 2pl venez 3pl vienent
|
||||
//
|
||||
// Passé simple:
|
||||
// 1sg ving 2sg vins 3sg vint
|
||||
// 1pl vinsmes 2pl vinstes 3pl vindrent
|
||||
//
|
||||
// Future (venr- stem):
|
||||
// 1sg venrai 2sg venras 3sg venra
|
||||
// 1pl venrons 2pl venrez 3pl venront
|
||||
|
||||
// fro_venir_present: present indicative of venir for `slot`.
//
// Slots 0-4 give vieng, viens, vient, venons, venez; any other slot gives
// vienent.
fn fro_venir_present(slot: Int) -> String {
    // 1st/2nd plural keep the ven- base
    if slot == 3 {
        return "ven" + "ons"
    }
    if slot == 4 {
        return "ven" + "ez"
    }

    // every other person uses the vien- base
    let base: String = "vien"
    if slot == 0 {
        return base + "g"
    }
    if slot == 1 {
        return base + "s"
    }
    if slot == 2 {
        return base + "t"
    }
    return base + "ent"
}
|
||||
|
||||
// fro_venir_past: passé simple of venir for `slot`, built on the vin- base.
//
// Slots 0-4 give ving, vins, vint, vinsmes, vinstes; any other slot gives
// vindrent.
fn fro_venir_past(slot: Int) -> String {
    let base: String = "vin"

    if slot == 0 {
        return base + "g"
    }
    if slot == 1 {
        return base + "s"
    }
    if slot == 2 {
        return base + "t"
    }
    if slot == 3 {
        return base + "smes"
    }
    if slot == 4 {
        return base + "stes"
    }
    return base + "drent"
}
|
||||
|
||||
// fro_venir_future: future of venir for `slot`, built on the venr- stem.
//
// Slots 0-4 give venrai, venras, venra, venrons, venrez; any other slot gives
// venront.
fn fro_venir_future(slot: Int) -> String {
    let stem: String = "venr"

    if slot == 0 {
        return stem + "ai"
    }
    if slot == 1 {
        return stem + "as"
    }
    if slot == 2 {
        return stem + "a"
    }
    if slot == 3 {
        return stem + "ons"
    }
    if slot == 4 {
        return stem + "ez"
    }
    return stem + "ont"
}
|
||||
|
||||
// ── Irregular verb: faire (to do/make) ────────────────────────────────────────
|
||||
//
|
||||
// Present indicative (faz/fais- alternation):
|
||||
// 1sg faz 2sg fais 3sg fait
|
||||
// 1pl faisons 2pl faites 3pl font
|
||||
//
|
||||
// Passé simple:
|
||||
// 1sg fis 2sg fis 3sg fist
|
||||
// 1pl fimes 2pl fistes 3pl firent
|
||||
//
|
||||
// Future (fer- stem):
|
||||
// 1sg ferai 2sg feras 3sg fera
|
||||
// 1pl ferons 2pl ferez 3pl feront
|
||||
|
||||
// Present indicative of faire (faz / fais- alternation).
// slot 0..2 give the singular forms, slot 3..4 the 1pl/2pl forms;
// any other slot value yields the 3pl form "font".
fn fro_faire_present(slot: Int) -> String {
    // singular
    if slot == 0 {
        return "faz"
    }
    if slot == 1 {
        return "fais"
    }
    if slot == 2 {
        return "fait"
    }
    // plural
    if slot == 3 {
        return "faisons"
    }
    if slot == 4 {
        return "faites"
    }
    return "font"
}
|
||||
|
||||
// Passé simple of faire.
// slot 0 and slot 1 both give "fis"; slot 2 "fist", slot 3 "fimes",
// slot 4 "fistes"; any other slot value yields the 3pl form "firent".
fn fro_faire_past(slot: Int) -> String {
    // singular (1sg and 2sg are homophonous)
    if slot == 0 {
        return "fis"
    }
    if slot == 1 {
        return "fis"
    }
    if slot == 2 {
        return "fist"
    }
    // plural
    if slot == 3 {
        return "fimes"
    }
    if slot == 4 {
        return "fistes"
    }
    return "firent"
}
|
||||
|
||||
// Future of faire, built on the fer- stem.
// slot 0..2 give the singular forms, slot 3..4 the 1pl/2pl forms;
// any other slot value yields the 3pl form "feront".
fn fro_faire_future(slot: Int) -> String {
    // singular
    if slot == 0 {
        return "ferai"
    }
    if slot == 1 {
        return "feras"
    }
    if slot == 2 {
        return "fera"
    }
    // plural
    if slot == 3 {
        return "ferons"
    }
    if slot == 4 {
        return "ferez"
    }
    return "feront"
}
|
||||
|
||||
// ── Conjugation class detection ────────────────────────────────────────────────
|
||||
//
|
||||
// Old French verbs fall into three broad conjugation classes:
|
||||
// 1st conjugation: infinitive ends in -er (chanter, donner)
|
||||
// 2nd conjugation: infinitive ends in -ir (finir, choisir)
|
||||
// 3rd conjugation: infinitive ends in -re (vendre, rendre)
|
||||
//
|
||||
// Returns "1", "2", or "3".
|
||||
|
||||
// Classifies an infinitive by its suffix and returns the class as a string:
//   "-er" -> "1", "-ir" -> "2", "-re" -> "3".
// An infinitive matching none of the suffixes is treated as class "1".
fn fro_verb_class(verb: String) -> String {
    if fro_str_ends(verb, "er") {
        return "1"
    }
    if fro_str_ends(verb, "ir") {
        return "2"
    }
    if fro_str_ends(verb, "re") {
        return "3"
    }
    // no recognised suffix: default to the first conjugation
    return "1"
}
|
||||
|
||||
// fro_verb_stem: strip the infinitive suffix to expose the productive stem.
|
||||
// 1st (-er): drop 2 bytes
|
||||
// 2nd (-ir): drop 2 bytes
|
||||
// 3rd (-re): drop 2 bytes
|
||||
|
||||
// Returns the infinitive with its two-character suffix (-er / -ir / -re)
// removed, i.e. fro_drop(verb, 2).
// vclass is part of the signature but is not consulted: the same amount is
// dropped whatever class is passed.
fn fro_verb_stem(verb: String, vclass: String) -> String {
    let bare_stem: String = fro_drop(verb, 2)
    return bare_stem
}
|
||||
|
||||
// ── 1st conjugation (-er): regular endings ────────────────────────────────────
|
||||
//
|
||||
// Present indicative (stem + ending):
|
||||
// 1sg -e 2sg -es 3sg -e
|
||||
// 1pl -ons 2pl -ez 3pl -ent
|
||||
//
|
||||
// Passé simple:
|
||||
// 1sg -ai 2sg -as 3sg -a
|
||||
// 1pl -ames 2pl -astes 3pl -erent
|
||||
//
|
||||
// Future (infinitive is the base — drop -r then add endings):
|
||||
// Actually the future stem = infinitive minus final -r (chanter- -> chanterai)
|
||||
// 1sg -ai 2sg -as 3sg -a
|
||||
// 1pl -ons 2pl -ez 3pl -ont
|
||||
// Put together: chant + er + ai = chanterai, i.e. the endings attach to the whole infinitive.
|
||||
// Simpler: future base = fro_drop(verb, 1) i.e. drop final -r to keep the -e:
|
||||
// chanterai, chanteras, chantera, chanterons, chanterez, chanteront
|
||||
|
||||
// Present indicative of a regular -er verb: stem plus the ending for slot.
// Endings by slot: 0 "e", 1 "es", 2 "e", 3 "ons", 4 "ez";
// any other slot value takes the 3pl ending "ent".
fn fro_conj1_present(stem: String, slot: Int) -> String {
    // singular
    if slot == 0 {
        return stem + "e"
    }
    if slot == 1 {
        return stem + "es"
    }
    if slot == 2 {
        return stem + "e"
    }
    // plural
    if slot == 3 {
        return stem + "ons"
    }
    if slot == 4 {
        return stem + "ez"
    }
    return stem + "ent"
}
|
||||
|
||||
// Passé simple of a regular -er verb: stem plus the ending for slot.
// Endings by slot: 0 "ai", 1 "as", 2 "a", 3 "ames", 4 "astes";
// any other slot value takes the 3pl ending "erent".
fn fro_conj1_past(stem: String, slot: Int) -> String {
    // singular
    if slot == 0 {
        return stem + "ai"
    }
    if slot == 1 {
        return stem + "as"
    }
    if slot == 2 {
        return stem + "a"
    }
    // plural
    if slot == 3 {
        return stem + "ames"
    }
    if slot == 4 {
        return stem + "astes"
    }
    return stem + "erent"
}
|
||||
|
||||
// Future of a regular -er verb. Takes the full infinitive (not the stem):
// the final -r is dropped with fro_drop(verb, 1) so the -e is kept
// (chanter -> chante), then an r-initial ending is appended.
// Endings by slot: 0 "rai", 1 "ras", 2 "ra", 3 "rons", 4 "rez";
// any other slot value takes the 3pl ending "ront".
fn fro_conj1_future(verb: String, slot: Int) -> String {
    let future_base: String = fro_drop(verb, 1)
    // singular
    if slot == 0 {
        return future_base + "rai"
    }
    if slot == 1 {
        return future_base + "ras"
    }
    if slot == 2 {
        return future_base + "ra"
    }
    // plural
    if slot == 3 {
        return future_base + "rons"
    }
    if slot == 4 {
        return future_base + "rez"
    }
    return future_base + "ront"
}
|
||||
|
||||
// ── 2nd conjugation (-ir): regular endings ────────────────────────────────────
|
||||
//
|
||||
// Present indicative uses an infix -iss- in pl forms (inchoative):
|
||||
// 1sg stem + -is 2sg stem + -is 3sg stem + -it
|
||||
// 1pl stem + -issons 2pl stem + -issiez 3pl stem + -issent
|
||||
//
|
||||
// Passé simple:
|
||||
// 1sg stem + -is 2sg stem + -is 3sg stem + -it
|
||||
// 1pl stem + -imes 2pl stem + -istes 3pl stem + -irent
|
||||
//
|
||||
// Future (infinitive minus final -r):
|
||||
// finir -> fini- -> finirai ...
|
||||
|
||||
// Present indicative of a regular -ir verb: stem plus the ending for slot.
// The plural endings carry the -iss- infix.
// Endings by slot: 0 "is", 1 "is", 2 "it", 3 "issons", 4 "issiez";
// any other slot value takes the 3pl ending "issent".
fn fro_conj2_present(stem: String, slot: Int) -> String {
    // singular
    if slot == 0 {
        return stem + "is"
    }
    if slot == 1 {
        return stem + "is"
    }
    if slot == 2 {
        return stem + "it"
    }
    // plural, with -iss-
    if slot == 3 {
        return stem + "issons"
    }
    if slot == 4 {
        return stem + "issiez"
    }
    return stem + "issent"
}
|
||||
|
||||
// Passé simple of a regular -ir verb: stem plus the ending for slot.
// Endings by slot: 0 "is", 1 "is", 2 "it", 3 "imes", 4 "istes";
// any other slot value takes the 3pl ending "irent".
fn fro_conj2_past(stem: String, slot: Int) -> String {
    // singular
    if slot == 0 {
        return stem + "is"
    }
    if slot == 1 {
        return stem + "is"
    }
    if slot == 2 {
        return stem + "it"
    }
    // plural
    if slot == 3 {
        return stem + "imes"
    }
    if slot == 4 {
        return stem + "istes"
    }
    return stem + "irent"
}
|
||||
|
||||
// Future of a regular -ir verb. Takes the full infinitive (not the stem):
// the final -r is dropped with fro_drop(verb, 1) (finir -> fini), then an
// r-initial ending is appended.
// Endings by slot: 0 "rai", 1 "ras", 2 "ra", 3 "rons", 4 "rez";
// any other slot value takes the 3pl ending "ront".
fn fro_conj2_future(verb: String, slot: Int) -> String {
    let future_base: String = fro_drop(verb, 1)
    // singular
    if slot == 0 {
        return future_base + "rai"
    }
    if slot == 1 {
        return future_base + "ras"
    }
    if slot == 2 {
        return future_base + "ra"
    }
    // plural
    if slot == 3 {
        return future_base + "rons"
    }
    if slot == 4 {
        return future_base + "rez"
    }
    return future_base + "ront"
}
|
||||
|
||||
// ── 3rd conjugation (-re): regular endings ────────────────────────────────────
|
||||
//
|
||||
// Present indicative:
|
||||
// 1sg stem (no ending) 2sg stem + -s 3sg stem + -t
|
||||
// 1pl stem + -ons 2pl stem + -ez 3pl stem + -ent
|
||||
//
|
||||
// Passé simple (same endings as 2nd conj):
|
||||
// 1sg stem + -is 2sg stem + -is 3sg stem + -it
|
||||
// 1pl stem + -imes 2pl stem + -istes 3pl stem + -irent
|
||||
//
|
||||
// Future (-re verbs drop -e before adding endings):
|
||||
// vendre -> vendr- -> vendrai ...
|
||||
|
||||
// Present indicative of a regular -re verb.
// slot 0 returns the bare stem; the other slots append an ending:
// 1 "s", 2 "t", 3 "ons", 4 "ez"; any other slot value takes "ent".
fn fro_conj3_present(stem: String, slot: Int) -> String {
    // 1sg has no ending at all
    if slot == 0 {
        return stem
    }
    if slot == 1 {
        return stem + "s"
    }
    if slot == 2 {
        return stem + "t"
    }
    // plural
    if slot == 3 {
        return stem + "ons"
    }
    if slot == 4 {
        return stem + "ez"
    }
    return stem + "ent"
}
|
||||
|
||||
// Passé simple of a regular -re verb: stem plus the ending for slot.
// Endings by slot: 0 "is", 1 "is", 2 "it", 3 "imes", 4 "istes";
// any other slot value takes the 3pl ending "irent".
fn fro_conj3_past(stem: String, slot: Int) -> String {
    // singular
    if slot == 0 {
        return stem + "is"
    }
    if slot == 1 {
        return stem + "is"
    }
    if slot == 2 {
        return stem + "it"
    }
    // plural
    if slot == 3 {
        return stem + "imes"
    }
    if slot == 4 {
        return stem + "istes"
    }
    return stem + "irent"
}
|
||||
|
||||
// Future of a regular -re verb. Takes the full infinitive (not the stem):
// the whole -re is removed with fro_drop(verb, 2) (vendre -> vend), then an
// r-initial ending is appended (vend + rai = vendrai).
// Endings by slot: 0 "rai", 1 "ras", 2 "ra", 3 "rons", 4 "rez";
// any other slot value takes the 3pl ending "ront".
fn fro_conj3_future(verb: String, slot: Int) -> String {
    let consonant_stem: String = fro_drop(verb, 2)
    // singular
    if slot == 0 {
        return consonant_stem + "rai"
    }
    if slot == 1 {
        return consonant_stem + "ras"
    }
    if slot == 2 {
        return consonant_stem + "ra"
    }
    // plural
    if slot == 3 {
        return consonant_stem + "rons"
    }
    if slot == 4 {
        return consonant_stem + "rez"
    }
    return consonant_stem + "ront"
}
|
||||
|
||||
// ── fro_conjugate: main conjugation entry point ───────────────────────────────
|
||||
//
|
||||
// verb: Old French infinitive (e.g. "chanter", "finir", "vendre")
|
||||
// or English canonical label ("be", "go", "have", ...)
|
||||
// tense: "present" | "past" | "future"
|
||||
// person: "first" | "second" | "third"
|
||||
// number: "singular" | "plural"
|
||||
//
|
||||
// Returns the inflected form. Unknown tenses fall back to the infinitive.
|
||||
|
||||
// Main conjugation entry point.
//
// The verb is first passed through fro_map_canonical and person/number are
// turned into a paradigm slot by fro_slot. The result is then looked up in
// the irregular tables (estre, avoir, aler, venir, faire) or, for any other
// verb, built by the regular helpers of the class reported by fro_verb_class.
//
//   verb:   infinitive or canonical label, as accepted by fro_map_canonical
//   tense:  compared against "present", "past" and "future"
//   person, number: forwarded unchanged to fro_slot
//
// Returns the inflected form; for a tense that matches none of the three
// names the mapped infinitive is returned unchanged.
fn fro_conjugate(verb: String, tense: String, person: String, number: String) -> String {
    let lemma: String = fro_map_canonical(verb)
    let cell: Int = fro_slot(person, number)

    // Irregular verbs: each has its own table per tense.

    // estre (to be)
    if str_eq(lemma, "estre") {
        if str_eq(tense, "present") { return fro_estre_present(cell) }
        if str_eq(tense, "past") { return fro_estre_past(cell) }
        if str_eq(tense, "future") { return fro_estre_future(cell) }
        return lemma
    }

    // avoir (to have)
    if str_eq(lemma, "avoir") {
        if str_eq(tense, "present") { return fro_avoir_present(cell) }
        if str_eq(tense, "past") { return fro_avoir_past(cell) }
        if str_eq(tense, "future") { return fro_avoir_future(cell) }
        return lemma
    }

    // aler (to go)
    if str_eq(lemma, "aler") {
        if str_eq(tense, "present") { return fro_aler_present(cell) }
        if str_eq(tense, "past") { return fro_aler_past(cell) }
        if str_eq(tense, "future") { return fro_aler_future(cell) }
        return lemma
    }

    // venir (to come)
    if str_eq(lemma, "venir") {
        if str_eq(tense, "present") { return fro_venir_present(cell) }
        if str_eq(tense, "past") { return fro_venir_past(cell) }
        if str_eq(tense, "future") { return fro_venir_future(cell) }
        return lemma
    }

    // faire (to do/make)
    if str_eq(lemma, "faire") {
        if str_eq(tense, "present") { return fro_faire_present(cell) }
        if str_eq(tense, "past") { return fro_faire_past(cell) }
        if str_eq(tense, "future") { return fro_faire_future(cell) }
        return lemma
    }

    // Regular verbs: present and past work from the stem, while the future
    // helpers receive the full infinitive and derive their own base.
    let conj_class: String = fro_verb_class(lemma)
    let root: String = fro_verb_stem(lemma, conj_class)

    // 1st conjugation (-er)
    if str_eq(conj_class, "1") {
        if str_eq(tense, "present") { return fro_conj1_present(root, cell) }
        if str_eq(tense, "past") { return fro_conj1_past(root, cell) }
        if str_eq(tense, "future") { return fro_conj1_future(lemma, cell) }
        return lemma
    }

    // 2nd conjugation (-ir)
    if str_eq(conj_class, "2") {
        if str_eq(tense, "present") { return fro_conj2_present(root, cell) }
        if str_eq(tense, "past") { return fro_conj2_past(root, cell) }
        if str_eq(tense, "future") { return fro_conj2_future(lemma, cell) }
        return lemma
    }

    // 3rd conjugation (-re)
    if str_eq(conj_class, "3") {
        if str_eq(tense, "present") { return fro_conj3_present(root, cell) }
        if str_eq(tense, "past") { return fro_conj3_past(root, cell) }
        if str_eq(tense, "future") { return fro_conj3_future(lemma, cell) }
        return lemma
    }

    // Unrecognised class: hand back the infinitive.
    return lemma
}
|
||||
|
||||
// ── Gender detection ───────────────────────────────────────────────────────────
|
||||
//
|
||||
// Heuristic gender detection from the citation form (nominative singular).
|
||||
// Old French gender was inherited from Latin with only two genders surviving:
|
||||
// masculine and feminine (neuter collapsed into masculine/feminine by Vulgar Latin).
|
||||
//
|
||||
// Heuristic: citation form ending in -e -> feminine; otherwise -> masculine.
|
||||
// This is imperfect but covers the majority of common nouns.
|
||||
|
||||
// Guesses grammatical gender from the shape of the noun.
// Returns "fem" when the noun ends in "e" and "masc" for everything else.
fn fro_gender(noun: String) -> String {
    if fro_str_ends(noun, "e") {
        return "fem"
    }
    // no final -e: assume masculine
    return "masc"
}
|
||||
|
||||
// ── Noun declension ────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Old French two-case system:
|
||||
//
|
||||
// Masculine (e.g. mur — wall, stem = mur):
|
||||
// Singular nominative (cas sujet): murs (stem + -s)
|
||||
// Singular oblique (cas régime): mur (stem)
|
||||
// Plural nominative: mur (stem)
|
||||
// Plural oblique: murs (stem + -s)
|
||||
//
|
||||
// This pattern looks inverted relative to Modern French, where -s marks the plural:
|
||||
// the nominative takes -s in singular but loses it in plural,
|
||||
// while the oblique works the other way round.
|
||||
//
|
||||
// Feminine (e.g. dame — lady):
|
||||
// No case distinction throughout.
|
||||
// Singular: dame (citation form)
|
||||
// Plural: dames (citation + -s)
|
||||
//
|
||||
// gram_case: "nominative" | "oblique"
|
||||
// number: "singular" | "plural"
|
||||
|
||||
// Declines a masculine noun in the two-case system.
//   noun:      bare stem (the form returned for the oblique singular)
//   gram_case: "nominative" selects the subject case; any other value is
//              handled as oblique
//   number:    "singular" selects the singular; any other value is handled
//              as plural
// The -s is added in exactly two cells: nominative singular and oblique plural.
fn fro_decline_masc(noun: String, gram_case: String, number: String) -> String {
    if str_eq(gram_case, "nominative") {
        // subject case: -s in the singular, bare stem in the plural
        if str_eq(number, "singular") {
            return noun + "s"
        }
        return noun
    }
    // oblique case: bare stem in the singular, -s in the plural
    if str_eq(number, "singular") {
        return noun
    }
    return noun + "s"
}
|
||||
|
||||
// Declines a feminine noun; there is no case parameter because only number
// changes the form.
// Returns noun unchanged when number is "singular", otherwise noun + "s".
fn fro_decline_fem(noun: String, number: String) -> String {
    if str_eq(number, "singular") {
        return noun
    }
    let plural_form: String = noun + "s"
    return plural_form
}
|
||||
|
||||
// fro_decline: main declension entry point.
|
||||
//
|
||||
//   noun:      citation form (oblique singular — the bare stem, without the nominative -s)
|
||||
// gram_case: "nominative" | "oblique"
|
||||
// number: "singular" | "plural"
|
||||
|
||||
// Declension entry point: picks the masculine or feminine routine from the
// gender reported by fro_gender(noun).
// gram_case only reaches the masculine routine; the feminine one receives
// just the noun and the number.
fn fro_decline(noun: String, gram_case: String, number: String) -> String {
    if str_eq(fro_gender(noun), "masc") {
        return fro_decline_masc(noun, gram_case, number)
    }
    // anything that is not "masc" is declined as feminine
    let feminine_form: String = fro_decline_fem(noun, number)
    return feminine_form
}
|
||||
|
||||
// ── Definite article ──────────────────────────────────────────────────────────
|
||||
//
|
||||
// Old French definite articles are case- and gender-sensitive:
|
||||
//
|
||||
// Masculine:
|
||||
// nom sg: li obl sg: le
|
||||
// nom pl: li obl pl: les
|
||||
//
|
||||
// Feminine:
|
||||
// sg: la pl: les
|
||||
//
|
||||
// gender: "masc" | "fem"
|
||||
// gram_case: "nominative" | "oblique"
|
||||
// number: "singular" | "plural"
|
||||
|
||||
// Returns the definite article for the given gender, case and number.
//
//   gender:    "masc" selects the masculine set; any other value is treated
//              as feminine
//   gram_case: "nominative" | "oblique" (only consulted for the masculine)
//   number:    "plural" selects the plural; any other value is singular
//
// Masculine: nominative "li" in both numbers, oblique "le" (sg) / "les" (pl).
// Feminine:  "la" (sg) / "les" (pl), with no case distinction.
fn fro_article(gender: String, gram_case: String, number: String) -> String {
    if str_eq(gender, "masc") {
        // The nominative must be tested before number: the subject-case
        // article is "li" in the plural too, not "les".
        if str_eq(gram_case, "nominative") { return "li" }
        if str_eq(number, "plural") { return "les" }
        return "le"
    }
    // feminine
    if str_eq(number, "plural") { return "les" }
    return "la"
}
|
||||
|
||||
// ── fro_noun_phrase: noun phrase builder ──────────────────────────────────────
|
||||
//
|
||||
// Assembles a declined noun with an optional definite article.
|
||||
//
|
||||
//   noun:      citation form (oblique singular — the bare stem, without the nominative -s)
|
||||
// gram_case: "nominative" | "oblique"
|
||||
// number: "singular" | "plural"
|
||||
// definite: "true" to prepend the definite article; any other value omits it
|
||||
|
||||
// Builds a noun phrase: the declined noun, optionally preceded by the
// definite article and a single space.
//   noun, gram_case, number: forwarded to fro_decline (and to fro_article)
//   definite: the article is added only when this is exactly "true"
// Gender comes from fro_gender(noun).
fn fro_noun_phrase(noun: String, gram_case: String, number: String, definite: String) -> String {
    let noun_gender: String = fro_gender(noun)
    let inflected: String = fro_decline(noun, gram_case, number)

    if str_eq(definite, "true") {
        return fro_article(noun_gender, gram_case, number) + " " + inflected
    }

    // bare noun, no article
    return inflected
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user