ci: skip elb on Linux — compile dist/soul.c directly to prevent OOM
elb runs elc, which consumes 24GB+ of virtual memory on the 16GB GCE runner, OOM-killing the runner process and crashing the VM. We already restore the repo's pre-built soul.c immediately after elb runs, so elb's output is discarded anyway. Skip elb entirely: download only the El runtime sources (el_runtime.c and el_runtime.h) and compile dist/soul.c directly. Root cause: repeated elc OOM kills left the runner VM unresponsive for 7+ weeks. The VM was manually reset on 2026-06-28 to restore CI.
This commit is contained in:
+14
-43
@@ -43,7 +43,7 @@ jobs:
|
||||
> /etc/apt/sources.list.d/google-cloud-sdk.list
|
||||
apt-get update -qq && apt-get install -y google-cloud-cli
|
||||
|
||||
- name: Download El SDK from Artifact Registry
|
||||
- name: Download El runtime from Artifact Registry
|
||||
env:
|
||||
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
|
||||
run: |
|
||||
@@ -51,10 +51,12 @@ jobs:
|
||||
gcloud auth activate-service-account --key-file=/tmp/gcp-key.json
|
||||
gcloud config set project neuron-785695
|
||||
|
||||
rm -rf /opt/el/dist /opt/el/runtime
|
||||
mkdir -p /opt/el/dist/platform /opt/el/dist/bin /opt/el/runtime
|
||||
rm -rf /opt/el/runtime
|
||||
mkdir -p /opt/el/runtime
|
||||
|
||||
# Get latest version of each package
|
||||
# Get latest version of each runtime package (elc/elb not needed — we compile
|
||||
# dist/soul.c directly; running elb on Linux OOM-kills the runner, and we
|
||||
# always use the repo's pre-built soul.c anyway).
|
||||
get_latest() {
|
||||
gcloud artifacts versions list \
|
||||
--repository=foundation-prod \
|
||||
@@ -66,22 +68,10 @@ jobs:
|
||||
--format="value(name)" 2>/dev/null | awk -F/ '{print $NF}'
|
||||
}
|
||||
|
||||
ELC_VER=$(get_latest el-elc)
|
||||
ELB_VER=$(get_latest el-elb)
|
||||
RC_VER=$(get_latest el-runtime-c)
|
||||
RH_VER=$(get_latest el-runtime-h)
|
||||
|
||||
echo "Downloading elc@${ELC_VER} elb@${ELB_VER} runtime@${RC_VER}"
|
||||
|
||||
gcloud artifacts generic download \
|
||||
--repository=foundation-prod --location=us-central1 --project=neuron-785695 \
|
||||
--package=el-elc --version="${ELC_VER}" \
|
||||
--destination=/opt/el/dist/platform/
|
||||
|
||||
gcloud artifacts generic download \
|
||||
--repository=foundation-prod --location=us-central1 --project=neuron-785695 \
|
||||
--package=el-elb --version="${ELB_VER}" \
|
||||
--destination=/opt/el/dist/bin/
|
||||
echo "Downloading runtime@${RC_VER}"
|
||||
|
||||
gcloud artifacts generic download \
|
||||
--repository=foundation-prod --location=us-central1 --project=neuron-785695 \
|
||||
@@ -93,39 +83,20 @@ jobs:
|
||||
--package=el-runtime-h --version="${RH_VER}" \
|
||||
--destination=/opt/el/runtime/
|
||||
|
||||
# Downloaded files keep original names; rename to canonical paths
|
||||
mv /opt/el/dist/platform/elc* /opt/el/dist/platform/elc 2>/dev/null || true
|
||||
mv /opt/el/dist/bin/elb* /opt/el/dist/bin/elb 2>/dev/null || true
|
||||
mv /opt/el/runtime/el_runtime.c* /opt/el/runtime/el_runtime.c 2>/dev/null || true
|
||||
mv /opt/el/runtime/el_runtime.h* /opt/el/runtime/el_runtime.h 2>/dev/null || true
|
||||
|
||||
chmod +x /opt/el/dist/platform/elc /opt/el/dist/bin/elb
|
||||
echo "El SDK ready"
|
||||
/opt/el/dist/platform/elc --version || true
|
||||
echo "El runtime ready: $(ls /opt/el/runtime/)"
|
||||
|
||||
- name: Build neuron soul binary
|
||||
run: |
|
||||
ELB=/opt/el/dist/bin/elb
|
||||
ELC=/opt/el/dist/platform/elc
|
||||
RUNTIME=/opt/el/runtime
|
||||
|
||||
# Preserve the pre-compiled dist/soul.c from the repo before running elb.
|
||||
# elb may overwrite it during compilation; we always want the repo version
|
||||
# since it contains the patched self-contained translation unit (all modules
|
||||
# inlined, workspace scope fix, agentic dedup fix, etc.).
|
||||
cp dist/soul.c /tmp/soul.c.prebuilt
|
||||
|
||||
# Compile all El modules to C via elb.
|
||||
# elb fails at link on Linux (GNU ld rejects duplicate strong symbols that
|
||||
# macOS ld accepts silently) — that's expected and captured with || true.
|
||||
$ELB --elc=$ELC --runtime=$RUNTIME/el_runtime.c || true
|
||||
|
||||
# Restore the repo's self-contained soul.c — elb may have overwritten it
|
||||
# with a partial (non-inlined) version that lacks module-level definitions.
|
||||
cp /tmp/soul.c.prebuilt dist/soul.c
|
||||
|
||||
# Compile the self-contained translation unit. No --allow-multiple-definition
|
||||
# needed since soul.c inlines all modules.
|
||||
# Compile the self-contained translation unit directly from dist/soul.c.
|
||||
# dist/soul.c is the authoritative combined unit maintained in the repo —
|
||||
# regenerated on macOS by running elb (which succeeds on arm64/macOS ld but
|
||||
# fails on Linux due to duplicate strong symbols). We skip the elb step here
|
||||
# entirely: elb on Linux would OOM the runner (elc uses 24GB+ virtual memory
|
||||
# on a 16GB host) and we always restore from the repo's soul.c anyway.
|
||||
mkdir -p dist
|
||||
cc -O2 -DHAVE_CURL \
|
||||
-I$RUNTIME \
|
||||
|
||||
Reference in New Issue
Block a user