Files
will.anderson cd1c6737e8
Dev — Build & local smoke test / build-smoke (pull_request) Successful in 2m11s
Replace k3s with direct soul-demo watchdog in Cloud Run container
Cloud Run gen2 doesn't provide eth0 with a unicast IP, causing k3s flannel
to crash on every container start. k3s was also the wrong architecture for
Cloud Run (HPA inside a container, k3s overhead for one process).

Changes:
- entrypoint.sh: replace k3s server with a bash watchdog loop that starts
  soul-demo directly and restarts it on crash (3s backoff)
- Dockerfile.stage: remove k3s binary, soul-demo-image.tar, k3s manifests
  and their associated dirs/envvars; keep soul-demo binary only
- stage.yaml: remove 'Download k3s binary' step; rename and simplify
  soul-demo build step to compile binary only (no OCI image/tar)
- dev.yaml: update soul-demo placeholder step (binary not tar)
- manifest.el: document HAVE_CURL requirement since manifest.el has no
  c_flags/link_flags directive support
2026-05-10 19:46:35 -05:00

222 lines
8.8 KiB
YAML

name: 'Dev — Build & local smoke test'
# Inner dev-loop gate: proves the build compiles and the server starts cleanly.
# Nothing is deployed to GCP from here; merge to stage for a real environment.
#
# Build approach
#   Pull ci-base from Artifact Registry (it carries elb + elc + runtime at
#   /opt/el), extract the SDK onto the runner host, then run elb build.
#   elb compiles each .el source independently — no combined mega-file, no OOM.
#   Output: dist/neuron-landing (linux/amd64); Dockerfile.stage COPYs it directly.
#
# pull_request events
#   Secrets are not injected, so ci-base can't be pulled. The job falls back to
#   the committed bin/elb-linux-amd64 + bin/elc-linux-amd64 + runtime/.
#   There is no docker cache (no Artifact Registry auth). The compile and the
#   Docker image build still run; the local smoke test step is skipped for PRs.
on:
  push:
    branches:
      - dev
    paths:
      - 'src/**'
      - 'dist/**'
      - 'runtime/**'
      - 'manifest.el'
      - 'Dockerfile.stage'
      - '.gitea/workflows/dev.yaml'
      - '.gitea/workflows/stage.yaml'
      - '.gitea/workflows/deploy.yaml'
  pull_request:
    branches:
      - dev
    paths:
      - 'src/**'
      - 'dist/**'
      - 'runtime/**'
      - 'manifest.el'
      - 'Dockerfile.stage'
      - '.gitea/workflows/dev.yaml'
      - '.gitea/workflows/stage.yaml'
      - '.gitea/workflows/deploy.yaml'
  workflow_dispatch:
jobs:
  build-smoke:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    # Least privilege: this job only reads the repository. The GCP auth step
    # authenticates with a service-account key (credentials_json), not with an
    # OIDC token, so no `id-token: write` permission is requested.
    permissions:
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Shallow clone: the two most recent commits only.
          fetch-depth: 2
# ── GCP auth (push / workflow_dispatch only) ─────────────────────────────
# Secrets are not injected for pull_request events, so GCP auth is gated off
# for PRs; PR builds take the El SDK from committed bin/ + runtime/ instead.
- name: Authenticate to GCP
  if: ${{ github.event_name != 'pull_request' }}
  uses: google-github-actions/auth@v2
  with:
    credentials_json: ${{ secrets.GCP_SA_KEY }}
# Installs the gcloud CLI for the project below; skipped for PRs (no GCP auth).
- name: Set up gcloud SDK
  if: ${{ github.event_name != 'pull_request' }}
  uses: google-github-actions/setup-gcloud@v2
  with:
    project_id: 'neuron-785695'
# Lets docker authenticate against us-central1-docker.pkg.dev (needed for the
# ci-base pull on push builds); skipped for PRs.
- name: Configure docker auth for Artifact Registry
  if: ${{ github.event_name != 'pull_request' }}
  run: |
    gcloud auth configure-docker us-central1-docker.pkg.dev --quiet
- name: Prune Docker to reclaim disk
  run: |
    # Selective prune: stopped containers, dangling images, unused volumes.
    # The build cache is deliberately left alone — it keeps Docker builds fast
    # and under the ~26min runner restart window. This frees ~4-5GB, which is
    # enough to prevent overlay2 "no space left on device" errors.
    # Every prune is best-effort: a failure here must not fail the job.
    for kind in container image volume; do
      docker "$kind" prune -f 2>&1 || true
    done
    # Report remaining disk space on the root filesystem.
    df -h /
# ── El SDK setup ──────────────────────────────────────────────────────
# Push builds: extract elb + elc from ci-base (always latest).
# PR builds: use committed bin/elb-linux-amd64 + bin/elc-linux-amd64 + runtime/.
- name: Extract El SDK from ci-base (push builds)
  if: github.event_name != 'pull_request'
  run: |
    set -euo pipefail
    IMAGE=us-central1-docker.pkg.dev/neuron-785695/neuron-ci/ci-base:dev
    docker pull "$IMAGE"
    # A created (never started) container is enough to copy files out of the image.
    CID=$(docker create "$IMAGE")
    # Remove the scratch container on every exit path, so a failed copy does
    # not leave a stopped container behind on the runner.
    trap 'docker rm "$CID" >/dev/null 2>&1 || true' EXIT
    sudo mkdir -p /opt/el
    docker cp "$CID:/opt/el" /opt/
    # Export tool locations for later steps. EL_RUNTIME points at the
    # checked-out runtime/ directory, not at the copy under /opt/el.
    echo "ELB=/opt/el/dist/bin/elb" >> "$GITHUB_ENV"
    echo "ELC=/opt/el/dist/platform/elc" >> "$GITHUB_ENV"
    echo "EL_RUNTIME=$GITHUB_WORKSPACE/runtime" >> "$GITHUB_ENV"
- name: Set up El SDK from committed bin/ (PR builds)
  if: ${{ github.event_name == 'pull_request' }}
  run: |
    set -euo pipefail
    # Lay the committed toolchain out in a directory next to the workspace.
    DEST="${{ github.workspace }}/../foundation-el"
    BIN_DIR="$DEST/dist/bin"
    PLATFORM_DIR="$DEST/dist/platform"
    RUNTIME_DIR="$DEST/el-compiler/runtime"
    mkdir -p "$BIN_DIR" "$PLATFORM_DIR" "$RUNTIME_DIR"

    # Builder (elb) and compiler (elc) binaries, made executable.
    cp bin/elb-linux-amd64 "$BIN_DIR/elb"
    cp bin/elc-linux-amd64 "$PLATFORM_DIR/elc"
    chmod +x "$BIN_DIR/elb" "$PLATFORM_DIR/elc"

    # Runtime sources: el_runtime.c, el_runtime.h, el_runtime.js.
    for ext in c h js; do
      cp "runtime/el_runtime.$ext" "$RUNTIME_DIR/"
    done

    # Export tool locations for later steps.
    {
      echo "ELB=$BIN_DIR/elb"
      echo "ELC=$PLATFORM_DIR/elc"
      echo "EL_RUNTIME=$RUNTIME_DIR"
    } >> "$GITHUB_ENV"
# ── Build neuron-web binary ───────────────────────────────────────────
- name: Build neuron-web with elb
  run: |
    # This script does not enable `-e` itself; elb's own exit status is read
    # from PIPESTATUS after the pipe into tee.
    set -uo pipefail
    echo "ELB=$ELB ELC=$ELC EL_RUNTIME=$EL_RUNTIME"
    ls -la "$ELB" "$ELC"
    # Line-buffer elb's stdout and keep a copy of all output in /tmp/elb.log.
    stdbuf -oL "$ELB" --elc="$ELC" --runtime="$EL_RUNTIME" 2>&1 | tee /tmp/elb.log
    rc=${PIPESTATUS[0]}
    if [ "$rc" -ne 0 ]; then
      exit "$rc"
    fi
    echo "Binary: $(ls -lh dist/neuron-landing)"
    exit 0
# Runs only when an earlier step failed; prints the elb log if one was written.
- name: Dump full elb output (on failure)
  if: ${{ failure() }}
  run: |
    echo "=== full elb output ==="
    if ! cat /tmp/elb.log; then
      echo "(no log file)"
    fi
# ── Compile JS client sources ─────────────────────────────────────────
- name: Compile JS El sources
  run: |
    set -euo pipefail
    # Stage the JS runtime next to the sources while elc runs.
    cp "$EL_RUNTIME/el_runtime.js" src/js/
    mkdir -p dist/js
    for src in src/js/*.el; do
      # An unmatched glob stays literal; skip anything that is not a file.
      if [ ! -f "$src" ]; then
        continue
      fi
      stem=$(basename "$src" .el)
      out="dist/js/${stem}.js"
      "$ELC" --target=js --bundle --minify --obfuscate "$src" > "$out"
      echo " compiled: $src -> $out"
    done
    # Remove the staged runtime copy again.
    rm -f src/js/el_runtime.js
# ── Docker build + smoke test ─────────────────────────────────────────
#
# PR builds compile with the committed bin/elb-linux-amd64, which may lag
# behind the current El SDK. Smoke-testing that binary is unreliable (glibc
# mismatch in Docker; potential codegen differences when run directly), so a
# PR only has to prove that the code *compiles* and the Docker image *builds*.
# The authoritative runtime check runs on push to dev (ci-base SDK, always
# current).
- name: Compute image tag
  id: tag
  run: |
    # Tag format: dev-<first 8 characters of the commit SHA>.
    printf 'tag=dev-%s\n' "${GITHUB_SHA:0:8}" >> "$GITHUB_OUTPUT"
# Makes sure the four HTML paths exist (touch creates any that are missing).
- name: Touch HTML placeholder files
  run: |
    touch \
      src/index.html \
      src/about.html \
      src/terms.html \
      src/enterprise-terms.html
# Dockerfile.stage COPYs dist/soul-demo, so the path has to exist for the
# Docker build to succeed. An executable placeholder is enough here; the real
# binary is compiled in stage CI.
- name: Create soul-demo placeholder
  run: |
    PLACEHOLDER=dist/soul-demo
    touch "$PLACEHOLDER"
    chmod +x "$PLACEHOLDER"
- name: Build Docker image (local only — no push)
  run: |
    set -euo pipefail
    TAG="${{ steps.tag.outputs.tag }}"
    # Assemble the docker build arguments in order.
    BUILD_ARGS=(--build-arg BUILDKIT_INLINE_CACHE=1)
    # The registry cache source is only added outside pull_request events
    # (PRs have no Artifact Registry auth).
    if [ "${{ github.event_name }}" != "pull_request" ]; then
      BUILD_ARGS+=(--cache-from us-central1-docker.pkg.dev/neuron-785695/neuron-marketing/marketing:latest)
    fi
    BUILD_ARGS+=(-f Dockerfile.stage -t "marketing:${TAG}")
    docker build "${BUILD_ARGS[@]}" .
- name: Local smoke test
  # Push builds only: the binary compiled from ci-base is current and
  # compatible with the runner glibc. Skipped for pull_request events
  # because the committed bin/elb may produce a binary that requires
  # a newer glibc than what the runner environment provides.
  if: github.event_name != 'pull_request'
  run: |
    set -euo pipefail
    # Start the server in the background on port 8080.
    PORT=8080 dist/neuron-landing &
    SERVER_PID=$!
    # Stop the server on every exit path (pass, fail, or unexpected error).
    trap 'kill "$SERVER_PID" 2>/dev/null || true' EXIT
    # Poll up to 15 times, 3s apart, until the root URL answers HTTP 200.
    for i in $(seq 1 15); do
      # curl already prints "000" through -w when the request fails, so only
      # its exit status is swallowed here. Echoing a second "000" would turn
      # the logged status into "000000".
      STATUS=$(curl -sSo /dev/null -w "%{http_code}" --max-time 5 http://localhost:8080/ || true)
      STATUS=${STATUS:-000}
      echo "Attempt $i/15: HTTP $STATUS"
      if [ "$STATUS" = "200" ]; then
        echo "Dev smoke test PASSED"
        exit 0
      fi
      sleep 3
    done
    echo "Dev smoke test FAILED"
    exit 1