Merge pull request 'Replace k3s with direct soul-demo watchdog' (#57) from fix/checkout-auth-reveal into dev
Dev — Build & local smoke test / build-smoke (push) Successful in 2m26s
Dev — Build & local smoke test / build-smoke (push) Successful in 2m26s
Merge fix/checkout-auth-reveal into dev
This commit was merged in pull request #57.
This commit is contained in:
@@ -172,11 +172,12 @@ jobs:
|
||||
- name: Touch HTML placeholder files
|
||||
run: touch src/index.html src/about.html src/terms.html src/enterprise-terms.html
|
||||
|
||||
- name: Create soul-demo-image.tar placeholder
|
||||
# Dockerfile.stage COPYs this file (used by k3s at runtime).
|
||||
# We only need the COPY to succeed here; real tar is built by
|
||||
# build-stage.sh in the deploy pipeline.
|
||||
run: touch dist/soul-demo-image.tar
|
||||
- name: Create soul-demo placeholder
|
||||
# Dockerfile.stage COPYs dist/soul-demo. We only need the binary to exist
|
||||
# for the Docker build to succeed; the real binary is compiled in stage CI.
|
||||
run: |
|
||||
touch dist/soul-demo
|
||||
chmod +x dist/soul-demo
|
||||
|
||||
- name: Build Docker image (local only — no push)
|
||||
run: |
|
||||
|
||||
@@ -173,15 +173,15 @@ jobs:
|
||||
|
||||
# ── Docker build + push ───────────────────────────────────────────────
|
||||
|
||||
- name: Build soul-demo image tar
|
||||
# Dockerfile.stage COPYs dist/soul-demo-image.tar so k3s can import
|
||||
# soul-demo:local at runtime. We compile soul-demo from source on the
|
||||
# host runner (ci-base has gcc), build a minimal OCI image, and save it.
|
||||
- name: Build soul-demo binary
|
||||
# Compile soul-demo directly on the host runner (ci-base has gcc).
|
||||
# Cloud Run runs soul-demo as a direct subprocess with a watchdog loop —
|
||||
# no k3s, no OCI image needed. One binary per container; Cloud Run
|
||||
# handles horizontal scaling.
|
||||
# Moved AFTER JS compilation to avoid Docker memory pressure killing elc.
|
||||
if: steps.changetype.outputs.asset_only != 'true'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Compile el_runtime.o and soul-demo on the host runner
|
||||
cc -O2 -DHAVE_CURL -c runtime/el_runtime.c -I runtime/ -o /tmp/el_runtime.o
|
||||
cc -O2 -rdynamic -DEL_SOUL_DEMO_BUILD \
|
||||
-I runtime/ \
|
||||
@@ -189,26 +189,6 @@ jobs:
|
||||
dist/soul-demo.c dist/vessel_stubs.c /tmp/el_runtime.o \
|
||||
-lcurl -lpthread -ldl -lm -lssl -lcrypto
|
||||
echo "soul-demo compiled: $(ls -lh dist/soul-demo)"
|
||||
# Package as minimal OCI image for k3s import
|
||||
# --no-cache: prevents reuse of corrupted overlay2 layers from prior failed runs
|
||||
docker build --no-cache -f dist/Dockerfile.soul-demo -t soul-demo:local dist/
|
||||
docker save soul-demo:local -o dist/soul-demo-image.tar
|
||||
echo "soul-demo-image.tar: $(du -sh dist/soul-demo-image.tar | cut -f1)"
|
||||
docker rmi soul-demo:local 2>/dev/null || true
|
||||
|
||||
- name: Download k3s binary
|
||||
# Pre-download k3s on the host runner so Dockerfile.stage can COPY it
|
||||
# directly. Previously k3s was downloaded inside the Docker builder stage,
|
||||
# which combined with build-essential and C compilation caused RWLayer nil
|
||||
# corruption on the runner's overlay2 driver. Host-runner download is safe.
|
||||
if: steps.changetype.outputs.asset_only != 'true'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
curl -fL --retry 3 --retry-delay 10 \
|
||||
https://github.com/k3s-io/k3s/releases/download/v1.32.4%2Bk3s1/k3s \
|
||||
-o dist/k3s
|
||||
chmod +x dist/k3s
|
||||
echo "k3s: $(ls -lh dist/k3s)"
|
||||
|
||||
- name: Build and tag image
|
||||
if: steps.changetype.outputs.asset_only != 'true'
|
||||
|
||||
+4
-24
@@ -4,15 +4,13 @@
|
||||
# - neuron-web on port 8080 (landing page server)
|
||||
# - soul-demo on port 7772 (demo chat, localhost only)
|
||||
#
|
||||
# All binaries (neuron-web, soul-demo, k3s) are pre-built by CI on the host
|
||||
# runner before this Dockerfile runs. This keeps the Docker build single-stage
|
||||
# with no compilation and no network downloads, eliminating the multi-stage
|
||||
# complexity that caused RWLayer corruption on the runner's overlay2 driver.
|
||||
# All binaries (neuron-web, soul-demo) are pre-built by CI on the host runner
|
||||
# before this Dockerfile runs. This keeps the Docker build single-stage with
|
||||
# no compilation and no network downloads.
|
||||
#
|
||||
# CI pre-build steps (in stage.yaml):
|
||||
# - neuron-web: built by `elb build` → dist/neuron-landing
|
||||
# - soul-demo: compiled by cc on host → dist/soul-demo
|
||||
# - k3s: downloaded by curl on host → dist/k3s
|
||||
|
||||
FROM ubuntu:24.04
|
||||
|
||||
@@ -27,9 +25,7 @@ RUN apt-get update \
|
||||
&& groupadd -r landing && useradd -r -g landing landing \
|
||||
&& mkdir -p /srv/landing/assets /srv/landing/js /srv/landing/shares \
|
||||
&& mkdir -p /srv/soul/engram-demo \
|
||||
&& chown -R landing:landing /srv/landing /srv/soul \
|
||||
&& mkdir -p /var/lib/rancher/k3s /tmp/k3s \
|
||||
&& chown -R landing:landing /var/lib/rancher /tmp/k3s
|
||||
&& chown -R landing:landing /srv/landing /srv/soul
|
||||
|
||||
# neuron-web binary — produced by `elb build` in CI (linux/amd64)
|
||||
COPY dist/neuron-landing /usr/local/bin/neuron-web
|
||||
@@ -39,18 +35,6 @@ RUN chmod +x /usr/local/bin/neuron-web
|
||||
COPY dist/soul-demo /usr/local/bin/soul-demo
|
||||
RUN chmod +x /usr/local/bin/soul-demo
|
||||
|
||||
# k3s binary — downloaded from GitHub releases by CI
|
||||
COPY dist/k3s /usr/local/bin/k3s
|
||||
RUN chmod +x /usr/local/bin/k3s
|
||||
|
||||
# soul-demo OCI image tar — k3s imports this at startup (no registry needed)
|
||||
RUN mkdir -p /var/lib/rancher/k3s/agent/images
|
||||
COPY dist/soul-demo-image.tar /var/lib/rancher/k3s/agent/images/soul-demo.tar
|
||||
|
||||
# k3s manifests — auto-applied when k3s starts
|
||||
RUN mkdir -p /var/lib/rancher/k3s/server/manifests
|
||||
COPY dist/k3s-soul-demo.yaml /var/lib/rancher/k3s/server/manifests/soul-demo.yaml
|
||||
|
||||
# Engram snapshot — baked in so soul has memory from cold start
|
||||
COPY dist/engram-snapshot.json /srv/soul/engram-demo/snapshot.json
|
||||
|
||||
@@ -73,11 +57,7 @@ ENV LANDING_ROOT=/srv/landing
|
||||
ENV PORT=8080
|
||||
ENV NEURON_HOME=/srv/soul/engram-demo
|
||||
ENV NEURON_PORT=7772
|
||||
ENV K3S_DATA_DIR=/var/lib/rancher/k3s
|
||||
ENV KUBECONFIG=/var/lib/rancher/k3s/server/cred/admin.kubeconfig
|
||||
|
||||
# k3s requires root to create network namespaces and mount cgroups.
|
||||
# Cloud Run gen2 sandbox is the security boundary here.
|
||||
EXPOSE 8080
|
||||
|
||||
CMD ["/usr/local/bin/entrypoint.sh"]
|
||||
|
||||
Vendored
+17
-32
@@ -1,41 +1,26 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
# SKIP_K3S=1 — bypass k3s/soul-demo startup and go straight to neuron-web.
|
||||
# Used by the dev CI smoke test where the container runtime doesn't support
|
||||
# the kernel capabilities k3s requires (overlayfs / privileged mode).
|
||||
if [ "${SKIP_K3S:-0}" = "1" ]; then
|
||||
echo "[entrypoint] SKIP_K3S=1: starting neuron-web directly (no k3s/soul-demo)."
|
||||
echo "[entrypoint] SKIP_K3S=1: starting neuron-web directly (no soul-demo)."
|
||||
exec /usr/local/bin/neuron-web
|
||||
fi
|
||||
|
||||
echo "[entrypoint] Starting k3s server (embedded soul-demo orchestrator)..."
|
||||
# Soul-demo watchdog: start soul-demo and restart it automatically on crash.
|
||||
# Cloud Run gen2 doesn't reliably provide eth0 with a unicast IP, so k3s flannel
|
||||
# fails at startup. Running soul-demo directly is simpler, lighter, and fully
|
||||
# self-healing. Cloud Run handles horizontal scaling — no HPA needed.
|
||||
echo "[entrypoint] Starting soul-demo watchdog on :${NEURON_PORT:-7772}..."
|
||||
(
|
||||
while true; do
|
||||
echo "[soul-watchdog] starting soul-demo (NEURON_HOME=${NEURON_HOME})"
|
||||
/usr/local/bin/soul-demo 2>&1 || true
|
||||
echo "[soul-watchdog] soul-demo exited, restarting in 3s..."
|
||||
sleep 3
|
||||
done
|
||||
) &
|
||||
|
||||
# k3s server — single-node mode, disable unused components
|
||||
# --disable traefik,servicelb: we don't need an ingress or LB
|
||||
# --disable metrics-server: saves ~50MB RAM
|
||||
# --write-kubeconfig-mode=644: allow non-root reads
|
||||
# --data-dir: use the pre-chowned dir
|
||||
# --flannel-iface=eth0: explicitly set the network interface.
|
||||
# Cloud Run gen2 provides eth0 but k3s default IP detection walks the routing
|
||||
# table looking for a default route, which fails in Cloud Run's network sandbox.
|
||||
# Pinning to eth0 bypasses that detection and lets k3s bind correctly.
|
||||
k3s server \
|
||||
--disable traefik \
|
||||
--disable servicelb \
|
||||
--disable metrics-server \
|
||||
--write-kubeconfig-mode=644 \
|
||||
--data-dir /var/lib/rancher/k3s \
|
||||
--node-name soul-node \
|
||||
--flannel-iface=eth0 &
|
||||
|
||||
K3S_PID=$!
|
||||
|
||||
# Start neuron-web immediately — do NOT block on k3s becoming ready.
|
||||
# Cloud Run's startup probe requires port 8080 to be listening within the
|
||||
# startup timeout. k3s may take 30-60s to initialise; blocking here causes
|
||||
# probe failures and container termination before neuron-web ever starts.
|
||||
# soul-demo becomes available asynchronously once k3s is ready. neuron-web
|
||||
# handles soul-demo being temporarily unavailable gracefully.
|
||||
echo "[entrypoint] Starting neuron-web on port ${PORT:-8080} (k3s initialising in background)..."
|
||||
# Start neuron-web immediately — do NOT block.
|
||||
# Cloud Run startup probe requires port 8080 to answer within the timeout.
|
||||
echo "[entrypoint] Starting neuron-web on port ${PORT:-8080}..."
|
||||
exec /usr/local/bin/neuron-web
|
||||
|
||||
@@ -16,4 +16,12 @@ build {
|
||||
c_source "dist/page_css.c"
|
||||
c_source "dist/page_ga.c"
|
||||
c_source "dist/page_schema.c"
|
||||
// NOTE: neuron-web requires el_runtime.c to be compiled with -DHAVE_CURL
|
||||
// so that http_get/http_post forward to libcurl instead of returning
|
||||
// {"error":"not built with HAVE_CURL"}. The elb binary in ci-base:dev
|
||||
// hardcodes -DHAVE_CURL in its cc invocation, but older elb versions may
|
||||
// not. manifest.el does not support c_flags or link_flags directives —
|
||||
// if upgrading elb breaks HTTP, ensure ci-base:dev ships an elb built
|
||||
// with HAVE_CURL enabled in its hardcoded cc command, or pre-compile
|
||||
// el_runtime.o with -DHAVE_CURL on the host and pass it as a c_source.
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user