From 180acc92a00fc8de8906cd2796475faed029511f Mon Sep 17 00:00:00 2001
From: Will Anderson
Date: Sun, 10 May 2026 12:54:26 -0500
Subject: [PATCH] Non-blocking entrypoint: start neuron-web before k3s is ready

k3s fails to start in Cloud Run gen2 with "unable to select an IP from default
routes" because Cloud Run's network sandbox doesn't expose a standard default
route for k3s to detect. The blocking wait on k3s prevented neuron-web from ever
binding port 8080, causing Cloud Run's startup probe to time out and terminate
the container.

Two changes:
1. Add --flannel-iface=eth0 so k3s pins to Cloud Run's eth0 rather than
   walking the routing table to detect a default-route interface.
2. Start neuron-web immediately after launching k3s in background.
   soul-demo becomes available asynchronously; neuron-web handles it being
   temporarily unavailable gracefully.
---
Reviewer note: this copy was rebuilt from a version whose newlines and runs of
spaces had been collapsed. Two things in the hunk below are inferred rather
than recovered and must be checked against dist/entrypoint.sh (blob 671dbcb)
before applying: the two-space indent on continuation and loop-body lines, and
the two blank context lines (after K3S_PID=$! and after the final exec line)
that make the hunk add up to its declared 28/26 lines.

 dist/entrypoint.sh | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/dist/entrypoint.sh b/dist/entrypoint.sh
index 671dbcb..2955003 100644
--- a/dist/entrypoint.sh
+++ b/dist/entrypoint.sh
@@ -16,28 +16,26 @@ echo "[entrypoint] Starting k3s server (embedded soul-demo orchestrator)..."
 # --disable metrics-server: saves ~50MB RAM
 # --write-kubeconfig-mode=644: allow non-root reads
 # --data-dir: use the pre-chowned dir
+# --flannel-iface=eth0: explicitly set the network interface.
+# Cloud Run gen2 provides eth0 but k3s default IP detection walks the routing
+# table looking for a default route, which fails in Cloud Run's network sandbox.
+# Pinning to eth0 bypasses that detection and lets k3s bind correctly.
 k3s server \
   --disable traefik \
   --disable servicelb \
   --disable metrics-server \
   --write-kubeconfig-mode=644 \
   --data-dir /var/lib/rancher/k3s \
-  --node-name soul-node &
+  --node-name soul-node \
+  --flannel-iface=eth0 &
 K3S_PID=$!
 
-echo "[entrypoint] Waiting for k3s to become ready..."
-until k3s kubectl get nodes --no-headers 2>/dev/null | grep -q "Ready"; do
-  sleep 2
-done
-echo "[entrypoint] k3s ready. soul-demo Deployment will be applied automatically from manifests."
-
-# Wait for soul-demo pod to be Running before starting neuron-web
-echo "[entrypoint] Waiting for soul-demo pod..."
-until k3s kubectl get pods -l app=soul-demo --no-headers 2>/dev/null | grep -q "Running"; do
-  sleep 3
-done
-echo "[entrypoint] soul-demo is running."
-
-echo "[entrypoint] Starting neuron-web on port ${PORT:-8080}..."
+# Start neuron-web immediately — do NOT block on k3s becoming ready.
+# Cloud Run's startup probe requires port 8080 to be listening within the
+# startup timeout. k3s may take 30-60s to initialise; blocking here causes
+# probe failures and container termination before neuron-web ever starts.
+# soul-demo becomes available asynchronously once k3s is ready. neuron-web
+# handles soul-demo being temporarily unavailable gracefully.
+echo "[entrypoint] Starting neuron-web on port ${PORT:-8080} (k3s initialising in background)..."
 exec /usr/local/bin/neuron-web
 