From e6fd1100730c497d7a57893bc07c33275c9ebda4 Mon Sep 17 00:00:00 2001 From: Will Anderson Date: Sun, 10 May 2026 11:26:23 -0500 Subject: [PATCH] Single-stage Dockerfile.stage: pre-download k3s on host runner The multi-stage Docker builder (which installed build-essential, compiled soul-demo, and downloaded k3s inside Docker) was causing RWLayer nil corruption on the runner's overlay2 driver. Every affected run failed at apt-get install in the runtime stage after the builder stage completed. Fix: move k3s download to the CI host runner (same pattern as soul-demo compilation, which now passes reliably). Dockerfile.stage becomes single-stage: no apt-get in a builder stage, no network downloads, just COPY of pre-built binaries. Also adds --no-cache to the main docker build for consistency with the soul-demo step fix. --- .gitea/workflows/stage.yaml | 19 ++++++++++-- Dockerfile.stage | 59 +++++++++---------------------------- 2 files changed, 31 insertions(+), 47 deletions(-) diff --git a/.gitea/workflows/stage.yaml b/.gitea/workflows/stage.yaml index a7ad524..05b0e96 100644 --- a/.gitea/workflows/stage.yaml +++ b/.gitea/workflows/stage.yaml @@ -196,13 +196,28 @@ jobs: echo "soul-demo-image.tar: $(du -sh dist/soul-demo-image.tar | cut -f1)" docker rmi soul-demo:local 2>/dev/null || true + - name: Download k3s binary + # Pre-download k3s on the host runner so Dockerfile.stage can COPY it + # directly. Previously k3s was downloaded inside the Docker builder stage, + # which combined with build-essential and C compilation caused RWLayer nil + # corruption on the runner's overlay2 driver. Host-runner download is safe. 
+ if: steps.changetype.outputs.asset_only != 'true' + run: | + set -euo pipefail + curl -fL --retry 3 --retry-delay 10 \ + https://github.com/k3s-io/k3s/releases/download/v1.32.4%2Bk3s1/k3s \ + -o dist/k3s + chmod +x dist/k3s + echo "k3s: $(ls -lh dist/k3s)" + - name: Build and tag image if: steps.changetype.outputs.asset_only != 'true' run: | set -euo pipefail + # --no-cache: prevents reuse of corrupted overlay2 layers from prior failed runs. + # Dockerfile.stage is now single-stage (no builder) so build is fast even without cache. docker build \ - --build-arg BUILDKIT_INLINE_CACHE=1 \ - --cache-from us-central1-docker.pkg.dev/neuron-785695/neuron-marketing/marketing:stage-latest \ + --no-cache \ -f Dockerfile.stage \ -t "marketing:${{ steps.tag.outputs.tag }}" \ . diff --git a/Dockerfile.stage b/Dockerfile.stage index e3197e2..bbf9e1d 100644 --- a/Dockerfile.stage +++ b/Dockerfile.stage @@ -4,50 +4,16 @@ # - neuron-web on port 8080 (landing page server) # - soul-demo on port 7772 (demo chat, localhost only) # -# neuron-web is built by `elb build` in CI (not here). elb compiles each -# .el source independently and links the result — no combined mega-file, -# no exponential memory growth. The binary lands at dist/neuron-landing -# (linux/amd64) and is COPY'd directly into the runtime image. +# All binaries (neuron-web, soul-demo, k3s) are pre-built by CI on the host +# runner before this Dockerfile runs. This keeps the Docker build single-stage +# with no compilation and no binary downloads (the apt-get install of runtime packages is the only step that still uses the network), eliminating the multi-stage +# complexity that caused RWLayer corruption on the runner's overlay2 driver. # -# soul-demo.c is pre-committed (small, no OOM risk) and compiled here. 
+# CI pre-build steps (in stage.yaml): +# - neuron-web: built by `elb build` → dist/neuron-landing +# - soul-demo: compiled by cc on host → dist/soul-demo +# - k3s: downloaded by curl on host → dist/k3s -# ── Stage 1: compile soul-demo ──────────────────────────────────────────────── -FROM debian:bookworm-slim AS builder - -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - build-essential \ - curl \ - libcurl4-openssl-dev \ - libssl-dev \ - ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /build - -COPY runtime/el_runtime.c runtime/el_runtime.h ./ - -# Pre-compile el_runtime as a separate cached layer. -# el_runtime.c changes rarely; main.c changes every run. -# Splitting this out means el_runtime.o is cached across builds when only main.c changes. -# -DHAVE_CURL: the staged el_runtime.c (from el.git) guards the OTLP observability -# section (emit_metric, emit_log, trace_span_*) behind #ifdef HAVE_CURL. -# libcurl IS installed above, so define HAVE_CURL to enable those functions. -RUN cc -O2 -DHAVE_CURL -c el_runtime.c -I. 
-o el_runtime.o - -COPY dist/soul-demo.c dist/vessel_stubs.c ./ - -RUN cc -O2 -rdynamic \ - -DEL_SOUL_DEMO_BUILD \ - -o soul-demo \ - soul-demo.c vessel_stubs.c el_runtime.o \ - -lcurl -lpthread -ldl -lm -lssl -lcrypto - -# ── Download k3s binary ─────────────────────────────────────────────────────── -RUN curl -fL --retry 3 --retry-delay 10 https://github.com/k3s-io/k3s/releases/download/v1.32.4%2Bk3s1/k3s -o /usr/local/bin/k3s \ - && chmod +x /usr/local/bin/k3s - -# ── Stage 2: runtime image ──────────────────────────────────────────────────── FROM debian:bookworm-slim RUN apt-get update \ @@ -67,10 +33,13 @@ RUN apt-get update \ COPY dist/neuron-landing /usr/local/bin/neuron-web RUN chmod +x /usr/local/bin/neuron-web -COPY --from=builder /build/soul-demo /usr/local/bin/soul-demo +# soul-demo binary — compiled by cc on host runner in CI +COPY dist/soul-demo /usr/local/bin/soul-demo +RUN chmod +x /usr/local/bin/soul-demo -# k3s binary -COPY --from=builder /usr/local/bin/k3s /usr/local/bin/k3s +# k3s binary — downloaded from GitHub releases by CI +COPY dist/k3s /usr/local/bin/k3s +RUN chmod +x /usr/local/bin/k3s # soul-demo OCI image tar — k3s imports this at startup (no registry needed) RUN mkdir -p /var/lib/rancher/k3s/agent/images