From bec948bccac65e355a8b671d9168adddfd08bc96 Mon Sep 17 00:00:00 2001 From: Will Anderson Date: Mon, 4 May 2026 16:19:35 -0500 Subject: [PATCH] route runner build container clones via public URL with CF Access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Gitea Actions runners on Legion need build containers to be able to clone repos. They run with network: host so they can't resolve gitea.git.svc.cluster.local — they have to use the public URL. The public URL sits behind Cloudflare Access, which is why the previous naive switch (#3) had to be reverted. This change keeps the runner daemon registered against the in-cluster URL (no CF Access on the polling loop) but rewrites http://gitea.git.svc.cluster.local:3000/ to https://git.neuralplatform.ai/ inside the build container, with the CF Access service-token headers injected via git extraHeader. The redirect script is sourced before every job step via BASH_ENV; the CF Access credentials reach the build container through act_runner's container.env, which we now populate from the new gitea-runner-cf-access Vault path that PR #5's Terraform writes. Known limitation documented in the init script: actions/checkout's per-job auth header is keyed to the in-cluster URL and gets dropped after the insteadOf rewrite. Public repos work; private repos that need that token will need a follow-up. 
--- servers/legion/k8s/gitea-runner/Dockerfile | 12 +++++ .../legion/k8s/gitea-runner/deployment.yaml | 32 +++++++++--- .../k8s/gitea-runner/external-secrets.yaml | 32 +++++++++++- .../k8s/gitea-runner/git-cf-access-init.sh | 50 +++++++++++++++++++ 4 files changed, 119 insertions(+), 7 deletions(-) create mode 100644 servers/legion/k8s/gitea-runner/git-cf-access-init.sh diff --git a/servers/legion/k8s/gitea-runner/Dockerfile b/servers/legion/k8s/gitea-runner/Dockerfile index b1ba306..e2c3780 100644 --- a/servers/legion/k8s/gitea-runner/Dockerfile +++ b/servers/legion/k8s/gitea-runner/Dockerfile @@ -85,3 +85,15 @@ RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ && apt-get update \ && apt-get install -y --no-install-recommends gh \ && rm -rf /var/lib/apt/lists/* + +# Cloudflare Access bootstrap for git clones to git.neuralplatform.ai. +# This script is sourced by bash in build containers via BASH_ENV (set by +# act_runner's container.env in deployment.yaml) so it runs before every +# step. It configures git insteadOf + CF Access extraHeaders from +# CF_ACCESS_CLIENT_ID / CF_ACCESS_CLIENT_SECRET env vars. +# +# We deliberately don't set ENTRYPOINT / CMD here — act_runner spawns +# build containers with its own entrypoint to keep them alive between +# steps, and overriding it breaks job execution. 
+COPY git-cf-access-init.sh /usr/local/bin/git-cf-access-init.sh +RUN chmod +x /usr/local/bin/git-cf-access-init.sh diff --git a/servers/legion/k8s/gitea-runner/deployment.yaml b/servers/legion/k8s/gitea-runner/deployment.yaml index 6a32df9..7cacc58 100644 --- a/servers/legion/k8s/gitea-runner/deployment.yaml +++ b/servers/legion/k8s/gitea-runner/deployment.yaml @@ -8,7 +8,7 @@ metadata: labels: app: gitea-runner annotations: - config-version: "2026-05-04-docker-sock-fix" + config-version: "2026-05-04-cf-access-public-url" spec: replicas: 1 selector: @@ -19,7 +19,7 @@ spec: labels: app: gitea-runner annotations: - config-version: "2026-05-04-docker-sock-fix" + config-version: "2026-05-04-cf-access-public-url" spec: securityContext: runAsNonRoot: false @@ -35,7 +35,7 @@ spec: --name legion \ --labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \ --no-interactive - cat > /data/config.yaml << 'EOF' + cat > /data/config.yaml << EOF runner: capacity: 2 timeout: 3h @@ -45,6 +45,16 @@ spec: force_pull: false valid_volumes: [] default_image: "registry.neuralplatform.ai/ci-base:latest" + # Build containers run with network: host. The in-cluster + # gitea name does not resolve there, so we redirect git + # operations to https://git.neuralplatform.ai using CF + # Access service-token headers. BASH_ENV makes bash source + # /usr/local/bin/git-cf-access-init.sh before every step, + # which sets up the redirect + headers. 
+ env: + CF_ACCESS_CLIENT_ID: "${CF_ACCESS_CLIENT_ID}" + CF_ACCESS_CLIENT_SECRET: "${CF_ACCESS_CLIENT_SECRET}" + BASH_ENV: "/usr/local/bin/git-cf-access-init.sh" extra_hosts: - "gitea.git.svc.cluster.local:10.43.1.53" EOF @@ -92,7 +102,7 @@ metadata: labels: app: neuron-technologies-runner annotations: - config-version: "2026-05-04-docker-sock-fix" + config-version: "2026-05-04-cf-access-public-url" spec: replicas: 2 selector: @@ -103,7 +113,7 @@ spec: labels: app: neuron-technologies-runner annotations: - config-version: "2026-05-04-docker-sock-fix" + config-version: "2026-05-04-cf-access-public-url" spec: securityContext: runAsNonRoot: false @@ -119,7 +129,7 @@ spec: --name "legion-nt-$(hostname)" \ --labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \ --no-interactive - cat > /data/config.yaml << 'EOF' + cat > /data/config.yaml << EOF runner: capacity: 2 timeout: 3h @@ -129,6 +139,16 @@ spec: force_pull: false valid_volumes: [] default_image: "registry.neuralplatform.ai/ci-base:latest" + # Build containers run with network: host. The in-cluster + # gitea name does not resolve there, so we redirect git + # operations to https://git.neuralplatform.ai using CF + # Access service-token headers. BASH_ENV makes bash source + # /usr/local/bin/git-cf-access-init.sh before every step, + # which sets up the redirect + headers. 
+ env: + CF_ACCESS_CLIENT_ID: "${CF_ACCESS_CLIENT_ID}" + CF_ACCESS_CLIENT_SECRET: "${CF_ACCESS_CLIENT_SECRET}" + BASH_ENV: "/usr/local/bin/git-cf-access-init.sh" extra_hosts: - "gitea.git.svc.cluster.local:10.43.1.53" EOF diff --git a/servers/legion/k8s/gitea-runner/external-secrets.yaml b/servers/legion/k8s/gitea-runner/external-secrets.yaml index e5ac5d1..a41daa6 100644 --- a/servers/legion/k8s/gitea-runner/external-secrets.yaml +++ b/servers/legion/k8s/gitea-runner/external-secrets.yaml @@ -1,12 +1,20 @@ --- # gitea-runner-secret — neural-platform org runner token +# +# GITEA_INSTANCE_URL stays as the in-cluster URL — the act_runner daemon +# polls it constantly and we don't want every poll to hit Cloudflare Access. +# Build containers, however, need the public URL because they run with +# network: host and can't resolve gitea.git.svc.cluster.local. The +# git-cf-access-init.sh entrypoint in the ci-base image rewrites the +# in-cluster URL to https://git.neuralplatform.ai with the CF Access +# headers from CF_ACCESS_CLIENT_ID / CF_ACCESS_CLIENT_SECRET below. 
apiVersion: external-secrets.io/v1beta1 kind: ExternalSecret metadata: name: gitea-runner-secret namespace: ci annotations: - force-sync: "2026-04-23" + force-sync: "2026-05-04-cf-access" spec: refreshInterval: 1h secretStoreRef: @@ -19,11 +27,21 @@ spec: data: GITEA_INSTANCE_URL: "http://gitea.git.svc.cluster.local:3000" GITEA_RUNNER_REGISTRATION_TOKEN: "{{ .runner_token }}" + CF_ACCESS_CLIENT_ID: "{{ .cf_access_client_id }}" + CF_ACCESS_CLIENT_SECRET: "{{ .cf_access_client_secret }}" data: - secretKey: runner_token remoteRef: key: secret/data/gitea property: runner_token + - secretKey: cf_access_client_id + remoteRef: + key: secret/data/gitea-runner-cf-access + property: client_id + - secretKey: cf_access_client_secret + remoteRef: + key: secret/data/gitea-runner-cf-access + property: client_secret --- # neuron-technologies-runner-secret — neuron-technologies org runner token apiVersion: external-secrets.io/v1beta1 @@ -31,6 +49,8 @@ kind: ExternalSecret metadata: name: neuron-technologies-runner-secret namespace: ci + annotations: + force-sync: "2026-05-04-cf-access" spec: refreshInterval: 1h secretStoreRef: @@ -43,8 +63,18 @@ spec: data: GITEA_INSTANCE_URL: "http://gitea.git.svc.cluster.local:3000" GITEA_RUNNER_REGISTRATION_TOKEN: "{{ .runner_token }}" + CF_ACCESS_CLIENT_ID: "{{ .cf_access_client_id }}" + CF_ACCESS_CLIENT_SECRET: "{{ .cf_access_client_secret }}" data: - secretKey: runner_token remoteRef: key: secret/data/gitea property: neuron_technologies_runner_token + - secretKey: cf_access_client_id + remoteRef: + key: secret/data/gitea-runner-cf-access + property: client_id + - secretKey: cf_access_client_secret + remoteRef: + key: secret/data/gitea-runner-cf-access + property: client_secret diff --git a/servers/legion/k8s/gitea-runner/git-cf-access-init.sh b/servers/legion/k8s/gitea-runner/git-cf-access-init.sh new file mode 100644 index 0000000..e197f06 --- /dev/null +++ b/servers/legion/k8s/gitea-runner/git-cf-access-init.sh @@ -0,0 +1,50 @@ +#!/bin/sh 
+# git-cf-access-init.sh +# +# Configures git so any clone/fetch from Gitea ends up going to +# git.neuralplatform.ai with the runner's Cloudflare Access service-token +# headers attached. +# +# How this gets invoked: +# The act_runner job execution path runs each step via a +# non-interactive bash invocation inside the build container. Setting +# BASH_ENV=/usr/local/bin/git-cf-access-init.sh in act_runner's +# container.env causes bash to source this script before any step's +# commands run. (See servers/legion/k8s/gitea-runner/deployment.yaml.) +# +# What it does: +# 1. Rewrites http://gitea.git.svc.cluster.local:3000/ → https://git.neuralplatform.ai/ +# via insteadOf. The runner registered against the in-cluster URL (no +# CF Access on the daemon's polling loop), so act_runner advertises +# that URL to the build container as github.server_url. Build +# containers run with network: host and can't resolve +# *.svc.cluster.local, so we need to redirect to the public URL. +# 2. Adds the CF Access service-token headers to outbound requests to +# git.neuralplatform.ai so the clone authenticates through CF Access. +# +# Idempotent — re-runs replace any prior config keys without accumulating +# duplicate header entries. +# +# Known limitation: actions/checkout sets an Authorization extraheader +# keyed to the server URL it was given (the in-cluster URL). After +# insteadOf substitution the request goes to the public URL where git +# matches http.<public-url>.extraheader, and the in-cluster-keyed +# Authorization header is dropped. For public repos this is fine. For +# private repos the per-job token will not be sent — see the PR +# description for the follow-up plan if dharma-el's CI needs that token. 
# Point git at the public Gitea URL and attach the Cloudflare Access
# service-token headers, writing to the user's global git config.
#
# This file is sourced through BASH_ENV, i.e. by every non-interactive bash
# started in the build container (nested scripts and parallel recipes
# included). It therefore has to be quiet, cheap, and safe to run many times,
# possibly at the same moment as another git process reads the same config.
#
# Environment:
#   CF_ACCESS_CLIENT_ID, CF_ACCESS_CLIENT_SECRET
#       Service-token credentials. If either is unset or empty, nothing is
#       read or written.
#
# Behaviour:
#   * If the global config already holds exactly the wanted insteadOf rewrite
#     and exactly the two wanted headers, the function only reads and returns.
#     This avoids rewriting ~/.gitconfig on every bash start-up and removes
#     the window in which a concurrent clone could see the headers missing.
#   * Otherwise the keys are (re)written. The header key is collapsed to a
#     single entry with --replace-all and the second header is appended, so
#     the key is never left empty while being updated and stale or duplicate
#     entries are dropped.
#
# The function body is a subshell `( ... )`: helper variables never leak into
# the step's shell, and `exit` below only leaves that subshell. Every command
# is guarded, and git's stderr is discarded on purpose, because the sourcing
# shell may run with `set -e` and this best-effort setup must never fail or
# pollute the output of a user's step.
__git_cf_access_init() (
  public_url='https://git.neuralplatform.ai/'
  cluster_url='http://gitea.git.svc.cluster.local:3000/'
  rewrite_key="url.${public_url}.insteadOf"
  header_key="http.${public_url}.extraHeader"

  # No credentials -> leave git's configuration untouched.
  if [ -z "${CF_ACCESS_CLIENT_ID:-}" ] || [ -z "${CF_ACCESS_CLIENT_SECRET:-}" ]; then
    exit 0
  fi
  command -v git >/dev/null 2>&1 || exit 0

  id_header="CF-Access-Client-Id: ${CF_ACCESS_CLIENT_ID}"
  secret_header="CF-Access-Client-Secret: ${CF_ACCESS_CLIENT_SECRET}"
  # `git config --get-all` prints one value per line, in file order.
  wanted_headers="${id_header}
${secret_header}"

  # A missing key makes `git config --get-all` exit non-zero; treat as empty.
  current_rewrite=$(git config --global --get-all "${rewrite_key}" 2>/dev/null) \
    || current_rewrite=''
  current_headers=$(git config --global --get-all "${header_key}" 2>/dev/null) \
    || current_headers=''

  # Steady state: everything is already in place, so do not write at all.
  if [ "${current_rewrite}" = "${cluster_url}" ] \
    && [ "${current_headers}" = "${wanted_headers}" ]; then
    exit 0
  fi

  git config --global --replace-all \
    "${rewrite_key}" "${cluster_url}" 2>/dev/null || :

  # --replace-all leaves exactly one entry (the client-id header); --add then
  # appends the secret header after it.
  git config --global --replace-all \
    "${header_key}" "${id_header}" 2>/dev/null || :
  git config --global --add \
    "${header_key}" "${secret_header}" 2>/dev/null || :

  exit 0
)

# Run once at source time; never let a failure reach the sourcing shell.
__git_cf_access_init || :