1 Commits

Author SHA1 Message Date
Will Anderson bec948bcca route runner build container clones via public URL with CF Access
The Gitea Actions runners on Legion need build containers to be able
to clone repos. They run with network: host so they can't resolve
gitea.git.svc.cluster.local — they have to use the public URL. The
public URL sits behind Cloudflare Access, which is why the previous
naive switch (#3) had to be reverted.

This change keeps the runner daemon registered against the in-cluster
URL (no CF Access on the polling loop) but rewrites
http://gitea.git.svc.cluster.local:3000/ to
https://git.neuralplatform.ai/ inside the build container, with the
CF Access service-token headers injected via git extraHeader.

The redirect script is sourced before every job step via BASH_ENV;
the CF Access credentials reach the build container through
act_runner's container.env, which we now populate from the new
gitea-runner-cf-access Vault path that PR #5's Terraform writes.

Known limitation documented in the init script: actions/checkout's
per-job auth header is keyed to the in-cluster URL and gets dropped
after the insteadOf rewrite. Public repos work; private repos that
need that token will need a follow-up.
2026-05-04 16:19:35 -05:00
6 changed files with 196 additions and 91 deletions
+165
View File
@@ -0,0 +1,165 @@
---
# Gitea CI runner — general-purpose (legion)
# Uses host Docker socket for container management and docker build/push.
apiVersion: apps/v1
kind: Deployment
metadata:
name: gitea-runner
namespace: ci
labels:
app: gitea-runner
spec:
replicas: 1
selector:
matchLabels:
app: gitea-runner
template:
metadata:
labels:
app: gitea-runner
annotations:
config-version: "2026-04-27-containerd-sock"
spec:
securityContext:
runAsNonRoot: false # act_runner needs root for container management
initContainers:
- name: register
image: registry.neuralplatform.ai/ci-base:latest
workingDir: /data
command: ["/bin/sh", "-c"]
args:
- |
act_runner register \
--instance "$GITEA_INSTANCE_URL" \
--token "$GITEA_RUNNER_REGISTRATION_TOKEN" \
--name legion \
--labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
--no-interactive
cat > /data/config.yaml << 'EOF'
runner:
capacity: 2
timeout: 3h
container:
network: host
docker_host: "unix:///run/k3s/containerd/containerd.sock"
force_pull: false
valid_volumes: []
default_image: "registry.neuralplatform.ai/ci-base:latest"
extra_hosts:
- "gitea.git.svc.cluster.local:10.43.1.53"
EOF
envFrom:
- secretRef:
name: gitea-runner-secret
volumeMounts:
- name: data
mountPath: /data
containers:
- name: runner
image: registry.neuralplatform.ai/ci-base:latest
workingDir: /data
command: ["act_runner", "daemon", "--config", "/data/config.yaml"]
envFrom:
- secretRef:
name: gitea-runner-secret
volumeMounts:
- name: data
mountPath: /data
- name: docker-sock
mountPath: /var/run/docker.sock
resources:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 4Gi
cpu: "4"
volumes:
- name: data
emptyDir: {}
- name: docker-sock
hostPath:
path: /run/k3s/containerd/containerd.sock
type: Socket
---
# Neuron Technologies CI runner
apiVersion: apps/v1
kind: Deployment
metadata:
name: neuron-technologies-runner
namespace: ci
labels:
app: neuron-technologies-runner
spec:
replicas: 1
selector:
matchLabels:
app: neuron-technologies-runner
template:
metadata:
labels:
app: neuron-technologies-runner
annotations:
config-version: "2026-04-27-containerd-sock"
spec:
securityContext:
runAsNonRoot: false
initContainers:
- name: register
image: registry.neuralplatform.ai/ci-base:latest
workingDir: /data
command: ["/bin/sh", "-c"]
args:
- |
act_runner register \
--instance "$GITEA_INSTANCE_URL" \
--token "$GITEA_RUNNER_REGISTRATION_TOKEN" \
--name neuron-technologies \
--labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
--no-interactive
cat > /data/config.yaml << 'EOF'
runner:
capacity: 2
timeout: 3h
container:
network: host
docker_host: "unix:///run/k3s/containerd/containerd.sock"
force_pull: false
valid_volumes: []
default_image: "registry.neuralplatform.ai/ci-base:latest"
extra_hosts:
- "gitea.git.svc.cluster.local:10.43.1.53"
EOF
envFrom:
- secretRef:
name: neuron-technologies-runner-secret
volumeMounts:
- name: data
mountPath: /data
containers:
- name: runner
image: registry.neuralplatform.ai/ci-base:latest
workingDir: /data
command: ["act_runner", "daemon", "--config", "/data/config.yaml"]
envFrom:
- secretRef:
name: neuron-technologies-runner-secret
volumeMounts:
- name: data
mountPath: /data
- name: docker-sock
mountPath: /var/run/docker.sock
resources:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 4Gi
cpu: "4"
volumes:
- name: data
emptyDir: {}
- name: docker-sock
hostPath:
path: /run/k3s/containerd/containerd.sock
type: Socket
-13
View File
@@ -23,16 +23,3 @@ resource "cloudflare_record" "np_web_stage" {
proxied = true
ttl = 1
}
# vault.neuralplatform.ai — GCE Raft HA Vault cluster via GCP Global HTTPS LB.
# DNS-only (not proxied) — GCP managed TLS cert terminates at the LB.
# Vault nodes listen on plain HTTP 8200 internally; LB does TLS.
# IP: terraform output vault_lb_ip from servers/gcp workspace = 34.54.164.21
resource "cloudflare_record" "np_vault" {
zone_id = local.zone_neuralplatform_ai
name = "vault"
type = "A"
content = "34.54.164.21"
proxied = false
ttl = 60
}
-42
View File
@@ -1,42 +0,0 @@
# Cloudflare Zero Trust Access — git.neuralplatform.ai (Gitea)
#
# The Gitea Access application itself is currently managed in the Cloudflare
# dashboard, NOT in Terraform. This file only manages the *service token* the
# Gitea Actions runners use to authenticate through CF Access while still
# keeping the human Google-OAuth gate for browser users.
#
# Why not import the application here?
# - Importing the existing dashboard app risks drifting the human-auth
# policy (Google IdP, allowed emails) which is settled and working.
# - Service tokens can be added to a dashboard-managed app without
# importing the app itself; the token resource lives at the account
# level and is referenced from a policy.
# - We pay only the cost we need to. If we later want all Access apps
# in TF we can do a focused import pass.
#
# After `terraform apply` produces the token id/secret, Will must:
# 1. Run `vault kv put secret/gitea-runner-cf-access ...` (see outputs).
# 2. In the Cloudflare dashboard, edit the existing "Gitea" Access
# application's policies and add a new policy:
# Action: Service Auth (decision = non_identity)
# Include: Service Token = "gitea-runner"
# This grants the service token bypass through CF Access on
# git.neuralplatform.ai without changing the human-auth flow.
resource "cloudflare_zero_trust_access_service_token" "gitea_runner" {
account_id = var.cloudflare_account_id
name = "gitea-runner"
# Default duration is "8760h" (1 year). Rotate via re-apply when needed.
duration = "forever"
}
output "gitea_runner_cf_access_client_id" {
description = "CF Access service token client ID for the Gitea Actions runner. Store in Vault at secret/gitea-runner-cf-access."
value = cloudflare_zero_trust_access_service_token.gitea_runner.client_id
}
output "gitea_runner_cf_access_client_secret" {
description = "CF Access service token client secret. Store in Vault at secret/gitea-runner-cf-access. Only emitted at creation time."
value = cloudflare_zero_trust_access_service_token.gitea_runner.client_secret
sensitive = true
}
@@ -82,12 +82,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: neuron-marketing
# minReplicas=1 to match the file's own convention (see header comment).
# Kubernetes only allows minReplicas=0 when at least one Object or External
# metric is configured (queue depth, custom signal, etc.); with only a
# Resource (CPU) metric, scale-to-zero is rejected and the whole HPA is
# invalid — which was blocking neuron-prod's Argo CD sync.
minReplicas: 1
minReplicas: 0
maxReplicas: 8
metrics:
- type: Resource
@@ -117,32 +117,6 @@ spec:
matchLabels:
kubernetes.io/metadata.name: neuron-prod
---
# ── dharma: accept from Traefik (kube-system) and neuron-prod namespace ──────
# The dharma pod was healthy and the IngressRoute was correct, but cross-
# namespace ingress from kube-system (Traefik) was denied by default-deny-all,
# so every external request landed at Traefik and bounced back as 502. This
# allow rule mirrors `allow-mcp-ingress` and brings dharma into line with the
# other neuron-prod services.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: allow-dharma-ingress
namespace: neuron-prod
spec:
podSelector:
matchLabels:
app: dharma
policyTypes:
- Ingress
ingress:
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: kube-system
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: neuron-prod
---
# ── Egress: all prod pods may reach platform (postgres/redis), vault,
# monitoring (alloy OTLP), kube-dns, and the internet (external APIs) ─
apiVersion: networking.k8s.io/v1
+30 -4
View File
@@ -92,11 +92,37 @@ resource "cloudflare_zero_trust_tunnel_cloudflared_config" "legion" {
}
}
# vault.neuralplatform.ai — moved to GCP Global HTTPS LB (34.54.164.21)
# DNS is now a direct A record (not proxied) in dns-neuralplatform.tf
ingress_rule {
hostname = "vault.neuralplatform.ai"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
# watch.nook.family, jellyfin.nook.family, bazarr.nook.family — removed
# This infrastructure is focused on Neuron; nook.family media stack retired
ingress_rule {
hostname = "watch.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
ingress_rule {
hostname = "jellyfin.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
ingress_rule {
hostname = "bazarr.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
# fornax.neuralplatform.ai — Fornax torrent coordinator (qBittorrent API proxy)