1 Commits

Author SHA1 Message Date
Will Anderson a529690235 Revert "fix(ci): point Gitea Actions runners at public instance URL"
This reverts commit 6f5d041440.
2026-05-04 16:05:10 -05:00
10 changed files with 203 additions and 210 deletions
+165
View File
@@ -0,0 +1,165 @@
---
# Gitea CI runner — general-purpose (legion)
#
# One-replica Deployment in the `ci` namespace. An init container registers
# the runner with Gitea and writes /data/config.yaml; the main container then
# runs `act_runner daemon` against that config. /data is an emptyDir shared
# between the two containers, so registration state lives only as long as
# the pod does.
#
# The host's k3s containerd socket (/run/k3s/containerd/containerd.sock) is
# mounted into the runner container at /var/run/docker.sock.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gitea-runner
  namespace: ci
  labels:
    app: gitea-runner
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gitea-runner
  template:
    metadata:
      labels:
        app: gitea-runner
      annotations:
        # Bump this value to force a rollout when only the inline config changes.
        config-version: "2026-04-27-containerd-sock"
    spec:
      securityContext:
        runAsNonRoot: false  # act_runner needs root for container management
      initContainers:
        - name: register
          image: registry.neuralplatform.ai/ci-base:latest
          workingDir: /data
          command: ["/bin/sh", "-c"]
          # `set -e` makes the init container fail when `act_runner register`
          # fails. Without it the script's exit status is that of the final
          # `cat`, so a failed registration would be masked and the daemon
          # would start without valid registration state.
          #
          # The heredoc delimiter is quoted ('EOF'), so nothing inside the
          # generated config.yaml is expanded by the shell.
          #
          # NOTE(review): docker_host below points at
          # /run/k3s/containerd/containerd.sock, but the runner container only
          # mounts that host socket at /var/run/docker.sock — confirm which
          # path act_runner is expected to dial.
          args:
            - |
              set -e
              act_runner register \
                --instance "$GITEA_INSTANCE_URL" \
                --token "$GITEA_RUNNER_REGISTRATION_TOKEN" \
                --name legion \
                --labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
                --no-interactive
              cat > /data/config.yaml << 'EOF'
              runner:
                capacity: 2
                timeout: 3h
              container:
                network: host
                docker_host: "unix:///run/k3s/containerd/containerd.sock"
                force_pull: false
                valid_volumes: []
                default_image: "registry.neuralplatform.ai/ci-base:latest"
                extra_hosts:
                  - "gitea.git.svc.cluster.local:10.43.1.53"
              EOF
          # Supplies GITEA_INSTANCE_URL and GITEA_RUNNER_REGISTRATION_TOKEN.
          envFrom:
            - secretRef:
                name: gitea-runner-secret
          volumeMounts:
            - name: data
              mountPath: /data
      containers:
        - name: runner
          image: registry.neuralplatform.ai/ci-base:latest
          workingDir: /data
          command: ["act_runner", "daemon", "--config", "/data/config.yaml"]
          envFrom:
            - secretRef:
                name: gitea-runner-secret
          volumeMounts:
            - name: data
              mountPath: /data
            - name: docker-sock
              mountPath: /var/run/docker.sock
          resources:
            requests:
              memory: 512Mi
              cpu: 250m
            limits:
              memory: 4Gi
              cpu: "4"
      volumes:
        # Scratch space shared by the init and runner containers.
        - name: data
          emptyDir: {}
        # `type: Socket` requires the path to already exist as a UNIX socket
        # on the node.
        - name: docker-sock
          hostPath:
            path: /run/k3s/containerd/containerd.sock
            type: Socket
---
# Neuron Technologies CI runner
#
# One-replica Deployment in the `ci` namespace. An init container registers
# the runner with Gitea and writes /data/config.yaml; the main container then
# runs `act_runner daemon` against that config. /data is an emptyDir shared
# between the two containers, so registration state lives only as long as
# the pod does.
#
# The host's k3s containerd socket (/run/k3s/containerd/containerd.sock) is
# mounted into the runner container at /var/run/docker.sock.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: neuron-technologies-runner
  namespace: ci
  labels:
    app: neuron-technologies-runner
spec:
  replicas: 1
  selector:
    matchLabels:
      app: neuron-technologies-runner
  template:
    metadata:
      labels:
        app: neuron-technologies-runner
      annotations:
        # Bump this value to force a rollout when only the inline config changes.
        config-version: "2026-04-27-containerd-sock"
    spec:
      securityContext:
        runAsNonRoot: false  # act_runner runs as root inside the pod
      initContainers:
        - name: register
          image: registry.neuralplatform.ai/ci-base:latest
          workingDir: /data
          command: ["/bin/sh", "-c"]
          # `set -e` makes the init container fail when `act_runner register`
          # fails. Without it the script's exit status is that of the final
          # `cat`, so a failed registration would be masked and the daemon
          # would start without valid registration state.
          #
          # The heredoc delimiter is quoted ('EOF'), so nothing inside the
          # generated config.yaml is expanded by the shell.
          #
          # NOTE(review): docker_host below points at
          # /run/k3s/containerd/containerd.sock, but the runner container only
          # mounts that host socket at /var/run/docker.sock — confirm which
          # path act_runner is expected to dial.
          args:
            - |
              set -e
              act_runner register \
                --instance "$GITEA_INSTANCE_URL" \
                --token "$GITEA_RUNNER_REGISTRATION_TOKEN" \
                --name neuron-technologies \
                --labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
                --no-interactive
              cat > /data/config.yaml << 'EOF'
              runner:
                capacity: 2
                timeout: 3h
              container:
                network: host
                docker_host: "unix:///run/k3s/containerd/containerd.sock"
                force_pull: false
                valid_volumes: []
                default_image: "registry.neuralplatform.ai/ci-base:latest"
                extra_hosts:
                  - "gitea.git.svc.cluster.local:10.43.1.53"
              EOF
          # Supplies GITEA_INSTANCE_URL and GITEA_RUNNER_REGISTRATION_TOKEN.
          envFrom:
            - secretRef:
                name: neuron-technologies-runner-secret
          volumeMounts:
            - name: data
              mountPath: /data
      containers:
        - name: runner
          image: registry.neuralplatform.ai/ci-base:latest
          workingDir: /data
          command: ["act_runner", "daemon", "--config", "/data/config.yaml"]
          envFrom:
            - secretRef:
                name: neuron-technologies-runner-secret
          volumeMounts:
            - name: data
              mountPath: /data
            - name: docker-sock
              mountPath: /var/run/docker.sock
          resources:
            requests:
              memory: 512Mi
              cpu: 250m
            limits:
              memory: 4Gi
              cpu: "4"
      volumes:
        # Scratch space shared by the init and runner containers.
        - name: data
          emptyDir: {}
        # `type: Socket` requires the path to already exist as a UNIX socket
        # on the node.
        - name: docker-sock
          hostPath:
            path: /run/k3s/containerd/containerd.sock
            type: Socket
-13
View File
@@ -23,16 +23,3 @@ resource "cloudflare_record" "np_web_stage" {
proxied = true
ttl = 1
}
# vault.neuralplatform.ai — A record for the GCP Global HTTPS LB that fronts
# the GCE Raft HA Vault cluster.
#
# The record is DNS-only (proxied = false): the GCP-managed TLS certificate
# terminates at the load balancer, and the Vault nodes behind it listen on
# plain HTTP 8200.
#
# The address is the `vault_lb_ip` output of the servers/gcp workspace
# (`terraform output vault_lb_ip` = 34.54.164.21).
resource "cloudflare_record" "np_vault" {
  zone_id = local.zone_neuralplatform_ai

  type    = "A"
  name    = "vault"
  content = "34.54.164.21"

  ttl     = 60
  proxied = false
}
-42
View File
@@ -1,42 +0,0 @@
# Cloudflare Zero Trust Access — git.neuralplatform.ai (Gitea)
#
# The Gitea Access application itself is currently managed in the Cloudflare
# dashboard, NOT in Terraform. This file only manages the *service token* the
# Gitea Actions runners use to authenticate through CF Access while still
# keeping the human Google-OAuth gate for browser users.
#
# Why not import the application here?
# - Importing the existing dashboard app risks drifting the human-auth
# policy (Google IdP, allowed emails) which is settled and working.
# - Service tokens can be added to a dashboard-managed app without
# importing the app itself; the token resource lives at the account
# level and is referenced from a policy.
# - We pay only the cost we need to. If we later want all Access apps
# in TF we can do a focused import pass.
#
# After `terraform apply` produces the token id/secret, Will must:
# 1. Run `vault kv put secret/gitea-runner-cf-access ...` (see outputs).
# 2. In the Cloudflare dashboard, edit the existing "Gitea" Access
# application's policies and add a new policy:
# Action: Service Auth (decision = non_identity)
# Include: Service Token = "gitea-runner"
# This grants the service token bypass through CF Access on
# git.neuralplatform.ai without changing the human-auth flow.
# Account-level Cloudflare Access service token named "gitea-runner". Its
# client_id / client_secret attributes are exported by the outputs below.
resource "cloudflare_zero_trust_access_service_token" "gitea_runner" {
account_id = var.cloudflare_account_id
name = "gitea-runner"
# The duration is explicitly set to "forever" here, overriding the "8760h"
# (1 year) default this comment previously cited.
# NOTE(review): with "forever" the token presumably never expires, so nothing
# forces a rotation — confirm that is intended and plan rotation explicitly.
duration = "forever"
}
# Exposes the service token's client ID so it can be copied into Vault.
output "gitea_runner_cf_access_client_id" {
  value       = cloudflare_zero_trust_access_service_token.gitea_runner.client_id
  description = "CF Access service token client ID for the Gitea Actions runner. Store in Vault at secret/gitea-runner-cf-access."
}
# Exposes the service token's client secret; marked sensitive so Terraform
# redacts it in plan/apply output.
output "gitea_runner_cf_access_client_secret" {
  sensitive   = true
  value       = cloudflare_zero_trust_access_service_token.gitea_runner.client_secret
  description = "CF Access service token client secret. Store in Vault at secret/gitea-runner-cf-access. Only emitted at creation time."
}
@@ -85,15 +85,3 @@ RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
&& apt-get update \
&& apt-get install -y --no-install-recommends gh \
&& rm -rf /var/lib/apt/lists/*
# Cloudflare Access bootstrap for git clones to git.neuralplatform.ai.
# This script is sourced by bash in build containers via BASH_ENV (set by
# act_runner's container.env in deployment.yaml) so it runs before every
# step. It configures git insteadOf + CF Access extraHeaders from
# CF_ACCESS_CLIENT_ID / CF_ACCESS_CLIENT_SECRET env vars.
#
# We deliberately don't set ENTRYPOINT / CMD here — act_runner spawns
# build containers with its own entrypoint to keep them alive between
# steps, and overriding it breaks job execution.
COPY git-cf-access-init.sh /usr/local/bin/git-cf-access-init.sh
RUN chmod +x /usr/local/bin/git-cf-access-init.sh
@@ -8,7 +8,7 @@ metadata:
labels:
app: gitea-runner
annotations:
config-version: "2026-05-04-cf-access-public-url"
config-version: "2026-05-04-docker-sock-fix"
spec:
replicas: 1
selector:
@@ -19,7 +19,7 @@ spec:
labels:
app: gitea-runner
annotations:
config-version: "2026-05-04-cf-access-public-url"
config-version: "2026-05-04-docker-sock-fix"
spec:
securityContext:
runAsNonRoot: false
@@ -35,7 +35,7 @@ spec:
--name legion \
--labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
--no-interactive
cat > /data/config.yaml << EOF
cat > /data/config.yaml << 'EOF'
runner:
capacity: 2
timeout: 3h
@@ -45,16 +45,6 @@ spec:
force_pull: false
valid_volumes: []
default_image: "registry.neuralplatform.ai/ci-base:latest"
# Build containers run with network: host. The in-cluster
# gitea name does not resolve there, so we redirect git
# operations to https://git.neuralplatform.ai using CF
# Access service-token headers. BASH_ENV makes bash source
# /usr/local/bin/git-cf-access-init.sh before every step,
# which sets up the redirect + headers.
env:
CF_ACCESS_CLIENT_ID: "${CF_ACCESS_CLIENT_ID}"
CF_ACCESS_CLIENT_SECRET: "${CF_ACCESS_CLIENT_SECRET}"
BASH_ENV: "/usr/local/bin/git-cf-access-init.sh"
extra_hosts:
- "gitea.git.svc.cluster.local:10.43.1.53"
EOF
@@ -102,7 +92,7 @@ metadata:
labels:
app: neuron-technologies-runner
annotations:
config-version: "2026-05-04-cf-access-public-url"
config-version: "2026-05-04-docker-sock-fix"
spec:
replicas: 2
selector:
@@ -113,7 +103,7 @@ spec:
labels:
app: neuron-technologies-runner
annotations:
config-version: "2026-05-04-cf-access-public-url"
config-version: "2026-05-04-docker-sock-fix"
spec:
securityContext:
runAsNonRoot: false
@@ -129,7 +119,7 @@ spec:
--name "legion-nt-$(hostname)" \
--labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
--no-interactive
cat > /data/config.yaml << EOF
cat > /data/config.yaml << 'EOF'
runner:
capacity: 2
timeout: 3h
@@ -139,16 +129,6 @@ spec:
force_pull: false
valid_volumes: []
default_image: "registry.neuralplatform.ai/ci-base:latest"
# Build containers run with network: host. The in-cluster
# gitea name does not resolve there, so we redirect git
# operations to https://git.neuralplatform.ai using CF
# Access service-token headers. BASH_ENV makes bash source
# /usr/local/bin/git-cf-access-init.sh before every step,
# which sets up the redirect + headers.
env:
CF_ACCESS_CLIENT_ID: "${CF_ACCESS_CLIENT_ID}"
CF_ACCESS_CLIENT_SECRET: "${CF_ACCESS_CLIENT_SECRET}"
BASH_ENV: "/usr/local/bin/git-cf-access-init.sh"
extra_hosts:
- "gitea.git.svc.cluster.local:10.43.1.53"
EOF
@@ -1,20 +1,12 @@
---
# gitea-runner-secret — neural-platform org runner token
#
# GITEA_INSTANCE_URL stays as the in-cluster URL — the act_runner daemon
# polls it constantly and we don't want every poll to hit Cloudflare Access.
# Build containers, however, need the public URL because they run with
# network: host and can't resolve gitea.git.svc.cluster.local. The
# git-cf-access-init.sh entrypoint in the ci-base image rewrites the
# in-cluster URL to https://git.neuralplatform.ai with the CF Access
# headers from CF_ACCESS_CLIENT_ID / CF_ACCESS_CLIENT_SECRET below.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: gitea-runner-secret
namespace: ci
annotations:
force-sync: "2026-05-04-cf-access"
force-sync: "2026-04-23"
spec:
refreshInterval: 1h
secretStoreRef:
@@ -27,21 +19,11 @@ spec:
data:
GITEA_INSTANCE_URL: "http://gitea.git.svc.cluster.local:3000"
GITEA_RUNNER_REGISTRATION_TOKEN: "{{ .runner_token }}"
CF_ACCESS_CLIENT_ID: "{{ .cf_access_client_id }}"
CF_ACCESS_CLIENT_SECRET: "{{ .cf_access_client_secret }}"
data:
- secretKey: runner_token
remoteRef:
key: secret/data/gitea
property: runner_token
- secretKey: cf_access_client_id
remoteRef:
key: secret/data/gitea-runner-cf-access
property: client_id
- secretKey: cf_access_client_secret
remoteRef:
key: secret/data/gitea-runner-cf-access
property: client_secret
---
# neuron-technologies-runner-secret — neuron-technologies org runner token
apiVersion: external-secrets.io/v1beta1
@@ -49,8 +31,6 @@ kind: ExternalSecret
metadata:
name: neuron-technologies-runner-secret
namespace: ci
annotations:
force-sync: "2026-05-04-cf-access"
spec:
refreshInterval: 1h
secretStoreRef:
@@ -63,18 +43,8 @@ spec:
data:
GITEA_INSTANCE_URL: "http://gitea.git.svc.cluster.local:3000"
GITEA_RUNNER_REGISTRATION_TOKEN: "{{ .runner_token }}"
CF_ACCESS_CLIENT_ID: "{{ .cf_access_client_id }}"
CF_ACCESS_CLIENT_SECRET: "{{ .cf_access_client_secret }}"
data:
- secretKey: runner_token
remoteRef:
key: secret/data/gitea
property: neuron_technologies_runner_token
- secretKey: cf_access_client_id
remoteRef:
key: secret/data/gitea-runner-cf-access
property: client_id
- secretKey: cf_access_client_secret
remoteRef:
key: secret/data/gitea-runner-cf-access
property: client_secret
@@ -1,50 +0,0 @@
#!/bin/sh
# git-cf-access-init.sh
#
# Configures git so any clone/fetch from Gitea ends up going to
# git.neuralplatform.ai with the runner's Cloudflare Access service-token
# headers attached.
#
# NOTE: despite the /bin/sh shebang, the flow described below never executes
# this file directly — bash *sources* it via BASH_ENV. Everything here
# therefore runs inside the step's own shell, which is why the script defines
# no variables or functions of its own.
#
# How this gets invoked:
# The forgejo-runner job execution path runs each step via a
# non-interactive bash invocation inside the build container. Setting
# BASH_ENV=/usr/local/bin/git-cf-access-init.sh in act_runner's
# container.env causes bash to source this script before any step's
# commands run. (See servers/legion/k8s/gitea-runner/deployment.yaml.)
#
# What it does:
# 1. Rewrites http://gitea.git.svc.cluster.local:3000/ → https://git.neuralplatform.ai/
# via insteadOf. The runner registered against the in-cluster URL (no
# CF Access on the daemon's polling loop), so act_runner advertises
# that URL to the build container as github.server_url. Build
# containers run with network: host and can't resolve
# *.svc.cluster.local, so we need to redirect to the public URL.
# 2. Adds the CF Access service-token headers to outbound requests to
# git.neuralplatform.ai so the clone authenticates through CF Access.
#
# Idempotent — re-runs replace any prior config keys without accumulating
# duplicate header entries.
#
# Error handling: every git config call below discards stderr and ignores its
# exit status (`2>/dev/null || true`), so a failed config write does not
# propagate a failure into the step shell that sources this file.
#
# Known limitation: actions/checkout sets an Authorization extraheader
# keyed to the server URL it was given (the in-cluster URL). After
# insteadOf substitution the request goes to the public URL where git
# matches http.<public>.extraheader, and the in-cluster-keyed
# Authorization header is dropped. For public repos this is fine. For
# private repos the per-job token will not be sent — see the PR
# description for the follow-up plan if dharma-el's CI needs that token.
# Do nothing unless BOTH credentials are set and non-empty.
if [ -n "${CF_ACCESS_CLIENT_ID:-}" ] && [ -n "${CF_ACCESS_CLIENT_SECRET:-}" ]; then
# --replace-all keeps a single insteadOf entry no matter how often this runs.
git config --global --replace-all \
url."https://git.neuralplatform.ai/".insteadOf \
"http://gitea.git.svc.cluster.local:3000/" 2>/dev/null || true
# Reset extraHeader on the public URL, then add both CF Access headers.
# (extraHeader is multi-valued, so the two headers are appended with --add
# after the --unset-all has cleared whatever a previous run left behind.)
git config --global --unset-all \
http."https://git.neuralplatform.ai/".extraHeader 2>/dev/null || true
git config --global --add \
http."https://git.neuralplatform.ai/".extraHeader \
"CF-Access-Client-Id: ${CF_ACCESS_CLIENT_ID}" 2>/dev/null || true
git config --global --add \
http."https://git.neuralplatform.ai/".extraHeader \
"CF-Access-Client-Secret: ${CF_ACCESS_CLIENT_SECRET}" 2>/dev/null || true
fi
@@ -82,12 +82,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: neuron-marketing
# minReplicas=1 to match the file's own convention (see header comment).
# Kubernetes only allows minReplicas=0 when at least one Object or External
# metric is configured (queue depth, custom signal, etc.); with only a
# Resource (CPU) metric, scale-to-zero is rejected and the whole HPA is
# invalid — which was blocking neuron-prod's Argo CD sync.
minReplicas: 1
minReplicas: 0
maxReplicas: 8
metrics:
- type: Resource
@@ -117,32 +117,6 @@ spec:
matchLabels:
kubernetes.io/metadata.name: neuron-prod
---
# ── dharma: ingress from Traefik (kube-system) and from neuron-prod ──────────
# Background: the dharma pod and its IngressRoute were both fine, but
# default-deny-all blocked cross-namespace traffic from kube-system, where
# Traefik runs. External requests reached Traefik and came back as 502.
# This policy follows the same shape as `allow-mcp-ingress`, bringing dharma
# into line with the other neuron-prod services.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-dharma-ingress
  namespace: neuron-prod
spec:
  podSelector:
    matchLabels:
      app: dharma
  policyTypes: [Ingress]
  ingress:
    # The two `from` entries are alternatives: traffic is admitted when it
    # originates in either namespace.
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: neuron-prod
---
# ── Egress: all prod pods may reach platform (postgres/redis), vault,
# monitoring (alloy OTLP), kube-dns, and the internet (external APIs) ─
apiVersion: networking.k8s.io/v1
+30 -4
View File
@@ -92,11 +92,37 @@ resource "cloudflare_zero_trust_tunnel_cloudflared_config" "legion" {
}
}
# vault.neuralplatform.ai — moved to GCP Global HTTPS LB (34.54.164.21)
# DNS is now a direct A record (not proxied) in dns-neuralplatform.tf
ingress_rule {
hostname = "vault.neuralplatform.ai"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
# watch.nook.family, jellyfin.nook.family, bazarr.nook.family — removed
# This infrastructure is focused on Neuron; nook.family media stack retired
ingress_rule {
hostname = "watch.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
ingress_rule {
hostname = "jellyfin.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
ingress_rule {
hostname = "bazarr.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
# fornax.neuralplatform.ai — Fornax torrent coordinator (qBittorrent API proxy)