1 Commits

Author SHA1 Message Date
Will Anderson bec948bcca route runner build container clones via public URL with CF Access
The Gitea Actions runners on Legion need build containers to be able
to clone repos. They run with network: host so they can't resolve
gitea.git.svc.cluster.local — they have to use the public URL. The
public URL sits behind Cloudflare Access, which is why the previous
naive switch (#3) had to be reverted.

This change keeps the runner daemon registered against the in-cluster
URL (no CF Access on the polling loop) but rewrites
http://gitea.git.svc.cluster.local:3000/ to
https://git.neuralplatform.ai/ inside the build container, with the
CF Access service-token headers injected via git extraHeader.

The redirect script is sourced before every job step via BASH_ENV;
the CF Access credentials reach the build container through
act_runner's container.env, which we now populate from the new
gitea-runner-cf-access Vault path that PR #5's Terraform writes.

Known limitation documented in the init script: actions/checkout's
per-job auth header is keyed to the in-cluster URL and gets dropped
after the insteadOf rewrite. Public repos work; private repos that
need that token will need a follow-up.
2026-05-04 16:19:35 -05:00
66 changed files with 534 additions and 1816 deletions
-115
View File
@@ -148,118 +148,3 @@ resource "google_secret_manager_secret" "license_admin_token" {
ignore_changes = [id]
}
}
# ── Gitea — database + user on neuron-prod-pg15 ───────────────────────────────
# Gitea on GKE uses the existing Cloud SQL instance with a Cloud SQL Auth Proxy
# sidecar. Connection via unix socket — no public IP exposure.
# Logical database named "gitea" on the shared Cloud SQL instance.
resource "google_sql_database" "gitea" {
  project  = var.project_id
  instance = google_sql_database_instance.main.name
  name     = "gitea"
}
# Generated password for the gitea SQL user.
resource "random_password" "gitea_db" {
  # Alphanumeric only: the value is embedded in a Cloud SQL unix-socket DSN,
  # so leaving out special characters avoids any quoting concerns.
  special = false
  length  = 32
}
# SQL login "gitea" on the shared instance, authenticated with the generated password.
resource "google_sql_user" "gitea" {
  project  = var.project_id
  instance = google_sql_database_instance.main.name
  name     = "gitea"
  password = random_password.gitea_db.result
}
# ── Gitea service account (for Workload Identity → Cloud SQL) ─────────────────
# GCP identity for the Gitea workload on GKE.
resource "google_service_account" "gitea" {
  project      = var.project_id
  account_id   = "gitea-gke"
  display_name = "Gitea GKE SA"
  description  = "Service account for the Gitea pod on GKE. Used by the Cloud SQL Auth Proxy sidecar via Workload Identity."
}
# Project-level Cloud SQL Client role for the Gitea service account.
resource "google_project_iam_member" "gitea_sql_client" {
  member  = "serviceAccount:${google_service_account.gitea.email}"
  role    = "roles/cloudsql.client"
  project = var.project_id
}
# ── Secret Manager — Gitea database URL ───────────────────────────────────────
# Full DSN using the Cloud SQL Auth Proxy unix socket path.
# The proxy sidecar mounts the socket at /cloudsql/<connection_name>.
# Secret container for the Gitea DSN; the payload lives in the
# google_secret_manager_secret_version of the same name.
resource "google_secret_manager_secret" "gitea_database_url" {
  project   = var.project_id
  secret_id = "gitea-database-url"

  # Automatic replication.
  replication {
    auto {}
  }
}
# Key/value DSN: unix-socket host under /cloudsql/<connection_name>, user and
# database both "gitea", the generated password, and sslmode=disable.
resource "google_secret_manager_secret_version" "gitea_database_url" {
  secret_data = "host=/cloudsql/${google_sql_database_instance.main.connection_name} user=gitea password=${random_password.gitea_db.result} dbname=gitea sslmode=disable"
  secret      = google_secret_manager_secret.gitea_database_url.id
}
# gitea-db-password — the raw password only, for use in Gitea's GITEA__database__PASSWD.
# The Cloud SQL Auth Proxy provides the unix socket; Gitea uses standard postgres
# password auth through the proxy socket.
# Secret container holding only the raw gitea database password.
resource "google_secret_manager_secret" "gitea_db_password" {
  project   = var.project_id
  secret_id = "gitea-db-password"

  # Automatic replication.
  replication {
    auto {}
  }
}
# Payload: the generated password, with no surrounding DSN.
resource "google_secret_manager_secret_version" "gitea_db_password" {
  secret_data = random_password.gitea_db.result
  secret      = google_secret_manager_secret.gitea_db_password.id
}
# Allow the Gitea GCP SA to access its secrets
# Read access to the gitea-database-url secret for the Gitea service account.
resource "google_secret_manager_secret_iam_member" "gitea_database_url_accessor" {
  member    = "serviceAccount:${google_service_account.gitea.email}"
  role      = "roles/secretmanager.secretAccessor"
  secret_id = google_secret_manager_secret.gitea_database_url.secret_id
  project   = var.project_id
}
# Read access to the gitea-db-password secret for the Gitea service account.
resource "google_secret_manager_secret_iam_member" "gitea_db_password_accessor" {
  member    = "serviceAccount:${google_service_account.gitea.email}"
  role      = "roles/secretmanager.secretAccessor"
  secret_id = google_secret_manager_secret.gitea_db_password.secret_id
  project   = var.project_id
}
# ESO Workload Identity — allows the external-secrets controller SA on GKE
# to impersonate the gitea GCP SA for Secret Manager access.
# Lets the k8s SA external-secrets/external-secrets act as the Gitea GCP SA
# through Workload Identity.
resource "google_service_account_iam_member" "eso_gitea_wi" {
  member             = "serviceAccount:${var.project_id}.svc.id.goog[external-secrets/external-secrets]"
  role               = "roles/iam.workloadIdentityUser"
  service_account_id = google_service_account.gitea.name

  # The binding is created only after the cluster resource exists.
  depends_on = [google_container_cluster.neuron_platform]
}
# Lets the k8s SA external-secrets/external-secrets mint tokens for the Gitea
# GCP SA (Service Account Token Creator).
resource "google_service_account_iam_member" "eso_gitea_token_creator" {
  member             = "serviceAccount:${var.project_id}.svc.id.goog[external-secrets/external-secrets]"
  role               = "roles/iam.serviceAccountTokenCreator"
  service_account_id = google_service_account.gitea.name

  # The binding is created only after the cluster resource exists.
  depends_on = [google_container_cluster.neuron_platform]
}
# Email of the Gitea GCP service account.
output "gitea_service_account_email" {
  value       = google_service_account.gitea.email
  description = "Gitea GKE SA email — used in the Workload Identity annotation"
}
# Short secret_id (not the full resource path) of the Gitea DSN secret.
output "gitea_database_secret_id" {
  value       = google_secret_manager_secret.gitea_database_url.secret_id
  description = "Secret Manager secret ID for the Gitea database URL"
}
-68
View File
@@ -1,68 +0,0 @@
# ── DocuSeal GKE service account and Workload Identity ───────────────────────
# DocuSeal runs in the `docuseal` namespace on GKE. It accesses GCP Secret
# Manager via Workload Identity — no JSON key files.
#
# Secret flow: Vault → GCP Secret Manager → ExternalSecret → k8s Secret → pod
# GCP identity for the DocuSeal workload on GKE.
resource "google_service_account" "docuseal_gke" {
  project      = var.project_id
  account_id   = "docuseal-gke"
  display_name = "DocuSeal GKE"
  description  = "Identity for DocuSeal pod on GKE. Accesses Secret Manager via Workload Identity."
}
# ── Secret Manager access ─────────────────────────────────────────────────────
# Grants the DocuSeal GCP SA roles/secretmanager.secretAccessor on each
# DocuSeal secret. The bindings differ only in secret_id, so they are generated
# from a single set: adding or removing a secret is a one-line change and the
# role/member cannot drift between copies.
resource "google_secret_manager_secret_iam_member" "docuseal_secret_accessor" {
  for_each = toset([
    "docuseal-secret-key-base",
    "docuseal-smtp-host",
    "docuseal-smtp-port",
    "docuseal-smtp-username",
    "docuseal-smtp-password",
    "docuseal-smtp-from",
  ])

  project   = var.project_id
  secret_id = each.value
  role      = "roles/secretmanager.secretAccessor"
  member    = "serviceAccount:${google_service_account.docuseal_gke.email}"
}

# State migration (requires Terraform >= 1.1): map every former standalone
# resource onto its for_each instance so the plan shows moves instead of a
# destroy/create cycle that would briefly revoke access.
moved {
  from = google_secret_manager_secret_iam_member.docuseal_secret_key_base
  to   = google_secret_manager_secret_iam_member.docuseal_secret_accessor["docuseal-secret-key-base"]
}

moved {
  from = google_secret_manager_secret_iam_member.docuseal_smtp_host
  to   = google_secret_manager_secret_iam_member.docuseal_secret_accessor["docuseal-smtp-host"]
}

moved {
  from = google_secret_manager_secret_iam_member.docuseal_smtp_port
  to   = google_secret_manager_secret_iam_member.docuseal_secret_accessor["docuseal-smtp-port"]
}

moved {
  from = google_secret_manager_secret_iam_member.docuseal_smtp_username
  to   = google_secret_manager_secret_iam_member.docuseal_secret_accessor["docuseal-smtp-username"]
}

moved {
  from = google_secret_manager_secret_iam_member.docuseal_smtp_password
  to   = google_secret_manager_secret_iam_member.docuseal_secret_accessor["docuseal-smtp-password"]
}

moved {
  from = google_secret_manager_secret_iam_member.docuseal_smtp_from
  to   = google_secret_manager_secret_iam_member.docuseal_secret_accessor["docuseal-smtp-from"]
}
# ── Workload Identity binding ─────────────────────────────────────────────────
# The k8s SA `docuseal` in the `docuseal` namespace can impersonate this GCP SA.
# ESO uses this to authenticate to Secret Manager without a key file.
# Lets the k8s SA docuseal/docuseal act as the DocuSeal GCP SA through
# Workload Identity.
resource "google_service_account_iam_member" "docuseal_workload_identity" {
  member             = "serviceAccount:${var.project_id}.svc.id.goog[docuseal/docuseal]"
  role               = "roles/iam.workloadIdentityUser"
  service_account_id = google_service_account.docuseal_gke.name

  # The binding is created only after the cluster resource exists.
  depends_on = [google_container_cluster.neuron_platform]
}
-40
View File
@@ -254,46 +254,6 @@ resource "google_service_account_iam_member" "ci_pusher_wif_neuron_web" {
member = "principalSet://iam.googleapis.com/${google_iam_workload_identity_pool.gitea.name}/attribute.repository/neuron-technologies/neuron-web"
}
# ── Gitea Actions runner on GKE ───────────────────────────────────────────────
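# A k8s Deployment in the `ci` namespace on GKE runs act_runner. There is no
# DinD sidecar: GKE Autopilot rejects privileged containers, so the runner is
# configured in host mode. The k8s SA `gitea-runner` is bound to this GCP SA
# via Workload Identity so the runner registration token can be read from
# Secret Manager.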
#
# See servers/gcp/k8s/gitea-runner/ for the k8s manifests.
# GCP identity for the GKE-hosted Gitea Actions runner.
resource "google_service_account" "gitea_runner_gke" {
  project      = var.project_id
  account_id   = "gitea-runner-gke"
  display_name = "Gitea Actions runner (GKE identity)"
  description  = "Workload Identity SA for the GKE-hosted Gitea Actions runner. Read-only access to the registration token."
}
# Allow the GKE SA to read the runner registration token from Secret Manager.
# Read access to the "gitea-runner-token" secret for the runner's GCP SA.
# The secret is referenced by its literal ID rather than a resource address.
resource "google_secret_manager_secret_iam_member" "runner_gke_token_access" {
  member    = "serviceAccount:${google_service_account.gitea_runner_gke.email}"
  role      = "roles/secretmanager.secretAccessor"
  secret_id = "gitea-runner-token"
  project   = var.project_id
}
# Workload Identity binding — the k8s SA `gitea-runner` in the `ci` namespace
# can impersonate this GCP SA without a JSON key file.
# Lets the k8s SA ci/gitea-runner act as the runner GCP SA through Workload
# Identity.
resource "google_service_account_iam_member" "gitea_runner_gke_workload_identity" {
  member             = "serviceAccount:${var.project_id}.svc.id.goog[ci/gitea-runner]"
  role               = "roles/iam.workloadIdentityUser"
  service_account_id = google_service_account.gitea_runner_gke.name

  # The binding is created only after the cluster resource exists.
  depends_on = [google_container_cluster.neuron_platform]
}
# Allow the runner to pull images from Artifact Registry (needed for build jobs
# that pull from neuron-* repos, and for the runner's own ci-base image).
# Project-wide Artifact Registry read access for the runner's GCP SA.
resource "google_project_iam_member" "gitea_runner_gke_ar_reader" {
  member  = "serviceAccount:${google_service_account.gitea_runner_gke.email}"
  role    = "roles/artifactregistry.reader"
  project = var.project_id
}
# ── Outputs ───────────────────────────────────────────────────────────────────
output "gitea_runner_vm_name" {
-94
View File
@@ -1,94 +0,0 @@
# ── GKE Autopilot Cluster — neuron-platform ───────────────────────────────────
#
# Regional Autopilot cluster in us-central1. Autopilot manages the node pool
# automatically — no node pools to configure, no VM sizes to choose.
# Pods spread across a/b/c zones automatically by GKE.
#
# Workload Identity is required so Vault pods can access KMS (auto-unseal)
# and Gitea pods can use Cloud SQL Auth Proxy without key files.
#
# After `terraform apply`, register the cluster with Legion Argo CD:
# 1. Get the endpoint: terraform output gke_cluster_endpoint
# 2. Get credentials: gcloud container clusters get-credentials neuron-platform \
# --region us-central1 --project neuron-785695
# 3. Register with Argo CD:
# argocd cluster add <context-name> --name gke-neuron-platform
# 4. Update servers/gcp/k8s/argocd-apps/*.yaml with the actual cluster endpoint.
# GKE Autopilot cluster "neuron-platform" in us-central1, managed through the
# google-beta provider.
resource "google_container_cluster" "neuron_platform" {
  provider = google-beta

  project  = var.project_id
  name     = "neuron-platform"
  location = "us-central1"

  # Autopilot mode: GKE owns node pools, scaling and node security.
  enable_autopilot = true

  # Refuse `terraform destroy` on the cluster unless this is flipped first.
  deletion_protection = true

  release_channel {
    channel = "REGULAR"
  }

  # Workload pool referenced by the Workload Identity IAM bindings.
  workload_identity_config {
    workload_pool = "${var.project_id}.svc.id.goog"
  }

  # Empty block: no explicit network or secondary ranges are set here.
  ip_allocation_policy {}

  # Never issue a client certificate for authenticating to the control plane.
  master_auth {
    client_certificate_config {
      issue_client_certificate = false
    }
  }
}
# ── Workload Identity bindings ────────────────────────────────────────────────
#
# Each GCP SA gets a binding that allows its corresponding k8s ServiceAccount
# (in the GKE cluster) to impersonate it via Workload Identity.
# The k8s SA must also have the annotation:
# iam.gke.io/gcp-service-account: <gcp-sa-email>
# Vault pods (namespace: vault, k8s SA: vault) → vault-unseal GCP SA
# This lets the Vault StatefulSet access KMS for auto-unseal without key files.
# Lets the k8s SA vault/vault act as the vault-unseal GCP SA through Workload
# Identity.
resource "google_service_account_iam_member" "vault_workload_identity" {
  member             = "serviceAccount:${var.project_id}.svc.id.goog[vault/vault]"
  role               = "roles/iam.workloadIdentityUser"
  service_account_id = google_service_account.vault_unseal.name

  # The binding is created only after the cluster resource exists.
  depends_on = [google_container_cluster.neuron_platform]
}
# Gitea pod (namespace: gitea, k8s SA: gitea) → gitea GCP SA
# This lets the Cloud SQL Auth Proxy sidecar connect to neuron-prod-pg15.
# Lets the k8s SA gitea/gitea act as the Gitea GCP SA through Workload
# Identity.
resource "google_service_account_iam_member" "gitea_workload_identity" {
  member             = "serviceAccount:${var.project_id}.svc.id.goog[gitea/gitea]"
  role               = "roles/iam.workloadIdentityUser"
  service_account_id = google_service_account.gitea.name

  # The binding is created only after the cluster resource exists.
  depends_on = [google_container_cluster.neuron_platform]
}
# ── Outputs ───────────────────────────────────────────────────────────────────
# Cluster API endpoint with an https:// prefix. Marked sensitive, so it is
# redacted in plan/apply output.
output "gke_cluster_endpoint" {
  sensitive   = true
  value       = "https://${google_container_cluster.neuron_platform.endpoint}"
  description = "GKE cluster API endpoint — use as destination.server in Argo CD Applications"
}
# Name of the GKE cluster as recorded on the cluster resource.
output "gke_cluster_name" {
  value       = google_container_cluster.neuron_platform.name
  description = "GKE cluster name"
}
# Workload pool string, built from the project ID rather than read back from
# the cluster resource.
output "gke_workload_pool" {
  value       = "${var.project_id}.svc.id.goog"
  description = "Workload Identity pool — used in Workload Identity bindings"
}
@@ -1,26 +0,0 @@
# Argo CD Application "docuseal-gke": syncs the manifests under
# servers/gcp/k8s/docuseal (branch main) into the docuseal namespace of the
# cluster at destination.server.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: docuseal-gke
namespace: argocd
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
targetRevision: main
path: servers/gcp/k8s/docuseal
destination:
# Target cluster API endpoint, hard-coded as an IP address.
server: https://34.63.89.52
namespace: docuseal
# Exclude the Deployment /status subtree from diffing.
ignoreDifferences:
- group: apps
kind: Deployment
jsonPointers:
- /status
syncPolicy:
# Sync automatically; prune resources removed from git and revert live drift.
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
@@ -1,46 +0,0 @@
# Argo CD Application "gitea-gke": syncs the manifests under
# servers/gcp/k8s/gitea (branch main) into the gitea namespace of the cluster
# at destination.server.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: gitea-gke
namespace: argocd
annotations:
# NOTE(review): this key has no entries — everything below is a comment, so
# `annotations` parses as null. Consider moving these notes above `metadata:`.
#
# Apply to the Legion Argo CD instance after registering the GKE cluster.
# See vault-gke.yaml for the cluster registration steps.
#
# Migration checklist (Gitea from Legion to GKE):
# 1. terraform apply (creates GKE cluster, Cloud SQL gitea DB, secrets)
# 2. Register GKE cluster with Legion Argo CD (see vault-gke.yaml)
# 3. Install ESO on GKE: helm install external-secrets external-secrets/external-secrets
# --namespace external-secrets --create-namespace
# 4. Apply this Application to Legion Argo CD
# 5. Verify gitea pod is running on GKE with DB connectivity
# 6. Take a tar of /data from the Legion Gitea pod and restore to GKE PVC
# 7. Update Cloudflare Tunnel on Legion: remove git.neuralplatform.ai route
# 8. Add GKE ingress / GCP LB rule for git.neuralplatform.ai
# 9. Decommission Gitea on Legion (remove gitea*.yaml from servers/legion/apps/)
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
targetRevision: main
path: servers/gcp/k8s/gitea
destination:
# GKE API endpoint (the GKE_CLUSTER_ENDPOINT placeholder has already been
# filled in). If the cluster is recreated, refresh the value from:
# terraform -chdir=servers/gcp output -raw gke_cluster_endpoint
server: https://34.63.89.52
namespace: gitea
# terminatingReplicas is a newer Deployment status field that Argo CD's
# bundled schema doesn't know about — causes ComparisonError during diff.
# Ignoring /status entirely is safe: Argo CD never manages status fields.
ignoreDifferences:
- group: apps
kind: Deployment
jsonPointers:
- /status
syncPolicy:
# Sync automatically; prune resources removed from git and revert live drift.
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
@@ -1,26 +0,0 @@
# Argo CD Application "gitea-runner-gke": syncs the manifests under
# servers/gcp/k8s/gitea-runner (branch main) into the ci namespace of the
# cluster at destination.server.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: gitea-runner-gke
namespace: argocd
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
targetRevision: main
path: servers/gcp/k8s/gitea-runner
destination:
# Target cluster API endpoint, hard-coded as an IP address.
server: https://34.63.89.52
namespace: ci
# Exclude the Deployment /status subtree from diffing.
ignoreDifferences:
- group: apps
kind: Deployment
jsonPointers:
- /status
syncPolicy:
# Sync automatically; prune resources removed from git and revert live drift.
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
@@ -1,33 +0,0 @@
# Argo CD Application "vault-gke": syncs the manifests under
# servers/gcp/k8s/vault (branch main) into the vault namespace of the cluster
# at destination.server.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: vault-gke
namespace: argocd
annotations:
# NOTE(review): this key has no entries — everything below is a comment, so
# `annotations` parses as null. Consider moving these notes above `metadata:`.
#
# Syncs plain k8s manifests for Vault to the GKE cluster.
# The Vault Helm release itself is in vault-helm-gke.yaml.
#
# Apply to Legion Argo CD after registering the GKE cluster:
# 1. gcloud container clusters get-credentials neuron-platform \
# --region us-central1 --project neuron-785695
# 2. argocd cluster add <context-name> --name gke-neuron-platform
# 3. Update GKE_CLUSTER_ENDPOINT in all argocd-apps/*.yaml:
# terraform -chdir=servers/gcp output -raw gke_cluster_endpoint
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
targetRevision: main
path: servers/gcp/k8s/vault
destination:
# GKE API endpoint (the GKE_CLUSTER_ENDPOINT placeholder has already been
# filled in). If the cluster is recreated, refresh the value from:
# terraform -chdir=servers/gcp output -raw gke_cluster_endpoint
server: https://34.63.89.52
namespace: vault
syncPolicy:
# Sync automatically; prune resources removed from git and revert live drift.
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
@@ -1,175 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: vault-helm-gke
namespace: argocd
annotations:
# Deploys the Vault Helm chart to the GKE cluster via Legion Argo CD.
# destination.server holds the GKE API endpoint. If the cluster is recreated,
# refresh it from: terraform -chdir=servers/gcp output -raw gke_cluster_endpoint
spec:
project: default
source:
repoURL: https://helm.releases.hashicorp.com
chart: vault
targetRevision: "0.29.1"
helm:
values: |
global:
enabled: true
injector:
enabled: false
ui:
enabled: true
server:
image:
repository: hashicorp/vault
tag: "1.19.2"
# Workload Identity — Vault pod k8s SA impersonates vault-unseal GCP SA
# for KMS auto-unseal. Binding is in servers/gcp/gke.tf.
serviceAccount:
create: true
name: vault
annotations:
iam.gke.io/gcp-service-account: vault-unseal@neuron-785695.iam.gserviceaccount.com
# GKE Autopilot: no privileged containers. Vault doesn't need privilege.
# Request IPC_LOCK so Vault can lock memory (prevents secrets swap).
securityContext:
capabilities:
add:
- IPC_LOCK
# Single replica for now — raft standby unsealing requires the barrier
# key to be shared over port 8201 (cluster port), but 8201 only opens
# after unsealing (deadlock). Running solo on pod 0 until we resolve
# the standby bootstrap path. Scale back to 3 once fixed.
ha:
enabled: true
replicas: 1
raft:
enabled: true
setNodeId: true
config: |
ui = true
listener "tcp" {
tls_disable = 1
address = "[::]:8200"
cluster_address = "[::]:8201"
}
storage "raft" {
path = "/vault/data"
retry_join {
leader_api_addr = "http://vault-helm-gke-0.vault-helm-gke-internal:8200"
}
retry_join {
leader_api_addr = "http://vault-helm-gke-1.vault-helm-gke-internal:8200"
}
retry_join {
leader_api_addr = "http://vault-helm-gke-2.vault-helm-gke-internal:8200"
}
}
seal "gcpckms" {
project = "neuron-785695"
region = "global"
key_ring = "vault"
crypto_key = "vault-unseal"
}
telemetry {
prometheus_retention_time = "30s"
disable_hostname = false
}
# Spread pods across GKE zones
topologySpreadConstraints:
- maxSkew: 1
topologyKey: topology.kubernetes.io/zone
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
app.kubernetes.io/name: vault
component: server
# 10Gi per pod on standard-rwo (GKE's balanced persistent disk class).
# Switch storageClass to premium-rwo if pd-ssd is actually required.
dataStorage:
enabled: true
size: 10Gi
storageClass: standard-rwo
accessMode: ReadWriteOnce
# Annotations on the pod template — bumping rollme triggers a
# StatefulSet rolling restart (pods 2→1→0) to pick up the
# corrected raft retry_join addresses.
podAnnotations:
rollme: "2026-05-05-raft-join-fix"
readinessProbe:
enabled: true
# standbyok=true: standby pods pass (they serve reads).
# sealedok removed: sealed/uninit pods fail readiness so they
# leave the service endpoints and stop receiving external traffic.
path: "/v1/sys/health?standbyok=true"
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 3
livenessProbe:
enabled: true
# Liveness keeps sealedok=true so sealed pods aren't killed —
# they need time to join raft and auto-unseal.
path: "/v1/sys/health?standbyok=true&sealedok=true&uninitcode=200"
initialDelaySeconds: 60
periodSeconds: 10
failureThreshold: 3
# GKE Autopilot requires resource requests on all containers
resources:
requests:
memory: 256Mi
cpu: 500m
limits:
memory: 512Mi
cpu: 1000m
service:
enabled: true
type: ClusterIP
port: 8200
targetPort: 8200
annotations:
# Container-native NEG for GCP Global HTTPS LB backend cutover.
# GKE will create a zonal NEG named k8s1-<hash>-vault-vault-helm-gke-8200-<hash>
# in each zone where Vault pods are scheduled.
# After this syncs, list NEGs:
# gcloud compute network-endpoint-groups list --filter="name~vault" --project neuron-785695
# Then reference them in servers/gcp/vault-gke-lb.tf.
cloud.google.com/neg: '{"exposed_ports":{"8200":{}}}'
# Ingress disabled — Vault is exposed via GCP HTTPS LB.
# After migration, update the existing LB backend (vault-nodes.tf)
# to target a GKE NEG instead of the GCE instance groups.
# See: https://cloud.google.com/kubernetes-engine/docs/how-to/standalone-neg
ingress:
enabled: false
destination:
# GKE API endpoint (the GKE_CLUSTER_ENDPOINT placeholder has already been
# filled in). If the cluster is recreated, refresh the value from:
# terraform -chdir=servers/gcp output -raw gke_cluster_endpoint
server: https://34.63.89.52
namespace: vault
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
-101
View File
@@ -1,101 +0,0 @@
# DocuSeal Deployment: one replica serving on container port 3000, with
# settings injected from the docuseal-secrets Secret and data on the
# docuseal-data PVC mounted at /data.
apiVersion: apps/v1
kind: Deployment
metadata:
name: docuseal
namespace: docuseal
labels:
app: docuseal
spec:
replicas: 1
# Recreate — only one pod can hold the RWO PVC at a time.
strategy:
type: Recreate
selector:
matchLabels:
app: docuseal
template:
metadata:
labels:
app: docuseal
spec:
serviceAccountName: docuseal
containers:
- name: docuseal
# NOTE(review): floating :latest tag — the running version can change on
# any pod restart. Consider pinning a version or digest.
image: ee.docuseal.com/lip5viwf/ds-ee:latest
ports:
- containerPort: 3000
env:
- name: SECRET_KEY_BASE
valueFrom:
secretKeyRef:
name: docuseal-secrets
key: secret_key_base
# Presumably disabled because TLS is terminated in front of the pod — confirm.
- name: FORCE_SSL
value: "false"
- name: HOST
value: "sign.neurontechnologies.ai"
# SMTP settings; each value is read from a key of docuseal-secrets.
- name: SMTP_ADDRESS
valueFrom:
secretKeyRef:
name: docuseal-secrets
key: smtp_host
- name: SMTP_PORT
valueFrom:
secretKeyRef:
name: docuseal-secrets
key: smtp_port
- name: SMTP_USERNAME
valueFrom:
secretKeyRef:
name: docuseal-secrets
key: smtp_username
- name: SMTP_PASSWORD
valueFrom:
secretKeyRef:
name: docuseal-secrets
key: smtp_password
- name: SMTP_FROM
valueFrom:
secretKeyRef:
name: docuseal-secrets
key: smtp_from
volumeMounts:
- name: data
mountPath: /data
# Ready once GET / on port 3000 succeeds; no liveness probe is defined.
readinessProbe:
httpGet:
path: /
port: 3000
initialDelaySeconds: 15
periodSeconds: 10
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 500m
memory: 512Mi
volumes:
- name: data
persistentVolumeClaim:
claimName: docuseal-data
---
# LoadBalancer Service for DocuSeal: forwards port 80 to container port 3000
# on pods labelled app=docuseal.
apiVersion: v1
kind: Service
metadata:
name: docuseal
namespace: docuseal
annotations:
# External GCP Network LB — Cloudflare proxies in front, provides TLS.
# After provisioning, get the external IP:
# kubectl -n docuseal get svc docuseal
# Update Cloudflare DNS A record for sign.neurontechnologies.ai to this IP.
cloud.google.com/load-balancer-type: "External"
spec:
selector:
app: docuseal
ports:
# Plain HTTP only; no TLS port is exposed by this Service.
- name: http
port: 80
targetPort: 3000
type: LoadBalancer
@@ -1,56 +0,0 @@
---
# SecretStore for GKE — uses GCP Secret Manager directly via Workload Identity.
# The DocuSeal GCP SA has secretmanager.secretAccessor on its own secrets
# (see servers/gcp/docuseal.tf).
# Namespaced SecretStore "gcp-secretmanager" (docuseal): GCP Secret Manager
# provider for project neuron-785695, authenticated through Workload Identity.
apiVersion: external-secrets.io/v1
kind: SecretStore
metadata:
name: gcp-secretmanager
namespace: docuseal
spec:
provider:
gcpsm:
projectID: neuron-785695
# Workload Identity — ESO impersonates the docuseal-gke GCP SA to access
# Secret Manager. No JSON key file required.
auth:
workloadIdentity:
clusterLocation: us-central1
clusterName: neuron-platform
# k8s ServiceAccount whose Workload Identity binding is used.
serviceAccountRef:
name: docuseal
namespace: docuseal
---
# Pull all DocuSeal secrets from GCP Secret Manager into a single k8s Secret.
# ExternalSecret "docuseal-secrets": maps six Secret Manager secrets onto the
# keys of one k8s Secret with the same name, refreshed every hour.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: docuseal-secrets
namespace: docuseal
spec:
refreshInterval: 1h
secretStoreRef:
name: gcp-secretmanager
kind: SecretStore
target:
name: docuseal-secrets
creationPolicy: Owner
# secretKey = key in the k8s Secret; remoteRef.key = Secret Manager secret ID.
data:
- secretKey: secret_key_base
remoteRef:
key: docuseal-secret-key-base
- secretKey: smtp_host
remoteRef:
key: docuseal-smtp-host
- secretKey: smtp_port
remoteRef:
key: docuseal-smtp-port
- secretKey: smtp_username
remoteRef:
key: docuseal-smtp-username
- secretKey: smtp_password
remoteRef:
key: docuseal-smtp-password
- secretKey: smtp_from
remoteRef:
key: docuseal-smtp-from
-6
View File
@@ -1,6 +0,0 @@
# Namespace that holds every DocuSeal resource.
apiVersion: v1
kind: Namespace
metadata:
name: docuseal
labels:
app.kubernetes.io/name: docuseal
-12
View File
@@ -1,12 +0,0 @@
# 5Gi ReadWriteOnce volume claim for DocuSeal data, on the standard-rwo class.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: docuseal-data
namespace: docuseal
spec:
# ReadWriteOnce: the volume can be attached to one node at a time.
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
storageClassName: standard-rwo
@@ -1,10 +0,0 @@
# k8s ServiceAccount "docuseal", annotated with the GCP service account it maps
# to under Workload Identity.
apiVersion: v1
kind: ServiceAccount
metadata:
name: docuseal
namespace: docuseal
annotations:
# Workload Identity — allows ESO to authenticate to GCP Secret Manager
# as the docuseal-gke GCP SA without a JSON key file.
# The GCP SA binding is in servers/gcp/docuseal.tf (docuseal_workload_identity).
iam.gke.io/gcp-service-account: docuseal-gke@neuron-785695.iam.gserviceaccount.com
@@ -1,116 +0,0 @@
---
# Gitea Actions runner on GKE Autopilot.
#
# Architecture:
# - init container registers the runner with Gitea (--no-interactive, idempotent)
# - main container runs act_runner daemon
#
# LIMITATION: GKE Autopilot does not allow privileged containers, so Docker-in-Docker
# (DinD) is not available. The runner uses "host" label mode, which runs steps
# directly inside the runner pod container rather than spawning Docker containers.
# forgejo-runner v11 still requires a Docker daemon to start — this deployment
# will fail until one of the following is resolved:
#
# Option A (recommended): Switch to a GKE Standard node pool for the ci namespace.
# Add a standard node pool with sandbox.config.sandboxType=gvisor or allow
# privileged pods on specific node pools.
#
# Option B: Use Cloud Build for docker build steps; use this runner for shell steps.
# Requires forking all workflow files to remove docker steps.
#
# Option C: Point docker_host at an external Docker TCP daemon (e.g., on the GCE runner VM).
#
# Until resolved, the existing GCE VM runner (gitea-runner-1 in us-central1-a)
# handles CI jobs. This deployment is a placeholder for when privileged pods are available.
#
# The ci namespace has pod-security.kubernetes.io/enforce: privileged set
# (see namespace.yaml) but Autopilot's Warden enforces this at the node level.
# gitea-runner Deployment (namespace ci): an init container registers the
# runner and writes /data/config.yaml; the main container then runs the
# act_runner daemon against that config. Both share the emptyDir at /data.
apiVersion: apps/v1
kind: Deployment
metadata:
name: gitea-runner
namespace: ci
labels:
app: gitea-runner
spec:
# Set to 0 until the DinD/privileged-pod blocker is resolved (see comments above).
# Change to 1 when a Docker daemon is available (Standard node pool or external TCP).
replicas: 0
selector:
matchLabels:
app: gitea-runner
template:
metadata:
labels:
app: gitea-runner
spec:
serviceAccountName: gitea-runner
securityContext:
runAsNonRoot: false
# NOTE(review): /data is an emptyDir, so the .runner marker checked below only
# survives container restarts within one pod. Every new pod registers again
# under a new hostname-derived name — confirm stale runners are cleaned up.
initContainers:
- name: register
image: us-central1-docker.pkg.dev/neuron-785695/neuron-ci/ci-base:latest
command: ["/bin/sh", "-c"]
args:
- |
# Idempotent registration — skip if already registered (.runner file exists).
if [ -f /data/.runner ]; then
echo "Runner already registered, skipping."
else
act_runner register \
--instance "${GITEA_INSTANCE_URL}" \
--token "${GITEA_RUNNER_REGISTRATION_TOKEN}" \
--name "gke-runner-$(hostname)" \
--labels "ubuntu-latest:host,ubuntu-24.04:host,linux,x64" \
--no-interactive
fi
cat > /data/config.yaml << 'CONFIGEOF'
runner:
capacity: 2
timeout: 3h
envs:
BASH_ENV: "/usr/local/bin/git-ssh-init.sh"
container:
# GKE Autopilot blocks privileged DinD — steps run directly
# in the runner container (host mode). Labels use :host suffix.
network: host
docker_host: "-"
force_pull: false
valid_volumes: []
CONFIGEOF
env:
# Use the internal Gitea service to avoid Cloudflare Access auth.
# Gitea is in the same GKE cluster — use the ClusterIP service.
- name: GITEA_INSTANCE_URL
value: "http://gitea.gitea.svc.cluster.local:80"
# Supplies GITEA_RUNNER_REGISTRATION_TOKEN, used by the register command above.
envFrom:
- secretRef:
name: gitea-runner-token
volumeMounts:
- mountPath: /data
name: data
workingDir: /data
containers:
- name: runner
image: us-central1-docker.pkg.dev/neuron-785695/neuron-ci/ci-base:latest
# Runs the daemon with the config file generated by the init container.
command: ["act_runner", "daemon", "--config", "/data/config.yaml"]
env:
- name: GITEA_INSTANCE_URL
value: "http://gitea.gitea.svc.cluster.local:80"
# NOTE(review): the registration token is also exposed to this container;
# presumably only the init container needs it — confirm and drop if unused.
envFrom:
- secretRef:
name: gitea-runner-token
resources:
requests:
cpu: "500m"
memory: "512Mi"
limits:
cpu: "4"
memory: "4Gi"
volumeMounts:
- mountPath: /data
name: data
workingDir: /data
volumes:
# Scratch volume shared by the init and runner containers.
- name: data
emptyDir: {}
@@ -1,39 +0,0 @@
---
# SecretStore for the CI namespace — uses GCP Secret Manager via Workload Identity.
# The gitea-runner-gke GCP SA has secretmanager.secretAccessor on gitea-runner-token
# (see servers/gcp/gitea-runner.tf).
# Namespaced SecretStore "gcp-secretmanager" (ci): GCP Secret Manager provider
# for project neuron-785695, authenticated through Workload Identity.
apiVersion: external-secrets.io/v1
kind: SecretStore
metadata:
name: gcp-secretmanager
namespace: ci
spec:
provider:
gcpsm:
projectID: neuron-785695
auth:
workloadIdentity:
clusterLocation: us-central1
clusterName: neuron-platform
# k8s ServiceAccount whose Workload Identity binding is used.
serviceAccountRef:
name: gitea-runner
namespace: ci
---
# Pull the Gitea runner registration token from Secret Manager into a k8s Secret.
# ExternalSecret "gitea-runner-token": copies the Secret Manager secret
# gitea-runner-token into a k8s Secret of the same name, refreshed every hour.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: gitea-runner-token
namespace: ci
spec:
refreshInterval: 1h
secretStoreRef:
name: gcp-secretmanager
kind: SecretStore
target:
name: gitea-runner-token
creationPolicy: Owner
data:
# The key is the environment variable name the runner Deployment loads via envFrom.
- secretKey: GITEA_RUNNER_REGISTRATION_TOKEN
remoteRef:
key: gitea-runner-token
@@ -1,8 +0,0 @@
# Namespace for CI workloads, with Pod Security admission set to the
# "privileged" level.
apiVersion: v1
kind: Namespace
metadata:
name: ci
labels:
app.kubernetes.io/name: ci
# DinD (Docker-in-Docker) requires privileged pods.
pod-security.kubernetes.io/enforce: privileged
@@ -1,11 +0,0 @@
# k8s ServiceAccount "gitea-runner", annotated with the GCP service account it
# maps to under Workload Identity.
apiVersion: v1
kind: ServiceAccount
metadata:
name: gitea-runner
namespace: ci
annotations:
# Workload Identity — allows ESO (and optionally the runner pod) to
# authenticate to GCP Secret Manager as the gitea-runner-gke GCP SA
# without a JSON key file.
# The GCP SA binding is in servers/gcp/gitea-runner.tf (gitea_runner_gke_workload_identity).
iam.gke.io/gcp-service-account: gitea-runner-gke@neuron-785695.iam.gserviceaccount.com
-104
View File
@@ -1,104 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: gitea-custom-css
namespace: gitea
data:
header.tmpl: |
<link rel="stylesheet" href="/assets/css/custom.css">
custom.css: |
/* ── Typography & base ── */
:root {
--color-primary: #6366f1;
--color-primary-dark: #4f46e5;
--color-secondary: #8b5cf6;
--color-bg: #0f0f17;
--color-surface: #16161f;
--color-surface-2: #1e1e2e;
--color-border: #2a2a3d;
--color-text: #e2e2f0;
--color-text-muted: #8888aa;
--color-green: #22d3a5;
--radius: 10px;
}
body {
background: var(--color-bg) !important;
color: var(--color-text) !important;
font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif !important;
}
/* ── Top navbar ── */
.navbar, #navbar {
background: var(--color-surface) !important;
border-bottom: 1px solid var(--color-border) !important;
box-shadow: 0 1px 12px rgba(0,0,0,0.4) !important;
}
.navbar .brand svg, .navbar .brand img { filter: brightness(1.2); }
/* ── Sidebar & panels ── */
.repository, .ui.container, .ui.segment,
.ui.card, .ui.cards > .card {
background: var(--color-surface) !important;
border: 1px solid var(--color-border) !important;
border-radius: var(--radius) !important;
}
/* ── Buttons ── */
.ui.primary.button, .ui.green.button {
background: var(--color-primary) !important;
border: none !important;
border-radius: 8px !important;
}
.ui.primary.button:hover { background: var(--color-primary-dark) !important; }
/* ── Inputs ── */
input, textarea, select,
.ui.input > input, .ui.dropdown {
background: var(--color-surface-2) !important;
border: 1px solid var(--color-border) !important;
color: var(--color-text) !important;
border-radius: 8px !important;
}
/* ── Sign-in page ── */
.user.signin .ui.segment,
.user.signup .ui.segment {
background: var(--color-surface) !important;
border: 1px solid var(--color-border) !important;
border-radius: 16px !important;
box-shadow: 0 8px 32px rgba(0,0,0,0.5) !important;
padding: 2.5rem !important;
}
/* ── Repo file tree ── */
.repository.file.list .file-list {
background: var(--color-surface) !important;
border-radius: var(--radius) !important;
border: 1px solid var(--color-border) !important;
}
.repository.file.list .file-list tr:hover td {
background: var(--color-surface-2) !important;
}
/* ── Labels & badges ── */
.ui.label { border-radius: 6px !important; }
/* ── Dashboard activity feed ── */
.feeds .news { border-bottom: 1px solid var(--color-border) !important; }
/* ── Code blocks ── */
pre, code {
background: var(--color-surface-2) !important;
border: 1px solid var(--color-border) !important;
border-radius: 6px !important;
}
/* ── Muted footer ── */
#footer {
background: var(--color-surface) !important;
border-top: 1px solid var(--color-border) !important;
color: var(--color-text-muted) !important;
}
-163
View File
@@ -1,163 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: gitea
namespace: gitea
labels:
app: gitea
spec:
replicas: 1
# Recreate — only one pod can hold the RWO PVC at a time.
# Scale to multiple replicas only after adding shared storage (e.g. Filestore).
strategy:
type: Recreate
selector:
matchLabels:
app: gitea
template:
metadata:
labels:
app: gitea
spec:
serviceAccountName: gitea
containers:
- name: gitea
image: gitea/gitea:1.25.5
ports:
- name: http
containerPort: 3000
- name: ssh
containerPort: 22
env:
# Database — connect through the Cloud SQL Auth Proxy unix socket
- name: GITEA__database__DB_TYPE
value: postgres
# Unix socket path used by the Cloud SQL Auth Proxy sidecar
- name: GITEA__database__HOST
value: /cloudsql/neuron-785695:us-central1:neuron-prod-pg15
- name: GITEA__database__NAME
value: gitea
- name: GITEA__database__USER
value: gitea
- name: GITEA__database__PASSWD
valueFrom:
secretKeyRef:
name: gitea-db
key: password
# Server
- name: GITEA__server__DOMAIN
value: git.neuralplatform.ai
- name: GITEA__server__ROOT_URL
value: https://git.neuralplatform.ai
- name: GITEA__server__SSH_DOMAIN
value: git.neuralplatform.ai
- name: GITEA__server__SSH_PORT
value: "22"
- name: GITEA__server__START_SSH_SERVER
value: "false"
# Service
- name: GITEA__service__DISABLE_REGISTRATION
value: "true"
- name: GITEA__service__REQUIRE_SIGNIN_VIEW
value: "false"
# Security
- name: GITEA__security__INSTALL_LOCK
value: "true"
# Packages
- name: GITEA__packages__ENABLED
value: "true"
# Webhooks — allow calls back into the cluster and GKE VPC
- name: GITEA__webhook__ALLOWED_HOST_LIST
value: 10.0.0.0/8,172.16.0.0/12,192.168.0.0/16
# Actions
- name: GITEA__actions__ENABLED
value: "true"
- name: GITEA__actions__DEFAULT_ACTIONS_URL
value: https://code.forgejo.org
volumeMounts:
- name: data
mountPath: /data
- name: cloudsql-socket
mountPath: /cloudsql
- name: custom-css
mountPath: /data/gitea/custom/public/assets/css/custom.css
subPath: custom.css
- name: custom-css
mountPath: /data/gitea/custom/templates/custom/header.tmpl
subPath: header.tmpl
resources:
requests:
memory: 256Mi
cpu: 100m
limits:
memory: 512Mi
cpu: 500m
readinessProbe:
httpGet:
path: /
port: 3000
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 3
livenessProbe:
httpGet:
path: /
port: 3000
initialDelaySeconds: 30
periodSeconds: 15
failureThreshold: 3
# Cloud SQL Auth Proxy sidecar — provides unix socket at /cloudsql/
# Authenticates to Cloud SQL using Workload Identity (no key file).
- name: cloud-sql-proxy
image: gcr.io/cloud-sql-connectors/cloud-sql-proxy:2
args:
- "--structured-logs"
- "--unix-socket=/cloudsql"
- "neuron-785695:us-central1:neuron-prod-pg15"
securityContext:
runAsNonRoot: true
volumeMounts:
- name: cloudsql-socket
mountPath: /cloudsql
resources:
requests:
memory: 32Mi
cpu: 10m
limits:
memory: 128Mi
cpu: 100m
volumes:
- name: data
persistentVolumeClaim:
claimName: gitea-data
- name: cloudsql-socket
emptyDir: {}
- name: custom-css
configMap:
name: gitea-custom-css
---
apiVersion: v1
kind: Service
metadata:
name: gitea
namespace: gitea
annotations:
# External GCP Network LB — Cloudflare proxies in front, provides TLS.
# After provisioning, get the external IP:
# kubectl --context=gke_neuron-785695_us-central1_neuron-platform -n gitea get svc gitea
# Update Cloudflare DNS A record for git.neuralplatform.ai to this IP.
cloud.google.com/load-balancer-type: "External"
spec:
selector:
app: gitea
ports:
- name: http
port: 80
targetPort: 3000
- name: ssh
port: 22
targetPort: 22
type: LoadBalancer
@@ -1,75 +0,0 @@
---
# SecretStore for GKE — uses GCP Secret Manager directly via Workload Identity.
# On GKE we use the GCP provider instead of a Vault-backed store, since
# Vault itself may be in the process of being migrated.
# The Gitea GCP SA has secretmanager.secretAccessor on its own secret (see cloud-sql.tf).
#
# Pre-requisite: install ESO on GKE before applying this:
# helm install external-secrets external-secrets/external-secrets \
# --namespace external-secrets --create-namespace
apiVersion: external-secrets.io/v1
kind: SecretStore
metadata:
name: gcp-secretmanager
namespace: gitea
spec:
provider:
gcpsm:
projectID: neuron-785695
# Workload Identity — ESO impersonates the gitea-gke GCP SA to access
# Secret Manager. The gitea SA has secretAccessor on gitea-db-password.
auth:
workloadIdentity:
clusterLocation: us-central1
clusterName: neuron-platform
serviceAccountRef:
name: gitea
namespace: gitea
---
# gitea-db — Gitea database password, pulled from GCP Secret Manager.
#
# Terraform stores the full DSN in the Secret Manager secret "gitea-database-url":
# host=/cloudsql/<conn> user=gitea password=<pw> dbname=gitea sslmode=disable
#
# That value is a DSN, not a password, so it must NOT be used directly as
# GITEA__database__PASSWD. ESO copies the raw secret value as-is, so the bare
# password is kept in its own Secret Manager secret, "gitea-db-password", and
# that is the only secret this ExternalSecret reads (mapped to key "password").
#
# Preferred: let Terraform create and populate gitea-db-password directly
# (add the secret in cloud-sql.tf alongside gitea-database-url).
#
# Manual bootstrap, if Terraform does not manage it yet — after `terraform apply`, run:
# PASSWORD=$(gcloud secrets versions access latest --secret=gitea-database-url \
# | grep -oP '(?<=password=)\S+')
# echo -n "$PASSWORD" | gcloud secrets create gitea-db-password \
# --data-file=- --project=neuron-785695
#
# Alternatives mentioned in earlier notes but NOT implemented by this manifest:
# - pulling the full DSN into a k8s Secret key "dsn" and parsing it in-pod;
# - using the Cloud SQL proxy unix socket with no password (peer auth).
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: gitea-db
namespace: gitea
spec:
refreshInterval: 1h
secretStoreRef:
name: gcp-secretmanager
kind: SecretStore
target:
name: gitea-db
creationPolicy: Owner
data:
- secretKey: password
remoteRef:
# This secret is populated by Terraform (gitea-db-password in cloud-sql.tf)
# It contains just the raw database password (no DSN prefix).
key: gitea-db-password
-6
View File
@@ -1,6 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: gitea
labels:
app.kubernetes.io/name: gitea
-14
View File
@@ -1,14 +0,0 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: gitea-data
namespace: gitea
spec:
# standard-rwo = pd-balanced on GKE Autopilot (ReadWriteOnce)
# Use premium-rwo (pd-ssd) if repo performance becomes a bottleneck.
storageClassName: standard-rwo
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 50Gi
-10
View File
@@ -1,10 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: gitea
namespace: gitea
annotations:
# Workload Identity — allows the Cloud SQL Auth Proxy sidecar to authenticate
# to Cloud SQL as the gitea GCP SA without a JSON key file.
# The GCP SA binding is in servers/gcp/gke.tf (gitea_workload_identity).
iam.gke.io/gcp-service-account: gitea-gke@neuron-785695.iam.gserviceaccount.com
-6
View File
@@ -1,6 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: vault
labels:
app.kubernetes.io/name: vault
+11 -3
View File
@@ -20,9 +20,17 @@ terraform {
}
}
backend "gcs" {
bucket = "neuron-785695-terraform-state"
prefix = "gcp"
backend "s3" {
bucket = "legion-terraform-state"
key = "gcp/terraform.tfstate"
region = "auto"
endpoint = "https://651161e0a3d321561b4c90b5bcd5f15b.r2.cloudflarestorage.com"
# R2 is S3-compatible but not AWS — skip AWS-specific checks
skip_credentials_validation = true
skip_metadata_api_check = true
skip_region_validation = true
force_path_style = true
}
}
+3 -39
View File
@@ -15,41 +15,9 @@ set -euxo pipefail
exec > >(tee /var/log/runner-bootstrap.log) 2>&1
apt-get update
# Core system tools and C build dependencies required by CI jobs.
# libcurl4-openssl-dev + build-essential are needed by the El compiler C build;
# libssl-dev/libsqlite3-dev/libpq-dev for downstream projects.
# python3 is for the inline label-rewrite step below.
apt-get install -y --no-install-recommends \
curl \
ca-certificates \
docker.io \
git \
jq \
python3 \
wget \
unzip \
zip \
xz-utils \
rsync \
file \
sudo \
make \
build-essential \
pkg-config \
gcc \
libcurl4-openssl-dev \
libssl-dev \
libsqlite3-dev \
libpq-dev \
libffi-dev \
zlib1g-dev
# Node.js 20 LTS via NodeSource — Ubuntu 24.04's bundled nodejs is 18.x
# which works but 20 LTS matches what our other CI images use.
curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
apt-get install -y --no-install-recommends nodejs
npm install -g yarn
# nodejs/npm needed for JavaScript actions like actions/checkout and
# google-github-actions/auth. python3 is for our inline label-rewrite below.
apt-get install -y curl ca-certificates docker.io git jq nodejs npm python3
# Make docker usable by the unprivileged runner user
systemctl enable --now docker
@@ -57,10 +25,6 @@ systemctl enable --now docker
useradd -m -s /bin/bash runner || true
usermod -aG docker runner
# Allow the runner user to install packages and run system commands
# in CI workflow steps without a password prompt.
echo "runner ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
# act_runner — pinned to a known-good release. Bump RUNNER_VERSION when
# upgrading. The project moved from gitea/act_runner to gitea/runner around
# the 0.6.x series; the binary inside the asset is still called act_runner.
+226 -45
View File
@@ -1,49 +1,204 @@
# ── Vault LB — GCP Global HTTPS LB ───────────────────────────────────────────
# ── Vault HA Cluster — GCE-based Raft ────────────────────────────────────────
#
# LB frontend is unchanged: forwarding rule → target HTTPS proxy → url map → backend.
# Backend uses GKE container-native NEGs (cut over from GCE instance groups).
# Three GCE e2-small VMs across us-central1-{a,b,c} running HashiCorp Vault
# in Raft HA mode. Auto-unseal via the existing GCP KMS key (vault-kms.tf).
#
# Architecture (GKE):
# - Vault Helm chart runs 3 pods in namespace: vault on GKE neuron-platform cluster
# - GKE auto-creates zonal container-native NEGs via Service annotation:
# cloud.google.com/neg: '{"exposed_ports":{"8200":{}}}'
# - NEG backends replace GCE instance group backends in the Global HTTPS LB
# - DNS unchanged: vault.neuralplatform.ai → 34.54.164.21 (same GCP Global LB IP)
# Architecture:
# - vault-node-1 (us-central1-a) — bootstrapped first, others join via retry_join
# - vault-node-2 (us-central1-b)
# - vault-node-3 (us-central1-c)
# - Each VM gets the vault-node SA attached for KMS auto-unseal + ADC
# - Internal traffic: VMs talk Raft over port 8201 on GCP internal IPs
# - External access: global external HTTPS LB → port 8200
# (vault.neuralplatform.ai Cloudflare DNS → GCP LB IP)
#
# GCE nodes (vault-node-1/2/3) and their instance groups have been removed.
# GKE Vault (via Workload Identity + vault-unseal SA) is the sole Vault backend.
# Ops SSH:
# gcloud compute ssh vault-node-1 --zone=us-central1-a --tunnel-through-iap
#
# After first boot, initialize Vault on node-1:
# vault operator init (save the root token + recovery keys securely)
# Nodes 2 and 3 auto-join via retry_join once they boot.
locals {
# Container-native NEG names created by GKE from the Vault Service annotation:
# cloud.google.com/neg: '{"exposed_ports":{"8200":{}}}'
# GKE creates one NEG per zone where Vault pods are scheduled.
# Confirmed with: gcloud compute network-endpoint-groups list --project neuron-785695
vault_neg_names = {
"us-central1-b" = "k8s1-bfbeff02-vault-vault-helm-gke-8200-db1a474f"
"us-central1-c" = "k8s1-bfbeff02-vault-vault-helm-gke-8200-db1a474f"
"us-central1-f" = "k8s1-bfbeff02-vault-vault-helm-gke-8200-db1a474f"
vault_version = "1.19.2"
vault_nodes = {
"vault-node-1" = { zone = "us-central1-a", node_id = "vault-node-1" }
"vault-node-2" = { zone = "us-central1-b", node_id = "vault-node-2" }
"vault-node-3" = { zone = "us-central1-c", node_id = "vault-node-3" }
}
}
# ── GKE container-native NEG data sources ────────────────────────────────────
# GKE auto-creates these when the Vault Service annotation is applied.
# ── Service Account for Vault nodes ──────────────────────────────────────────
# Reuses the vault-unseal SA from vault-kms.tf for KMS access.
# Additional roles: logging + metrics from ops agent.
data "google_compute_network_endpoint_group" "vault_gke" {
for_each = local.vault_neg_names
name = each.value
zone = each.key
project = var.project_id
resource "google_project_iam_member" "vault_node_log_writer" {
project = var.project_id
role = "roles/logging.logWriter"
member = "serviceAccount:${google_service_account.vault_unseal.email}"
}
# ── Firewall — GKE health checks ─────────────────────────────────────────────
# Allow GCP health check probers to reach pod IPs on port 8200.
# Container-native NEGs direct health checks to pod IPs (not node IPs).
# GKE Autopilot pod CIDR: 10.45.128.0/22 (from cluster ip_allocation_policy).
# Without this rule, GCP LB health checks fail → "no healthy upstream".
# No target_tags — applies to all instances/pods in the default network.
# Safe: GCP health check ranges (130.211.0.0/22, 35.191.0.0/16) are GCP-internal only.
resource "google_compute_firewall" "vault_api_gke" {
name = "vault-api-from-lb-gke"
resource "google_project_iam_member" "vault_node_metric_writer" {
project = var.project_id
role = "roles/monitoring.metricWriter"
member = "serviceAccount:${google_service_account.vault_unseal.email}"
}
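# Grants the Vault node SA project-wide read-only access to Compute Engine resources
# (roles/compute.viewer) — broader than just a node's own instance metadata.
# Intended for the Vault GCP auth method later.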
resource "google_project_iam_member" "vault_node_compute_viewer" {
project = var.project_id
role = "roles/compute.viewer"
member = "serviceAccount:${google_service_account.vault_unseal.email}"
}
# ── Startup script staged in GCS ─────────────────────────────────────────────
# Stored in the existing runner-assets bucket (reuse infrastructure).
# The script installs Vault, writes the config, and starts the systemd unit.
resource "google_storage_bucket_object" "vault_startup" {
name = "vault/startup.sh"
bucket = google_storage_bucket.runner_assets.name
source = "${path.module}/vault/startup.sh"
metadata = {
sha256 = filesha256("${path.module}/vault/startup.sh")
}
}
resource "google_storage_bucket_iam_member" "vault_node_bucket_read" {
bucket = google_storage_bucket.runner_assets.name
role = "roles/storage.objectViewer"
member = "serviceAccount:${google_service_account.vault_unseal.email}"
}
# ── Vault node VMs ────────────────────────────────────────────────────────────
resource "google_compute_instance" "vault_node" {
for_each = local.vault_nodes
name = each.key
machine_type = "e2-small"
zone = each.value.zone
project = var.project_id
tags = ["vault-node", "allow-iap-ssh"]
boot_disk {
initialize_params {
image = "projects/debian-cloud/global/images/family/debian-12"
size = 20
type = "pd-balanced"
}
}
# Separate persistent disk for Raft data — survives VM recreation
attached_disk {
source = google_compute_disk.vault_data[each.key].self_link
device_name = "vault-data"
mode = "READ_WRITE"
}
network_interface {
network = "default"
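# NOTE: the empty access_config block below DOES assign an ephemeral external IP
# to each node (presumably so the startup script's apt-get/download steps have
# outbound internet access — confirm). Inbound access is still restricted by the
# firewall rules in this file: SSH only from the IAP range, Vault API (8200) only
# from the LB health-check and RFC1918 ranges. The Vault API is published
# externally via the global HTTPS LB below.
# To make the nodes truly private, remove access_config and provide Cloud NAT.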
access_config {}
}
service_account {
email = google_service_account.vault_unseal.email
scopes = ["cloud-platform"]
}
metadata = {
# Pull and execute the real startup script from GCS
startup-script = <<-EOT
#!/usr/bin/env bash
set -euxo pipefail
apt-get update -y
apt-get install -y curl ca-certificates apt-transport-https gnupg google-cloud-cli
gsutil cat gs://${google_storage_bucket.runner_assets.name}/${google_storage_bucket_object.vault_startup.name} \
> /tmp/vault-startup.sh
chmod +x /tmp/vault-startup.sh
VAULT_NODE_ID="${each.value.node_id}" \
VAULT_VERSION="${local.vault_version}" \
/tmp/vault-startup.sh
EOT
enable-oslogin = "TRUE"
}
allow_stopping_for_update = true
depends_on = [
google_compute_disk.vault_data,
google_storage_bucket_object.vault_startup,
google_storage_bucket_iam_member.vault_node_bucket_read,
]
}
# ── Persistent data disks ─────────────────────────────────────────────────────
# 10 GiB per node. Kept separate from the boot disk so Raft data
# survives a full VM deletion and recreation.
resource "google_compute_disk" "vault_data" {
for_each = local.vault_nodes
name = "${each.key}-data"
zone = each.value.zone
project = var.project_id
type = "pd-ssd"
size = 10
labels = {
managed-by = "terraform"
service = "vault"
node = each.key
}
lifecycle {
prevent_destroy = true
}
}
# ── Firewall rules ────────────────────────────────────────────────────────────
# IAP SSH — ops access without a public SSH port
resource "google_compute_firewall" "vault_iap_ssh" {
name = "vault-iap-ssh"
network = "default"
project = var.project_id
direction = "INGRESS"
priority = 1000
allow {
protocol = "tcp"
ports = ["22"]
}
source_ranges = ["35.235.240.0/20"] # GCP IAP CIDR
target_tags = ["vault-node"]
}
# Raft cluster traffic — node-to-node port 8201 (internal only)
resource "google_compute_firewall" "vault_raft" {
name = "vault-raft-internal"
network = "default"
project = var.project_id
direction = "INGRESS"
priority = 1000
allow {
protocol = "tcp"
ports = ["8201"]
}
source_tags = ["vault-node"]
target_tags = ["vault-node"]
}
# Vault API — allow from the GCP health check ranges and the LB
resource "google_compute_firewall" "vault_api" {
name = "vault-api-from-lb"
network = "default"
project = var.project_id
direction = "INGRESS"
@@ -54,15 +209,35 @@ resource "google_compute_firewall" "vault_api_gke" {
ports = ["8200"]
}
# GCP health check source ranges only — restricted port, low risk
source_ranges = ["130.211.0.0/22", "35.191.0.0/16"]
# GCP health check ranges (130.211.0.0/22, 35.191.0.0/16)
# and RFC1918 for any internal service access
source_ranges = ["130.211.0.0/22", "35.191.0.0/16", "10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"]
target_tags = ["vault-node"]
}
# ── Instance Groups (one unmanaged group per zone for the LB) ─────────────────
resource "google_compute_instance_group" "vault" {
for_each = local.vault_nodes
name = "vault-${each.key}"
zone = each.value.zone
project = var.project_id
instances = [google_compute_instance.vault_node[each.key].self_link]
named_port {
name = "vault-api"
port = 8200
}
}
# ── Global external HTTPS LB for vault.neuralplatform.ai ──────────────────────
# Global external HTTPS LB with a Google-managed cert for vault.neuralplatform.ai.
# We use a global external HTTPS LB (same scheme as the prod marketing LB)
# so we can attach a Google-managed cert for vault.neuralplatform.ai.
#
# TLS is terminated at the LB. Vault listens on plain 8200 internally.
# Cloudflare DNS A record for vault.neuralplatform.ai → vault_lb_ip output below.
# The Cloudflare DNS A record for vault.neuralplatform.ai (neuralplatform zone)
# must point to vault_lb_ip output below — add it in Cloudflare dashboard
# or in the legion Terraform if you bring that zone under TF management.
resource "google_compute_global_address" "vault" {
name = "vault-ip"
@@ -100,17 +275,16 @@ resource "google_compute_backend_service" "vault" {
load_balancing_scheme = "EXTERNAL_MANAGED"
protocol = "HTTP" # Vault serves plain HTTP; TLS terminates at the LB
timeout_sec = 30
port_name = "vault-api"
health_checks = [google_compute_health_check.vault.self_link]
# GKE container-native NEG backends — one per zone where Vault pods are scheduled.
# RATE balancing mode is required for NEGs with EXTERNAL_MANAGED load balancers.
dynamic "backend" {
for_each = data.google_compute_network_endpoint_group.vault_gke
for_each = local.vault_nodes
content {
group = backend.value.self_link
balancing_mode = "RATE"
max_rate_per_endpoint = 100
group = google_compute_instance_group.vault[backend.key].self_link
balancing_mode = "UTILIZATION"
max_utilization = 0.8
}
}
@@ -174,3 +348,10 @@ output "vault_lb_ip" {
description = "Global IP for vault.neuralplatform.ai — set as DNS A record in Cloudflare (neuralplatform.ai zone)"
value = google_compute_global_address.vault.address
}
output "vault_node_zones" {
description = "Zone placement of each Vault node"
value = {
for k, v in local.vault_nodes : k => v.zone
}
}
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/backup
destination:
+2 -2
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/github-runner
destination:
@@ -27,7 +27,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/gitea-runner
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: neuron-prod
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/dharma
destination:
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/external-secrets
destination:
+3 -3
View File
@@ -7,7 +7,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/fornax/coordinator
destination:
@@ -29,7 +29,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/fornax/ui
destination:
@@ -51,7 +51,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/fornax/grafana
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/gitea
destination:
+165
View File
@@ -0,0 +1,165 @@
---
# Gitea CI runner — general-purpose (legion)
# Uses host Docker socket for container management and docker build/push.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gitea-runner
  namespace: ci
  labels:
    app: gitea-runner
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gitea-runner
  template:
    metadata:
      labels:
        app: gitea-runner
      annotations:
        config-version: "2026-04-27-containerd-sock"
    spec:
      securityContext:
        runAsNonRoot: false # act_runner needs root for container management
      initContainers:
        # Registers the runner against $GITEA_INSTANCE_URL and writes the daemon
        # config into the shared /data volume before the runner container starts.
        - name: register
          image: registry.neuralplatform.ai/ci-base:latest
          workingDir: /data
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Fail the init container (so the kubelet retries) if registration
              # fails or a required variable is missing, instead of letting the
              # trailing `cat` mask the error with exit status 0.
              set -eu
              act_runner register \
                --instance "$GITEA_INSTANCE_URL" \
                --token "$GITEA_RUNNER_REGISTRATION_TOKEN" \
                --name legion \
                --labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
                --no-interactive
              cat > /data/config.yaml << 'EOF'
              runner:
                capacity: 2
                timeout: 3h
              container:
                network: host
                docker_host: "unix:///run/k3s/containerd/containerd.sock"
                force_pull: false
                valid_volumes: []
                # NOTE(review): default_image is not a documented act_runner
                # config key; the job image comes from the runner labels above.
                default_image: "registry.neuralplatform.ai/ci-base:latest"
                # act_runner has no container.extra_hosts key. Extra /etc/hosts
                # entries for job containers are passed as container start
                # options instead.
                options: "--add-host=gitea.git.svc.cluster.local:10.43.1.53"
              EOF
          envFrom:
            - secretRef:
                name: gitea-runner-secret
          volumeMounts:
            - name: data
              mountPath: /data
      containers:
        # Long-running act_runner daemon; reads the registration state and
        # config.yaml that the init container left in /data.
        - name: runner
          image: registry.neuralplatform.ai/ci-base:latest
          workingDir: /data
          command: ["act_runner", "daemon", "--config", "/data/config.yaml"]
          envFrom:
            - secretRef:
                name: gitea-runner-secret
          volumeMounts:
            - name: data
              mountPath: /data
            - name: docker-sock
              mountPath: /var/run/docker.sock
          resources:
            requests:
              memory: 512Mi
              cpu: 250m
            limits:
              memory: 4Gi
              cpu: "4"
      volumes:
        # emptyDir: registration state is lost on pod restart, so every new pod
        # re-registers through the init container.
        - name: data
          emptyDir: {}
        # NOTE(review): the host socket is mounted at /var/run/docker.sock, while
        # config.yaml's docker_host points at /run/k3s/containerd/containerd.sock,
        # a path that is not mounted inside this container. Confirm act_runner
        # can actually reach a Docker-compatible API through this socket.
        - name: docker-sock
          hostPath:
            path: /run/k3s/containerd/containerd.sock
            type: Socket
---
# Neuron Technologies CI runner
apiVersion: apps/v1
kind: Deployment
metadata:
  name: neuron-technologies-runner
  namespace: ci
  labels:
    app: neuron-technologies-runner
spec:
  replicas: 1
  selector:
    matchLabels:
      app: neuron-technologies-runner
  template:
    metadata:
      labels:
        app: neuron-technologies-runner
      annotations:
        config-version: "2026-04-27-containerd-sock"
    spec:
      securityContext:
        runAsNonRoot: false
      initContainers:
        # Registers the runner against $GITEA_INSTANCE_URL and writes the daemon
        # config into the shared /data volume before the runner container starts.
        - name: register
          image: registry.neuralplatform.ai/ci-base:latest
          workingDir: /data
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Fail the init container (so the kubelet retries) if registration
              # fails or a required variable is missing, instead of letting the
              # trailing `cat` mask the error with exit status 0.
              set -eu
              act_runner register \
                --instance "$GITEA_INSTANCE_URL" \
                --token "$GITEA_RUNNER_REGISTRATION_TOKEN" \
                --name neuron-technologies \
                --labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
                --no-interactive
              cat > /data/config.yaml << 'EOF'
              runner:
                capacity: 2
                timeout: 3h
              container:
                network: host
                docker_host: "unix:///run/k3s/containerd/containerd.sock"
                force_pull: false
                valid_volumes: []
                # NOTE(review): default_image is not a documented act_runner
                # config key; the job image comes from the runner labels above.
                default_image: "registry.neuralplatform.ai/ci-base:latest"
                # act_runner has no container.extra_hosts key. Extra /etc/hosts
                # entries for job containers are passed as container start
                # options instead.
                options: "--add-host=gitea.git.svc.cluster.local:10.43.1.53"
              EOF
          envFrom:
            - secretRef:
                name: neuron-technologies-runner-secret
          volumeMounts:
            - name: data
              mountPath: /data
      containers:
        # Long-running act_runner daemon; reads the registration state and
        # config.yaml that the init container left in /data.
        - name: runner
          image: registry.neuralplatform.ai/ci-base:latest
          workingDir: /data
          command: ["act_runner", "daemon", "--config", "/data/config.yaml"]
          envFrom:
            - secretRef:
                name: neuron-technologies-runner-secret
          volumeMounts:
            - name: data
              mountPath: /data
            - name: docker-sock
              mountPath: /var/run/docker.sock
          resources:
            requests:
              memory: 512Mi
              cpu: 250m
            limits:
              memory: 4Gi
              cpu: "4"
      volumes:
        # emptyDir: registration state is lost on pod restart, so every new pod
        # re-registers through the init container.
        - name: data
          emptyDir: {}
        # NOTE(review): the host socket is mounted at /var/run/docker.sock, while
        # config.yaml's docker_host points at /run/k3s/containerd/containerd.sock,
        # a path that is not mounted inside this container. Confirm act_runner
        # can actually reach a Docker-compatible API through this socket.
        - name: docker-sock
          hostPath:
            path: /run/k3s/containerd/containerd.sock
            type: Socket
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
labels:
app: gitea
spec:
replicas: 0 # Scaled down — Gitea has been migrated to GKE. DNS now points at GKE LB.
replicas: 1
strategy:
type: Recreate
selector:
-22
View File
@@ -1,22 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: gke-apps
namespace: argocd
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
targetRevision: main
path: servers/gcp/k8s/argocd-apps
destination:
# This Application lives on Legion Argo CD — it creates child Applications
# there that target the GKE cluster.
server: https://kubernetes.default.svc
namespace: argocd
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/monitoring
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/mudcraft
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/daemon
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/dev
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/marketing
destination:
+1 -1
View File
@@ -15,7 +15,7 @@ spec:
# Only allow deployments sourced from the infrastructure repo.
sourceRepos:
- "https://git.neuralplatform.ai/will/infrastructure.git"
- "http://gitea.git.svc.cluster.local:3000/will/infrastructure.git"
- "https://code.forgejo.org"
- "registry.neuralplatform.ai"
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/prod
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/stage
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/swarm
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/tim
destination:
+1 -1
View File
@@ -12,7 +12,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/web
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/ollama
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/packages
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/postgres
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/registry
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/vault
destination:
+3 -3
View File
@@ -36,7 +36,7 @@ resource "helm_release" "argocd" {
server.insecure: true # TLS terminated at Traefik
repositories:
gitea-infrastructure:
url: https://git.neuralplatform.ai/will/infrastructure.git
url: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
name: infrastructure
type: git
YAML
@@ -96,7 +96,7 @@ resource "kubernetes_secret" "argocd_gitea_repo" {
data = {
type = "git"
url = "https://git.neuralplatform.ai/will/infrastructure.git"
url = "http://gitea.git.svc.cluster.local:3000/will/infrastructure.git"
username = "will"
password = var.gitea_api_token
}
@@ -136,7 +136,7 @@ resource "kubernetes_manifest" "argocd_root_app" {
spec = {
project = "default"
source = {
repoURL = "https://git.neuralplatform.ai/will/infrastructure.git"
repoURL = "http://gitea.git.svc.cluster.local:3000/will/infrastructure.git"
targetRevision = "main"
path = "servers/legion/apps"
}
-28
View File
@@ -23,31 +23,3 @@ resource "cloudflare_record" "np_web_stage" {
proxied = true
ttl = 1
}
# vault.neuralplatform.ai → GCP Global HTTPS LB with managed cert.
# DNS-only (not proxied): the GCP-managed TLS cert terminates at the LB.
# Backend is now GKE container-native NEGs (cut over from GCE instance groups).
# IP: terraform output vault_lb_ip from servers/gcp workspace = 34.54.164.21
resource "cloudflare_record" "np_vault" {
zone_id = local.zone_neuralplatform_ai
name = "vault"
type = "A"
# Hard-coded copy of the vault_lb_ip output noted above; update it here if that output changes.
content = "34.54.164.21"
proxied = false
ttl = 60
}
# git.neuralplatform.ai → Gitea on GKE via GCP Network LB.
# Cloudflare proxied (provides TLS termination). Gitea listens on port 80 internally.
# IP: external IP of the GKE LoadBalancer service in namespace gitea.
# Get it with:
# kubectl --context=gke_neuron-785695_us-central1_neuron-platform -n gitea get svc gitea
# If that IP ever changes, update content below to match, then terraform apply.
resource "cloudflare_record" "np_gitea" {
zone_id = local.zone_neuralplatform_ai
name = "git"
type = "A"
content = "34.31.145.131" # GKE LoadBalancer external IP (gitea svc in namespace gitea)
proxied = true
ttl = 1
}
-42
View File
@@ -1,42 +0,0 @@
# Cloudflare Zero Trust Access → git.neuralplatform.ai (Gitea)
#
# The Gitea Access application itself is currently managed in the Cloudflare
# dashboard, NOT in Terraform. This file only manages the *service token* the
# Gitea Actions runners use to authenticate through CF Access while still
# keeping the human Google-OAuth gate for browser users.
#
# Why not import the application here?
# - Importing the existing dashboard app risks drifting the human-auth
# policy (Google IdP, allowed emails) which is settled and working.
# - Service tokens can be added to a dashboard-managed app without
# importing the app itself; the token resource lives at the account
# level and is referenced from a policy.
# - We pay only the cost we need to. If we later want all Access apps
# in TF we can do a focused import pass.
#
# After `terraform apply` produces the token id/secret, Will must:
# 1. Run `vault kv put secret/gitea-runner-cf-access ...` (see outputs).
# 2. In the Cloudflare dashboard, edit the existing "Gitea" Access
# application's policies and add a new policy:
# Action: Service Auth (decision = non_identity)
# Include: Service Token = "gitea-runner"
# This grants the service token bypass through CF Access on
# git.neuralplatform.ai without changing the human-auth flow.
# Service token the Gitea Actions runners present to Cloudflare Access.
resource "cloudflare_zero_trust_access_service_token" "gitea_runner" {
account_id = var.cloudflare_account_id
name = "gitea-runner"
# The provider default is "8760h" (1 year); "forever" overrides it so the
# token never expires.
# NOTE(review): with no expiry, a plain re-apply will not rotate the token;
# rotation is a manual step (replace this resource, then update Vault).
# Confirm that is the intended policy.
duration = "forever"
}
# Client ID half of the gitea-runner service token. The runner ExternalSecrets
# read it from Vault key secret/data/gitea-runner-cf-access, property client_id.
output "gitea_runner_cf_access_client_id" {
description = "CF Access service token client ID for the Gitea Actions runner. Store in Vault at secret/gitea-runner-cf-access."
value = cloudflare_zero_trust_access_service_token.gitea_runner.client_id
}
# Client secret half of the gitea-runner service token. The runner
# ExternalSecrets read it from Vault key secret/data/gitea-runner-cf-access,
# property client_secret.
output "gitea_runner_cf_access_client_secret" {
description = "CF Access service token client secret. Store in Vault at secret/gitea-runner-cf-access. Only emitted at creation time."
value = cloudflare_zero_trust_access_service_token.gitea_runner.client_secret
# Marked sensitive so Terraform redacts the value in plan/apply output.
sensitive = true
}
+3 -19
View File
@@ -6,7 +6,7 @@ ENV DEBIAN_FRONTEND=noninteractive
COPY --from=runner-bin /bin/forgejo-runner /usr/local/bin/act_runner
# Core system tools + C build deps needed by El compiler and other CI jobs
# Core system tools
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
@@ -27,19 +27,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
pkg-config \
software-properties-common \
zstd \
sudo \
libcurl4-openssl-dev \
libssl-dev \
libsqlite3-dev \
libpq-dev \
libffi-dev \
zlib1g-dev \
dpkg-dev \
&& rm -rf /var/lib/apt/lists/*
# Allow any user to run sudo without a password (CI containers need apt-get etc.)
RUN echo "ALL ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
# Node.js 20 LTS via binary tarball (nodesource apt repo is unreliable on Ubuntu 24.04)
RUN NODE_VERSION=20.19.1 \
&& curl -fsSL "https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-linux-x64.tar.xz" \
@@ -106,10 +95,5 @@ RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
# We deliberately don't set ENTRYPOINT / CMD here — act_runner spawns
# build containers with its own entrypoint to keep them alive between
# steps, and overriding it breaks job execution.
# SSH-based git clone init script.
# Sourced before every CI step via BASH_ENV (set in deployment.yaml).
# Writes GITEA_SSH_PRIVATE_KEY to ~/.ssh/gitea_key and rewrites HTTPS
# Gitea URLs to SSH so actions/checkout and git clone both use SSH auth.
COPY git-ssh-init.sh /usr/local/bin/git-ssh-init.sh
RUN chmod +x /usr/local/bin/git-ssh-init.sh
COPY git-cf-access-init.sh /usr/local/bin/git-cf-access-init.sh
RUN chmod +x /usr/local/bin/git-cf-access-init.sh
+26 -20
View File
@@ -8,7 +8,7 @@ metadata:
labels:
app: gitea-runner
annotations:
config-version: "2026-05-05-extra-hosts"
config-version: "2026-05-04-cf-access-public-url"
spec:
replicas: 1
selector:
@@ -19,7 +19,7 @@ spec:
labels:
app: gitea-runner
annotations:
config-version: "2026-05-05-extra-hosts"
config-version: "2026-05-04-cf-access-public-url"
spec:
securityContext:
runAsNonRoot: false
@@ -42,18 +42,21 @@ spec:
container:
network: host
docker_host: "unix:///var/run/docker.sock"
force_pull: true
force_pull: false
valid_volumes: []
default_image: "registry.neuralplatform.ai/ci-base:latest"
# Build containers run with network: host. The in-cluster
# gitea name does not resolve there, so we redirect git
# operations to https://git.neuralplatform.ai using CF
# Access service-token headers. BASH_ENV makes bash source
# /usr/local/bin/git-cf-access-init.sh before every step,
# which sets up the redirect + headers.
env:
GITEA_SSH_PRIVATE_KEY: "${GITEA_SSH_PRIVATE_KEY}"
BASH_ENV: "/usr/local/bin/git-ssh-init.sh"
# Build containers run with network: host, so k8s DNS names
# don't resolve. Inject the gitea-proxy ClusterIP so that
# actions/checkout can reach gitea-proxy.ci.svc.cluster.local
# (the URL Gitea passes as GITHUB_SERVER_URL to the runner).
CF_ACCESS_CLIENT_ID: "${CF_ACCESS_CLIENT_ID}"
CF_ACCESS_CLIENT_SECRET: "${CF_ACCESS_CLIENT_SECRET}"
BASH_ENV: "/usr/local/bin/git-cf-access-init.sh"
extra_hosts:
- "gitea-proxy.ci.svc.cluster.local:10.43.88.7"
- "gitea.git.svc.cluster.local:10.43.1.53"
EOF
envFrom:
- secretRef:
@@ -99,7 +102,7 @@ metadata:
labels:
app: neuron-technologies-runner
annotations:
config-version: "2026-05-05-extra-hosts"
config-version: "2026-05-04-cf-access-public-url"
spec:
replicas: 2
selector:
@@ -110,7 +113,7 @@ spec:
labels:
app: neuron-technologies-runner
annotations:
config-version: "2026-05-05-extra-hosts"
config-version: "2026-05-04-cf-access-public-url"
spec:
securityContext:
runAsNonRoot: false
@@ -133,18 +136,21 @@ spec:
container:
network: host
docker_host: "unix:///var/run/docker.sock"
force_pull: true
force_pull: false
valid_volumes: []
default_image: "registry.neuralplatform.ai/ci-base:latest"
# Build containers run with network: host, so k8s DNS names
# don't resolve. Inject the gitea-proxy ClusterIP so that
# actions/checkout can reach gitea-proxy.ci.svc.cluster.local
# (the URL Gitea passes as GITHUB_SERVER_URL to the runner).
# Build containers run with network: host. The in-cluster
# gitea name does not resolve there, so we redirect git
# operations to https://git.neuralplatform.ai using CF
# Access service-token headers. BASH_ENV makes bash source
# /usr/local/bin/git-cf-access-init.sh before every step,
# which sets up the redirect + headers.
env:
GITEA_SSH_PRIVATE_KEY: "${GITEA_SSH_PRIVATE_KEY}"
BASH_ENV: "/usr/local/bin/git-ssh-init.sh"
CF_ACCESS_CLIENT_ID: "${CF_ACCESS_CLIENT_ID}"
CF_ACCESS_CLIENT_SECRET: "${CF_ACCESS_CLIENT_SECRET}"
BASH_ENV: "/usr/local/bin/git-cf-access-init.sh"
extra_hosts:
- "gitea-proxy.ci.svc.cluster.local:10.43.88.7"
- "gitea.git.svc.cluster.local:10.43.1.53"
EOF
envFrom:
- secretRef:
@@ -1,19 +1,20 @@
---
# gitea-runner-secret — neural-platform org runner token
#
# GITEA_INSTANCE_URL: act_runner daemon polls git.neuralplatform.ai via HTTPS.
# GITEA_SSH_PRIVATE_KEY: ED25519 deploy key for SSH-based git clones inside
# build containers. git-ssh-init.sh (sourced via BASH_ENV) writes this key
# to ~/.ssh/gitea_key and rewrites HTTPS Gitea URLs to SSH so that
# actions/checkout and plain `git clone` both use SSH auth without any
# HTTPS credential or CF Access token.
# GITEA_INSTANCE_URL stays as the in-cluster URL — the act_runner daemon
# polls it constantly and we don't want every poll to hit Cloudflare Access.
# Build containers, however, need the public URL because they run with
# network: host and can't resolve gitea.git.svc.cluster.local. The
# git-cf-access-init.sh entrypoint in the ci-base image rewrites the
# in-cluster URL to https://git.neuralplatform.ai with the CF Access
# headers from CF_ACCESS_CLIENT_ID / CF_ACCESS_CLIENT_SECRET below.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: gitea-runner-secret
namespace: ci
annotations:
force-sync: "2026-05-05-gitea-proxy"
force-sync: "2026-05-04-cf-access"
spec:
refreshInterval: 1h
secretStoreRef:
@@ -24,18 +25,23 @@ spec:
creationPolicy: Owner
template:
data:
GITEA_INSTANCE_URL: "http://gitea-proxy.ci.svc.cluster.local:3000"
GITEA_INSTANCE_URL: "http://gitea.git.svc.cluster.local:3000"
GITEA_RUNNER_REGISTRATION_TOKEN: "{{ .runner_token }}"
GITEA_SSH_PRIVATE_KEY: "{{ .ci_ssh_private_key }}"
CF_ACCESS_CLIENT_ID: "{{ .cf_access_client_id }}"
CF_ACCESS_CLIENT_SECRET: "{{ .cf_access_client_secret }}"
data:
- secretKey: runner_token
remoteRef:
key: secret/data/gitea
property: runner_token
- secretKey: ci_ssh_private_key
- secretKey: cf_access_client_id
remoteRef:
key: secret/data/gitea
property: ci_ssh_private_key
key: secret/data/gitea-runner-cf-access
property: client_id
- secretKey: cf_access_client_secret
remoteRef:
key: secret/data/gitea-runner-cf-access
property: client_secret
---
# neuron-technologies-runner-secret — neuron-technologies org runner token
apiVersion: external-secrets.io/v1beta1
@@ -44,7 +50,7 @@ metadata:
name: neuron-technologies-runner-secret
namespace: ci
annotations:
force-sync: "2026-05-05-gitea-proxy"
force-sync: "2026-05-04-cf-access"
spec:
refreshInterval: 1h
secretStoreRef:
@@ -55,15 +61,20 @@ spec:
creationPolicy: Owner
template:
data:
GITEA_INSTANCE_URL: "http://gitea-proxy.ci.svc.cluster.local:3000"
GITEA_INSTANCE_URL: "http://gitea.git.svc.cluster.local:3000"
GITEA_RUNNER_REGISTRATION_TOKEN: "{{ .runner_token }}"
GITEA_SSH_PRIVATE_KEY: "{{ .ci_ssh_private_key }}"
CF_ACCESS_CLIENT_ID: "{{ .cf_access_client_id }}"
CF_ACCESS_CLIENT_SECRET: "{{ .cf_access_client_secret }}"
data:
- secretKey: runner_token
remoteRef:
key: secret/data/gitea
property: neuron_technologies_runner_token
- secretKey: ci_ssh_private_key
- secretKey: cf_access_client_id
remoteRef:
key: secret/data/gitea
property: ci_ssh_private_key
key: secret/data/gitea-runner-cf-access
property: client_id
- secretKey: cf_access_client_secret
remoteRef:
key: secret/data/gitea-runner-cf-access
property: client_secret
@@ -1,48 +0,0 @@
#!/bin/sh
# git-ssh-init.sh
#
# Prepares SSH credentials for git operations inside CI build containers.
#
# How this gets invoked:
#   act_runner runs each step via a non-interactive bash invocation.
#   Setting BASH_ENV=/usr/local/bin/git-ssh-init.sh in act_runner's
#   container.env causes bash to source this before any step's commands.
#   (See servers/legion/k8s/gitea-runner/deployment.yaml.)
#
# What it does (only when GITEA_SSH_PRIVATE_KEY is set and non-empty):
#   1. Writes GITEA_SSH_PRIVATE_KEY (from the runner secret) to ~/.ssh/gitea_key.
#   2. Writes a dedicated SSH config, ~/.ssh/gitea_config, that selects that
#      key for git.neuralplatform.ai. ~/.ssh/config itself is never touched.
#   3. Exports GIT_SSH_COMMAND so git's SSH transport uses that config.
#
# What it deliberately does NOT do:
#   It installs no url.<base>.insteadOf rewrite, so HTTPS remotes stay HTTPS
#   (see the note at the end of the script).
#
# Idempotent — safe to re-run on every step.
if [ -n "${GITEA_SSH_PRIVATE_KEY:-}" ]; then
  mkdir -p ~/.ssh
  chmod 700 ~/.ssh

  # Create both files under a restrictive umask so the private key is never
  # readable by other users, not even briefly between creation and chmod.
  # The subshell keeps the umask change from leaking into the step that
  # sourced this script.
  (
    umask 077

    # Write the private key
    printf '%s\n' "${GITEA_SSH_PRIVATE_KEY}" > ~/.ssh/gitea_key

    # SSH config — use this key for git.neuralplatform.ai, skip host key checking
    # (build containers are ephemeral; we don't persist a known_hosts file)
    cat > ~/.ssh/gitea_config << 'EOF'
Host git.neuralplatform.ai
HostName git.neuralplatform.ai
User git
IdentityFile ~/.ssh/gitea_key
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
EOF
  )

  # Files left over from an earlier step keep their old mode (umask only
  # applies at creation), so tighten them explicitly as well.
  chmod 600 ~/.ssh/gitea_key ~/.ssh/gitea_config

  # Point SSH at our per-job config. -F replaces the per-user config rather
  # than merging with it, so an existing ~/.ssh/config is ignored for git.
  export GIT_SSH_COMMAND="ssh -F ~/.ssh/gitea_config"

  # NOTE: Do NOT add url.insteadOf SSH rewrite here.
  # Gitea's built-in SSH server is disabled (START_SSH_SERVER=false) so
  # SSH git clones would fail. HTTPS git operations work directly from
  # the build container host network — Cloudflare bypasses the CF Access
  # gate for git smart-HTTP and release asset paths.
fi
@@ -1,100 +0,0 @@
---
# gitea-proxy — plain nginx proxy to GKE Gitea LB IP
#
# Routes directly to the GKE Network LB IP, bypassing Cloudflare entirely.
# No CF Access service-token auth needed. The runners can't add custom HTTP
# headers, so going through Cloudflare Access is not viable for the gRPC
# Actions ping endpoint (/api/actions/*).
#
# Runners use:
# GITEA_INSTANCE_URL: http://gitea-proxy.ci.svc.cluster.local:3000
#
# The GKE Gitea LB IP (34.31.145.131) is the external IP of the
# gitea Service in the gitea namespace on the GKE cluster.
apiVersion: v1
kind: ConfigMap
metadata:
name: gitea-proxy-config
namespace: ci
data:
default.conf: |
server {
listen 3000;
location / {
# Direct to GKE Gitea LB IP — bypasses Cloudflare/CF Access entirely.
proxy_pass http://34.31.145.131;
# Tell Gitea which hostname it's being served as so self-referencing
# URLs (clone URLs, webhook URLs) are generated correctly.
proxy_set_header Host git.neuralplatform.ai;
# Forward all original request headers (auth tokens, etc).
proxy_pass_request_headers on;
# Rewrite Location headers in redirects back to the proxy URL.
proxy_redirect http://34.31.145.131/
http://gitea-proxy.ci.svc.cluster.local:3000/;
# Standard proxy headers.
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto http;
# Allow large request bodies (git push payloads).
client_max_body_size 512m;
}
}
---
# Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
name: gitea-proxy
namespace: ci
labels:
app: gitea-proxy
spec:
replicas: 1
selector:
matchLabels:
app: gitea-proxy
template:
metadata:
labels:
app: gitea-proxy
annotations:
config-version: "2026-05-05-direct-lb"
spec:
containers:
- name: nginx
image: nginx:alpine
ports:
- containerPort: 3000
volumeMounts:
- name: config
mountPath: /etc/nginx/conf.d
resources:
requests:
cpu: 50m
memory: 32Mi
limits:
cpu: 200m
memory: 64Mi
volumes:
- name: config
configMap:
name: gitea-proxy-config
---
# Service — ClusterIP reachable by all runner pods as gitea-proxy.ci:3000
apiVersion: v1
kind: Service
metadata:
name: gitea-proxy
namespace: ci
spec:
selector:
app: gitea-proxy
ports:
- port: 3000
targetPort: 3000
@@ -82,12 +82,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: neuron-marketing
# minReplicas=1 to match the file's own convention (see header comment).
# Kubernetes only allows minReplicas=0 when at least one Object or External
# metric is configured (queue depth, custom signal, etc.); with only a
# Resource (CPU) metric, scale-to-zero is rejected and the whole HPA is
# invalid — which was blocking neuron-prod's Argo CD sync.
minReplicas: 1
minReplicas: 0
maxReplicas: 8
metrics:
- type: Resource
@@ -117,32 +117,6 @@ spec:
matchLabels:
kubernetes.io/metadata.name: neuron-prod
---
# ── dharma: accept from Traefik (kube-system) and neuron-prod namespace ──────
# The dharma pod was healthy and the IngressRoute was correct, but cross-
# namespace ingress from kube-system (Traefik) was denied by default-deny-all,
# so every external request landed at Traefik and bounced back as 502. This
# allow rule mirrors `allow-mcp-ingress` and brings dharma into line with the
# other neuron-prod services.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: allow-dharma-ingress
namespace: neuron-prod
spec:
podSelector:
matchLabels:
app: dharma
policyTypes:
- Ingress
ingress:
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: kube-system
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: neuron-prod
---
# ── Egress: all prod pods may reach platform (postgres/redis), vault,
# monitoring (alloy OTLP), kube-dns, and the internet (external APIs) ─
apiVersion: networking.k8s.io/v1
@@ -8,7 +8,7 @@
# routes still go to the Cloud Run prod URL until we cut over.
#
# Vault paths used:
# secret/neuron-technologies/anthropic api_key, web_demo_key
# secret/neuron-technologies/anthropic api_key
# secret/neuron-technologies/supabase anon_key, service_role_key, project_url
# secret/neuron-technologies/marketing-test stripe_*, etc.
# secret/neuron-technologies/notifications resend_api_key
@@ -38,7 +38,7 @@ spec:
- secretKey: NEURON_LLM_0_KEY
remoteRef:
key: secret/data/neuron-technologies/anthropic
property: web_demo_key
property: api_key
- secretKey: SUPABASE_ANON_KEY
remoteRef:
key: secret/data/neuron-technologies/supabase
+38 -8
View File
@@ -92,11 +92,37 @@ resource "cloudflare_zero_trust_tunnel_cloudflared_config" "legion" {
}
}
# vault.neuralplatform.ai moved to GCP Global HTTPS LB (34.54.164.21)
# DNS is now a direct A record (not proxied) in dns-neuralplatform.tf
ingress_rule {
hostname = "vault.neuralplatform.ai"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
# watch.nook.family, jellyfin.nook.family, bazarr.nook.family removed
# This infrastructure is focused on Neuron; nook.family media stack retired
ingress_rule {
hostname = "watch.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
ingress_rule {
hostname = "jellyfin.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
ingress_rule {
hostname = "bazarr.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
# fornax.neuralplatform.ai → Fornax torrent coordinator (qBittorrent API proxy)
@@ -108,12 +134,16 @@ resource "cloudflare_zero_trust_tunnel_cloudflared_config" "legion" {
}
}
# git.neuralplatform.ai REMOVED: Gitea is now on GKE.
# DNS is a direct Cloudflare A record (proxied) pointing at the GKE LoadBalancer IP.
# See: servers/legion/dns-neuralplatform.tf (cloudflare_record.np_gitea)
# git.neuralplatform.ai → Gitea web UI (HTTP via Traefik)
ingress_rule {
hostname = "git.neuralplatform.ai"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
# ssh.git.neuralplatform.ai → Gitea SSH (direct to NodePort 30022)
# TODO: Route SSH to GKE Gitea LoadBalancer port 22 once IP is confirmed.
ingress_rule {
hostname = "ssh.git.neuralplatform.ai"
service = "ssh://localhost:30022"