1 Commits

Author SHA1 Message Date
Will Anderson a529690235 Revert "fix(ci): point Gitea Actions runners at public instance URL"
This reverts commit 6f5d041440.
2026-05-04 16:05:10 -05:00
67 changed files with 490 additions and 1884 deletions
-115
View File
@@ -148,118 +148,3 @@ resource "google_secret_manager_secret" "license_admin_token" {
ignore_changes = [id]
}
}
# ── Gitea: database and login role on neuron-prod-pg15 ───────────────────────
# The Gitea pod on GKE reaches the existing Cloud SQL instance through a
# Cloud SQL Auth Proxy sidecar over a unix socket, so no public IP is exposed.

# Generated password for the `gitea` SQL user below.
resource "random_password" "gitea_db" {
  # Alphanumeric only: the value is embedded in a unix-socket DSN, and avoiding
  # special characters keeps that string simple.
  special = false
  length  = 32
}

# Logical database that Gitea stores its data in.
resource "google_sql_database" "gitea" {
  project  = var.project_id
  instance = google_sql_database_instance.main.name
  name     = "gitea"
}

# SQL login role used by Gitea; authenticates with the generated password.
resource "google_sql_user" "gitea" {
  project  = var.project_id
  instance = google_sql_database_instance.main.name
  name     = "gitea"
  password = random_password.gitea_db.result
}
# ── Gitea GCP service account (Workload Identity → Cloud SQL) ────────────────
# Identity assumed by the Gitea pod; the Cloud SQL Auth Proxy sidecar uses it
# to open connections to the instance.
resource "google_service_account" "gitea" {
  project      = var.project_id
  account_id   = "gitea-gke"
  display_name = "Gitea GKE SA"
  description  = "Service account for the Gitea pod on GKE. Used by the Cloud SQL Auth Proxy sidecar via Workload Identity."
}

# Project-level Cloud SQL client role for the service account above.
resource "google_project_iam_member" "gitea_sql_client" {
  project = var.project_id
  member  = "serviceAccount:${google_service_account.gitea.email}"
  role    = "roles/cloudsql.client"
}
# ── Secret Manager: Gitea database credentials ───────────────────────────────
# Two secrets are published from the same generated password:
#   * gitea-database-url — a complete key/value DSN that points at the unix
#     socket the Cloud SQL Auth Proxy sidecar mounts at
#     /cloudsql/<connection_name>.
#   * gitea-db-password  — the bare password, intended for Gitea's
#     GITEA__database__PASSWD setting (standard postgres password auth through
#     the proxy socket).

resource "google_secret_manager_secret" "gitea_database_url" {
  project   = var.project_id
  secret_id = "gitea-database-url"

  replication {
    auto {}
  }
}

resource "google_secret_manager_secret_version" "gitea_database_url" {
  secret      = google_secret_manager_secret.gitea_database_url.id
  secret_data = "host=/cloudsql/${google_sql_database_instance.main.connection_name} user=gitea password=${random_password.gitea_db.result} dbname=gitea sslmode=disable"
}

resource "google_secret_manager_secret" "gitea_db_password" {
  project   = var.project_id
  secret_id = "gitea-db-password"

  replication {
    auto {}
  }
}

resource "google_secret_manager_secret_version" "gitea_db_password" {
  secret      = google_secret_manager_secret.gitea_db_password.id
  secret_data = random_password.gitea_db.result
}
# ── Read access for the Gitea GCP SA on its own secrets ──────────────────────
resource "google_secret_manager_secret_iam_member" "gitea_database_url_accessor" {
  project   = var.project_id
  secret_id = google_secret_manager_secret.gitea_database_url.secret_id
  member    = "serviceAccount:${google_service_account.gitea.email}"
  role      = "roles/secretmanager.secretAccessor"
}

resource "google_secret_manager_secret_iam_member" "gitea_db_password_accessor" {
  project   = var.project_id
  secret_id = google_secret_manager_secret.gitea_db_password.secret_id
  member    = "serviceAccount:${google_service_account.gitea.email}"
  role      = "roles/secretmanager.secretAccessor"
}

# ── External Secrets Operator (ESO) → gitea GCP SA ───────────────────────────
# The `external-secrets` k8s ServiceAccount in the `external-secrets` namespace
# is allowed to act as the gitea GCP SA (Workload Identity user + token
# creator) so the ESO controller can read Secret Manager on Gitea's behalf.
# Both bindings wait for the cluster resource so they are applied after it.
resource "google_service_account_iam_member" "eso_gitea_wi" {
  depends_on         = [google_container_cluster.neuron_platform]
  service_account_id = google_service_account.gitea.name
  member             = "serviceAccount:${var.project_id}.svc.id.goog[external-secrets/external-secrets]"
  role               = "roles/iam.workloadIdentityUser"
}

resource "google_service_account_iam_member" "eso_gitea_token_creator" {
  depends_on         = [google_container_cluster.neuron_platform]
  service_account_id = google_service_account.gitea.name
  member             = "serviceAccount:${var.project_id}.svc.id.goog[external-secrets/external-secrets]"
  role               = "roles/iam.serviceAccountTokenCreator"
}
# Email of the gitea GCP SA; goes into the k8s ServiceAccount's
# Workload Identity annotation.
output "gitea_service_account_email" {
  value       = google_service_account.gitea.email
  description = "Gitea GKE SA email — used in the Workload Identity annotation"
}

# Short secret ID (not the full resource path) of the DSN secret.
output "gitea_database_secret_id" {
  value       = google_secret_manager_secret.gitea_database_url.secret_id
  description = "Secret Manager secret ID for the Gitea database URL"
}
-68
View File
@@ -1,68 +0,0 @@
# ── DocuSeal: GKE service account ────────────────────────────────────────────
# DocuSeal runs in the `docuseal` namespace on GKE and reads GCP Secret
# Manager through Workload Identity, so no JSON key files are involved.
#
# Secret flow: Vault → GCP Secret Manager → ExternalSecret → k8s Secret → pod
resource "google_service_account" "docuseal_gke" {
  project      = var.project_id
  account_id   = "docuseal-gke"
  display_name = "DocuSeal GKE"
  description  = "Identity for DocuSeal pod on GKE. Accesses Secret Manager via Workload Identity."
}
# ── DocuSeal: Secret Manager read access ─────────────────────────────────────
# One secretAccessor grant per secret the DocuSeal SA needs. The secrets are
# referenced by literal ID; they are not created in this block.

# Application secret key base.
resource "google_secret_manager_secret_iam_member" "docuseal_secret_key_base" {
  project   = var.project_id
  secret_id = "docuseal-secret-key-base"
  member    = "serviceAccount:${google_service_account.docuseal_gke.email}"
  role      = "roles/secretmanager.secretAccessor"
}

# SMTP settings (host, port, username, password, from-address).
resource "google_secret_manager_secret_iam_member" "docuseal_smtp_host" {
  project   = var.project_id
  secret_id = "docuseal-smtp-host"
  member    = "serviceAccount:${google_service_account.docuseal_gke.email}"
  role      = "roles/secretmanager.secretAccessor"
}

resource "google_secret_manager_secret_iam_member" "docuseal_smtp_port" {
  project   = var.project_id
  secret_id = "docuseal-smtp-port"
  member    = "serviceAccount:${google_service_account.docuseal_gke.email}"
  role      = "roles/secretmanager.secretAccessor"
}

resource "google_secret_manager_secret_iam_member" "docuseal_smtp_username" {
  project   = var.project_id
  secret_id = "docuseal-smtp-username"
  member    = "serviceAccount:${google_service_account.docuseal_gke.email}"
  role      = "roles/secretmanager.secretAccessor"
}

resource "google_secret_manager_secret_iam_member" "docuseal_smtp_password" {
  project   = var.project_id
  secret_id = "docuseal-smtp-password"
  member    = "serviceAccount:${google_service_account.docuseal_gke.email}"
  role      = "roles/secretmanager.secretAccessor"
}

resource "google_secret_manager_secret_iam_member" "docuseal_smtp_from" {
  project   = var.project_id
  secret_id = "docuseal-smtp-from"
  member    = "serviceAccount:${google_service_account.docuseal_gke.email}"
  role      = "roles/secretmanager.secretAccessor"
}
# ── DocuSeal: Workload Identity binding ──────────────────────────────────────
# Lets the k8s ServiceAccount `docuseal` (namespace `docuseal`) impersonate the
# docuseal-gke GCP SA. ESO relies on this to reach Secret Manager without a
# key file. Applied only after the GKE cluster resource.
resource "google_service_account_iam_member" "docuseal_workload_identity" {
  depends_on         = [google_container_cluster.neuron_platform]
  service_account_id = google_service_account.docuseal_gke.name
  member             = "serviceAccount:${var.project_id}.svc.id.goog[docuseal/docuseal]"
  role               = "roles/iam.workloadIdentityUser"
}
-40
View File
@@ -254,46 +254,6 @@ resource "google_service_account_iam_member" "ci_pusher_wif_neuron_web" {
member = "principalSet://iam.googleapis.com/${google_iam_workload_identity_pool.gitea.name}/attribute.repository/neuron-technologies/neuron-web"
}
# ── Gitea Actions runner on GKE ───────────────────────────────────────────────
# GCP identity for the runner Deployment in the `ci` namespace on GKE. The k8s
# ServiceAccount `ci/gitea-runner` is bound to this SA via Workload Identity.
#
# NOTE(review): an earlier version of this comment described a DinD sidecar and
# the pod fetching the registration token itself at startup. The manifests in
# servers/gcp/k8s/gitea-runner/ say Autopilot blocks privileged DinD (host-mode
# labels are used instead) and deliver the token through an ExternalSecret —
# confirm which description is current.
#
# See servers/gcp/k8s/gitea-runner/ for the k8s manifests.
resource "google_service_account" "gitea_runner_gke" {
  account_id   = "gitea-runner-gke"
  display_name = "Gitea Actions runner (GKE identity)"
  description  = "Workload Identity SA for the GKE-hosted Gitea Actions runner. Read-only access to the registration token."
  project      = var.project_id
}
# Read access to the runner registration token (`gitea-runner-token`) in
# Secret Manager for the runner's GCP SA.
resource "google_secret_manager_secret_iam_member" "runner_gke_token_access" {
  project   = var.project_id
  secret_id = "gitea-runner-token"
  member    = "serviceAccount:${google_service_account.gitea_runner_gke.email}"
  role      = "roles/secretmanager.secretAccessor"
}

# Workload Identity: the k8s ServiceAccount `gitea-runner` in the `ci`
# namespace may impersonate the runner GCP SA, so no JSON key file is needed.
resource "google_service_account_iam_member" "gitea_runner_gke_workload_identity" {
  depends_on         = [google_container_cluster.neuron_platform]
  service_account_id = google_service_account.gitea_runner_gke.name
  member             = "serviceAccount:${var.project_id}.svc.id.goog[ci/gitea-runner]"
  role               = "roles/iam.workloadIdentityUser"
}

# Project-wide Artifact Registry read access, so build jobs can pull from the
# neuron-* repos and the runner can pull its own ci-base image.
resource "google_project_iam_member" "gitea_runner_gke_ar_reader" {
  project = var.project_id
  member  = "serviceAccount:${google_service_account.gitea_runner_gke.email}"
  role    = "roles/artifactregistry.reader"
}
# ── Outputs ───────────────────────────────────────────────────────────────────
output "gitea_runner_vm_name" {
-94
View File
@@ -1,94 +0,0 @@
# ── GKE Autopilot Cluster — neuron-platform ───────────────────────────────────
#
# Regional Autopilot cluster in us-central1. Autopilot manages the node pool
# automatically — no node pools to configure, no VM sizes to choose.
# Pods spread across a/b/c zones automatically by GKE.
#
# Workload Identity is required so Vault pods can access KMS (auto-unseal)
# and Gitea pods can use Cloud SQL Auth Proxy without key files.
#
# After `terraform apply`, register the cluster with Legion Argo CD:
#   1. Get the endpoint:    terraform output gke_cluster_endpoint
#   2. Get credentials:     gcloud container clusters get-credentials neuron-platform \
#                             --region us-central1 --project neuron-785695
#   3. Register with Argo CD:
#        argocd cluster add <context-name> --name gke-neuron-platform
#   4. Update servers/gcp/k8s/argocd-apps/*.yaml with the actual cluster endpoint.
resource "google_container_cluster" "neuron_platform" {
  # This resource is managed through the google-beta provider.
  provider = google-beta
  name     = "neuron-platform"
  # A region (not a zone) is given here, which makes the cluster regional.
  location = "us-central1"
  project  = var.project_id

  # Autopilot — GKE manages node pools, scaling, and node security
  enable_autopilot = true

  release_channel {
    channel = "REGULAR"
  }

  # Workload Identity — required for Vault KMS and Gitea Cloud SQL proxy.
  # The pool name is the one used in every `...svc.id.goog[ns/ksa]` IAM member.
  workload_identity_config {
    workload_pool = "${var.project_id}.svc.id.goog"
  }

  # Deletion protection — prevent accidental cluster destruction.
  # Must be set to false (and applied) before the cluster can be destroyed.
  deletion_protection = true

  # Cluster networking — no network/subnetwork is set here, so the default VPC
  # applies (same as Vault GCE nodes). The empty block leaves the pod/service
  # ranges to GKE.
  ip_allocation_policy {}

  # Do not issue a legacy client certificate for authenticating to the
  # cluster API.
  master_auth {
    client_certificate_config {
      issue_client_certificate = false
    }
  }
}
# ── Workload Identity bindings ────────────────────────────────────────────────
#
# Every binding below lets one k8s ServiceAccount in the GKE cluster
# impersonate one GCP service account. The k8s ServiceAccount additionally
# needs this annotation to complete the link:
#   iam.gke.io/gcp-service-account: <gcp-sa-email>

# vault/vault (k8s) → vault-unseal (GCP).
# Gives the Vault StatefulSet KMS access for auto-unseal without key files.
resource "google_service_account_iam_member" "vault_workload_identity" {
  depends_on         = [google_container_cluster.neuron_platform]
  service_account_id = google_service_account.vault_unseal.name
  member             = "serviceAccount:${var.project_id}.svc.id.goog[vault/vault]"
  role               = "roles/iam.workloadIdentityUser"
}

# gitea/gitea (k8s) → gitea (GCP).
# Gives the Cloud SQL Auth Proxy sidecar access to neuron-prod-pg15.
resource "google_service_account_iam_member" "gitea_workload_identity" {
  depends_on         = [google_container_cluster.neuron_platform]
  service_account_id = google_service_account.gitea.name
  member             = "serviceAccount:${var.project_id}.svc.id.goog[gitea/gitea]"
  role               = "roles/iam.workloadIdentityUser"
}
# ── Outputs ───────────────────────────────────────────────────────────────────

# API endpoint with an https:// prefix, ready to paste into an Argo CD
# Application's destination.server. Marked sensitive, so read it with
# `terraform output -raw gke_cluster_endpoint`.
output "gke_cluster_endpoint" {
  sensitive   = true
  value       = "https://${google_container_cluster.neuron_platform.endpoint}"
  description = "GKE cluster API endpoint — use as destination.server in Argo CD Applications"
}

output "gke_cluster_name" {
  value       = google_container_cluster.neuron_platform.name
  description = "GKE cluster name"
}

# Built from var.project_id rather than read back from the cluster resource.
output "gke_workload_pool" {
  value       = "${var.project_id}.svc.id.goog"
  description = "Workload Identity pool — used in Workload Identity bindings"
}
@@ -1,26 +0,0 @@
# Argo CD Application: syncs servers/gcp/k8s/docuseal (branch `main` of the
# infrastructure repo) into the `docuseal` namespace of the destination cluster.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  namespace: argocd
  name: docuseal-gke
spec:
  project: default
  destination:
    namespace: docuseal
    server: https://34.63.89.52
  source:
    path: servers/gcp/k8s/docuseal
    targetRevision: main
    repoURL: https://git.neuralplatform.ai/will/infrastructure.git
  # Deployment status is excluded from the diff.
  ignoreDifferences:
    - kind: Deployment
      group: apps
      jsonPointers: ["/status"]
  syncPolicy:
    syncOptions: [CreateNamespace=true, ServerSideApply=true]
    # Fully automated: drift is reverted and removed manifests are pruned.
    automated:
      selfHeal: true
      prune: true
@@ -1,46 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  namespace: argocd
  name: gitea-gke
  annotations:
    # This Application is applied to the Legion Argo CD instance once the GKE
    # cluster has been registered there (registration steps: vault-gke.yaml).
    #
    # Checklist for moving Gitea from Legion to GKE:
    #   1. terraform apply (creates GKE cluster, Cloud SQL gitea DB, secrets)
    #   2. Register GKE cluster with Legion Argo CD (see vault-gke.yaml)
    #   3. Install ESO on GKE: helm install external-secrets external-secrets/external-secrets
    #        --namespace external-secrets --create-namespace
    #   4. Apply this Application to Legion Argo CD
    #   5. Verify gitea pod is running on GKE with DB connectivity
    #   6. Take a tar of /data from the Legion Gitea pod and restore to GKE PVC
    #   7. Update Cloudflare Tunnel on Legion: remove git.neuralplatform.ai route
    #   8. Add GKE ingress / GCP LB rule for git.neuralplatform.ai
    #   9. Decommission Gitea on Legion (remove gitea*.yaml from servers/legion/apps/)
spec:
  project: default
  destination:
    namespace: gitea
    # GKE API endpoint, as printed by:
    #   terraform -chdir=servers/gcp output -raw gke_cluster_endpoint
    server: https://34.63.89.52
  source:
    path: servers/gcp/k8s/gitea
    targetRevision: main
    repoURL: https://git.neuralplatform.ai/will/infrastructure.git
  # Deployment status is left out of the diff: the newer `terminatingReplicas`
  # status field is unknown to Argo CD's bundled schema and triggers a
  # ComparisonError. Status is never managed by Argo CD, so ignoring all of
  # /status is safe.
  ignoreDifferences:
    - kind: Deployment
      group: apps
      jsonPointers: ["/status"]
  syncPolicy:
    syncOptions: [CreateNamespace=true, ServerSideApply=true]
    automated:
      selfHeal: true
      prune: true
@@ -1,26 +0,0 @@
# Argo CD Application: syncs servers/gcp/k8s/gitea-runner (branch `main` of
# the infrastructure repo) into the `ci` namespace of the destination cluster.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  namespace: argocd
  name: gitea-runner-gke
spec:
  project: default
  destination:
    namespace: ci
    server: https://34.63.89.52
  source:
    path: servers/gcp/k8s/gitea-runner
    targetRevision: main
    repoURL: https://git.neuralplatform.ai/will/infrastructure.git
  # Deployment status is excluded from the diff.
  ignoreDifferences:
    - kind: Deployment
      group: apps
      jsonPointers: ["/status"]
  syncPolicy:
    syncOptions: [CreateNamespace=true, ServerSideApply=true]
    automated:
      selfHeal: true
      prune: true
@@ -1,33 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  namespace: argocd
  name: vault-gke
  annotations:
    # Plain k8s manifests for Vault, synced to the GKE cluster. The Vault Helm
    # release is a separate Application (vault-helm-gke.yaml).
    #
    # Register the GKE cluster with Legion Argo CD before applying this:
    #   1. gcloud container clusters get-credentials neuron-platform \
    #        --region us-central1 --project neuron-785695
    #   2. argocd cluster add <context-name> --name gke-neuron-platform
    #   3. Update GKE_CLUSTER_ENDPOINT in all argocd-apps/*.yaml:
    #        terraform -chdir=servers/gcp output -raw gke_cluster_endpoint
spec:
  project: default
  destination:
    namespace: vault
    # GKE API endpoint, as printed by:
    #   terraform -chdir=servers/gcp output -raw gke_cluster_endpoint
    server: https://34.63.89.52
  source:
    path: servers/gcp/k8s/vault
    targetRevision: main
    repoURL: https://git.neuralplatform.ai/will/infrastructure.git
  syncPolicy:
    syncOptions: [CreateNamespace=true, ServerSideApply=true]
    automated:
      selfHeal: true
      prune: true
@@ -1,175 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: vault-helm-gke
  namespace: argocd
  annotations:
    # Deploys the Vault Helm chart to the GKE cluster via Legion Argo CD.
    # destination.server holds the GKE API endpoint reported by:
    #   terraform -chdir=servers/gcp output -raw gke_cluster_endpoint
spec:
  project: default
  source:
    repoURL: https://helm.releases.hashicorp.com
    chart: vault
    targetRevision: "0.29.1"
    helm:
      values: |
        global:
          enabled: true
        injector:
          enabled: false
        ui:
          enabled: true
        server:
          image:
            repository: hashicorp/vault
            tag: "1.19.2"
          # Workload Identity — Vault pod k8s SA impersonates vault-unseal GCP SA
          # for KMS auto-unseal. Binding is in servers/gcp/gke.tf.
          serviceAccount:
            create: true
            name: vault
            annotations:
              iam.gke.io/gcp-service-account: vault-unseal@neuron-785695.iam.gserviceaccount.com
          # GKE Autopilot: no privileged containers. Vault doesn't need privilege.
          # Request IPC_LOCK so Vault can lock memory (prevents secrets swap).
          # NOTE(review): the vault-helm chart appears to read the container
          # security context from server.statefulSet.securityContext.container,
          # not server.securityContext — confirm this key has any effect before
          # relying on it (and that Autopilot admits IPC_LOCK if it is moved).
          securityContext:
            capabilities:
              add:
                - IPC_LOCK
          # Single replica for now — raft standby unsealing requires the barrier
          # key to be shared over port 8201 (cluster port), but 8201 only opens
          # after unsealing (deadlock). Running solo on pod 0 until we resolve
          # the standby bootstrap path. Scale back to 3 once fixed.
          ha:
            enabled: true
            replicas: 1
            raft:
              enabled: true
              setNodeId: true
              config: |
                ui = true
                listener "tcp" {
                  tls_disable = 1
                  address = "[::]:8200"
                  cluster_address = "[::]:8201"
                }
                storage "raft" {
                  path = "/vault/data"
                  retry_join {
                    leader_api_addr = "http://vault-helm-gke-0.vault-helm-gke-internal:8200"
                  }
                  retry_join {
                    leader_api_addr = "http://vault-helm-gke-1.vault-helm-gke-internal:8200"
                  }
                  retry_join {
                    leader_api_addr = "http://vault-helm-gke-2.vault-helm-gke-internal:8200"
                  }
                }
                seal "gcpckms" {
                  project = "neuron-785695"
                  region = "global"
                  key_ring = "vault"
                  crypto_key = "vault-unseal"
                }
                telemetry {
                  prometheus_retention_time = "30s"
                  disable_hostname = false
                }
          # Spread pods across GKE zones
          topologySpreadConstraints:
            - maxSkew: 1
              topologyKey: topology.kubernetes.io/zone
              whenUnsatisfiable: ScheduleAnyway
              labelSelector:
                matchLabels:
                  app.kubernetes.io/name: vault
                  component: server
          # 10Gi per pod on the standard-rwo StorageClass (GKE's balanced
          # persistent disk class). An earlier comment here described
          # premium-rwo / pd-ssd, which is NOT what is configured. If SSD is
          # actually required, change storageClass deliberately — existing
          # PVCs keep the class they were created with.
          dataStorage:
            enabled: true
            size: 10Gi
            storageClass: standard-rwo
            accessMode: ReadWriteOnce
          # Annotations on the pod template — bumping rollme triggers a
          # StatefulSet rolling restart (pods 2→1→0) to pick up the
          # corrected raft retry_join addresses.
          # NOTE(review): the vault-helm chart documents pod annotations under
          # server.annotations; confirm server.podAnnotations is honoured.
          podAnnotations:
            rollme: "2026-05-05-raft-join-fix"
          readinessProbe:
            enabled: true
            # standbyok=true: standby pods pass (they serve reads).
            # No sealedcode/uninitcode override: sealed (503) and
            # uninitialized (501) pods fail readiness, so they leave the
            # service endpoints and stop receiving external traffic.
            path: "/v1/sys/health?standbyok=true"
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 3
          livenessProbe:
            enabled: true
            # Liveness must tolerate sealed and uninitialized pods so they are
            # not killed while joining raft / auto-unsealing. /sys/health has
            # no `sealedok` flag (it is silently ignored, leaving sealed pods
            # at 503); the status code is overridden with sealedcode and
            # uninitcode instead.
            path: "/v1/sys/health?standbyok=true&sealedcode=200&uninitcode=200"
            initialDelaySeconds: 60
            periodSeconds: 10
            failureThreshold: 3
          # GKE Autopilot requires resource requests on all containers
          resources:
            requests:
              memory: 256Mi
              cpu: 500m
            limits:
              memory: 512Mi
              cpu: 1000m
          service:
            enabled: true
            type: ClusterIP
            port: 8200
            targetPort: 8200
            annotations:
              # Container-native NEG for GCP Global HTTPS LB backend cutover.
              # GKE will create a zonal NEG named k8s1-<hash>-vault-vault-helm-gke-8200-<hash>
              # in each zone where Vault pods are scheduled.
              # After this syncs, list NEGs:
              #   gcloud compute network-endpoint-groups list --filter="name~vault" --project neuron-785695
              # Then reference them in servers/gcp/vault-gke-lb.tf.
              cloud.google.com/neg: '{"exposed_ports":{"8200":{}}}'
          # Ingress disabled — Vault is exposed via GCP HTTPS LB.
          # After migration, update the existing LB backend (vault-nodes.tf)
          # to target a GKE NEG instead of the GCE instance groups.
          # See: https://cloud.google.com/kubernetes-engine/docs/how-to/standalone-neg
          ingress:
            enabled: false
  destination:
    # GKE API endpoint, as printed by:
    #   terraform -chdir=servers/gcp output -raw gke_cluster_endpoint
    server: https://34.63.89.52
    namespace: vault
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
      - ServerSideApply=true
-101
View File
@@ -1,101 +0,0 @@
# DocuSeal application Deployment. A single replica owns the `docuseal-data`
# volume mounted at /data; all secrets come from the `docuseal-secrets`
# k8s Secret.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: docuseal
  name: docuseal
  labels: {app: docuseal}
spec:
  # The data PVC is ReadWriteOnce, so the old pod must be gone before the new
  # one starts — hence Recreate and exactly one replica.
  strategy:
    type: Recreate
  replicas: 1
  selector:
    matchLabels: {app: docuseal}
  template:
    metadata:
      labels: {app: docuseal}
    spec:
      serviceAccountName: docuseal
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: docuseal-data
      containers:
        - name: docuseal
          image: ee.docuseal.com/lip5viwf/ds-ee:latest
          ports:
            - containerPort: 3000
          resources:
            limits: {cpu: 500m, memory: 512Mi}
            requests: {cpu: 100m, memory: 256Mi}
          # Pod receives traffic once GET / on port 3000 succeeds.
          readinessProbe:
            initialDelaySeconds: 15
            periodSeconds: 10
            httpGet:
              port: 3000
              path: /
          volumeMounts:
            - mountPath: /data
              name: data
          env:
            - name: SECRET_KEY_BASE
              valueFrom:
                secretKeyRef: {name: docuseal-secrets, key: secret_key_base}
            - name: FORCE_SSL
              value: "false"
            - name: HOST
              value: "sign.neurontechnologies.ai"
            # SMTP settings, one Secret key each.
            - name: SMTP_ADDRESS
              valueFrom:
                secretKeyRef: {name: docuseal-secrets, key: smtp_host}
            - name: SMTP_PORT
              valueFrom:
                secretKeyRef: {name: docuseal-secrets, key: smtp_port}
            - name: SMTP_USERNAME
              valueFrom:
                secretKeyRef: {name: docuseal-secrets, key: smtp_username}
            - name: SMTP_PASSWORD
              valueFrom:
                secretKeyRef: {name: docuseal-secrets, key: smtp_password}
            - name: SMTP_FROM
              valueFrom:
                secretKeyRef: {name: docuseal-secrets, key: smtp_from}
---
# LoadBalancer Service in front of the DocuSeal pods: port 80 → container 3000.
apiVersion: v1
kind: Service
metadata:
  namespace: docuseal
  name: docuseal
  annotations:
    # External GCP network load balancer. Cloudflare proxies in front of it
    # and terminates TLS. Once provisioned, read the external IP with
    #   kubectl -n docuseal get svc docuseal
    # and point the Cloudflare DNS A record for sign.neurontechnologies.ai at it.
    cloud.google.com/load-balancer-type: "External"
spec:
  type: LoadBalancer
  selector: {app: docuseal}
  ports:
    - name: http
      targetPort: 3000
      port: 80
@@ -1,56 +0,0 @@
---
# Namespaced SecretStore for DocuSeal, backed directly by GCP Secret Manager
# through Workload Identity. The DocuSeal GCP SA is granted
# secretmanager.secretAccessor on its own secrets in servers/gcp/docuseal.tf.
apiVersion: external-secrets.io/v1
kind: SecretStore
metadata:
  namespace: docuseal
  name: gcp-secretmanager
spec:
  provider:
    gcpsm:
      # ESO authenticates as the k8s ServiceAccount below, which maps to the
      # docuseal-gke GCP SA — no JSON key file is involved.
      auth:
        workloadIdentity:
          serviceAccountRef:
            namespace: docuseal
            name: docuseal
          clusterName: neuron-platform
          clusterLocation: us-central1
      projectID: neuron-785695
---
# Collects every DocuSeal secret from GCP Secret Manager into one k8s Secret
# named `docuseal-secrets`, refreshed hourly.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  namespace: docuseal
  name: docuseal-secrets
spec:
  secretStoreRef:
    kind: SecretStore
    name: gcp-secretmanager
  refreshInterval: 1h
  target:
    creationPolicy: Owner
    name: docuseal-secrets
  # secretKey = key inside the k8s Secret; remoteRef.key = Secret Manager ID.
  data:
    - secretKey: secret_key_base
      remoteRef: {key: docuseal-secret-key-base}
    - secretKey: smtp_host
      remoteRef: {key: docuseal-smtp-host}
    - secretKey: smtp_port
      remoteRef: {key: docuseal-smtp-port}
    - secretKey: smtp_username
      remoteRef: {key: docuseal-smtp-username}
    - secretKey: smtp_password
      remoteRef: {key: docuseal-smtp-password}
    - secretKey: smtp_from
      remoteRef: {key: docuseal-smtp-from}
-6
View File
@@ -1,6 +0,0 @@
# Namespace that holds all DocuSeal resources.
apiVersion: v1
kind: Namespace
metadata:
  name: docuseal
  labels: {app.kubernetes.io/name: docuseal}
-12
View File
@@ -1,12 +0,0 @@
# 5Gi ReadWriteOnce volume for DocuSeal's /data directory.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: docuseal
  name: docuseal-data
spec:
  storageClassName: standard-rwo
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 5Gi
@@ -1,10 +0,0 @@
# k8s ServiceAccount used by the DocuSeal pod and referenced by the DocuSeal
# SecretStore.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: docuseal
  name: docuseal
  annotations:
    # Workload Identity link to the docuseal-gke GCP SA, which lets ESO reach
    # GCP Secret Manager without a JSON key file. The matching IAM binding is
    # `docuseal_workload_identity` in servers/gcp/docuseal.tf.
    iam.gke.io/gcp-service-account: docuseal-gke@neuron-785695.iam.gserviceaccount.com
@@ -1,116 +0,0 @@
---
# Gitea Actions runner for GKE Autopilot — currently a placeholder (0 replicas).
#
# Layout:
#   - init container `register`: registers the runner with Gitea
#     (--no-interactive; skipped when /data/.runner already exists) and writes
#     /data/config.yaml
#   - container `runner`: runs the act_runner daemon against that config
#
# LIMITATION: GKE Autopilot does not allow privileged containers, so
# Docker-in-Docker (DinD) is unavailable. The runner therefore uses "host"
# label mode: steps execute directly inside the runner container instead of in
# spawned Docker containers. forgejo-runner v11 still requires a Docker daemon
# to start — this deployment will fail until one of these is in place:
#
#   Option A (recommended): a GKE Standard node pool for the ci namespace —
#     either with sandbox.config.sandboxType=gvisor or with privileged pods
#     allowed on specific node pools.
#
#   Option B: Cloud Build for docker build steps, this runner for shell steps.
#     Requires forking all workflow files to remove docker steps.
#
#   Option C: point docker_host at an external Docker TCP daemon (e.g., on the
#     GCE runner VM).
#
# Until then the existing GCE VM runner (gitea-runner-1 in us-central1-a)
# handles CI jobs.
#
# The ci namespace sets pod-security.kubernetes.io/enforce: privileged
# (see namespace.yaml), but Autopilot's Warden enforces this at the node level.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: ci
  name: gitea-runner
  labels: {app: gitea-runner}
spec:
  # Kept at 0 while the DinD/privileged-pod blocker described above stands.
  # Raise to 1 once a Docker daemon is reachable (Standard node pool or
  # external TCP daemon).
  replicas: 0
  selector:
    matchLabels: {app: gitea-runner}
  template:
    metadata:
      labels: {app: gitea-runner}
    spec:
      serviceAccountName: gitea-runner
      securityContext:
        runAsNonRoot: false
      # Scratch volume shared by both containers (registration file + config).
      volumes:
        - name: data
          emptyDir: {}
      initContainers:
        - name: register
          image: us-central1-docker.pkg.dev/neuron-785695/neuron-ci/ci-base:latest
          workingDir: /data
          volumeMounts:
            - name: data
              mountPath: /data
          envFrom:
            - secretRef:
                name: gitea-runner-token
          env:
            # In-cluster ClusterIP address of Gitea (same GKE cluster), which
            # bypasses Cloudflare Access authentication.
            - name: GITEA_INSTANCE_URL
              value: "http://gitea.gitea.svc.cluster.local:80"
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Idempotent registration — skip if already registered (.runner file exists).
              if [ -f /data/.runner ]; then
                echo "Runner already registered, skipping."
              else
                act_runner register \
                  --instance "${GITEA_INSTANCE_URL}" \
                  --token "${GITEA_RUNNER_REGISTRATION_TOKEN}" \
                  --name "gke-runner-$(hostname)" \
                  --labels "ubuntu-latest:host,ubuntu-24.04:host,linux,x64" \
                  --no-interactive
              fi
              cat > /data/config.yaml << 'CONFIGEOF'
              runner:
                capacity: 2
                timeout: 3h
                envs:
                  BASH_ENV: "/usr/local/bin/git-ssh-init.sh"
              container:
                # GKE Autopilot blocks privileged DinD — steps run directly
                # in the runner container (host mode). Labels use :host suffix.
                network: host
                docker_host: "-"
                force_pull: false
                valid_volumes: []
              CONFIGEOF
      containers:
        - name: runner
          image: us-central1-docker.pkg.dev/neuron-785695/neuron-ci/ci-base:latest
          workingDir: /data
          command: ["act_runner", "daemon", "--config", "/data/config.yaml"]
          volumeMounts:
            - name: data
              mountPath: /data
          envFrom:
            - secretRef:
                name: gitea-runner-token
          env:
            - name: GITEA_INSTANCE_URL
              value: "http://gitea.gitea.svc.cluster.local:80"
          resources:
            limits:
              cpu: "4"
              memory: "4Gi"
            requests:
              cpu: "500m"
              memory: "512Mi"
@@ -1,39 +0,0 @@
---
# Namespaced SecretStore for `ci`, backed by GCP Secret Manager through
# Workload Identity. The gitea-runner-gke GCP SA holds
# secretmanager.secretAccessor on gitea-runner-token
# (see servers/gcp/gitea-runner.tf).
apiVersion: external-secrets.io/v1
kind: SecretStore
metadata:
  namespace: ci
  name: gcp-secretmanager
spec:
  provider:
    gcpsm:
      auth:
        workloadIdentity:
          serviceAccountRef:
            namespace: ci
            name: gitea-runner
          clusterName: neuron-platform
          clusterLocation: us-central1
      projectID: neuron-785695
---
# Copies the Gitea runner registration token from Secret Manager into the
# k8s Secret `gitea-runner-token` (key GITEA_RUNNER_REGISTRATION_TOKEN),
# refreshed hourly.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  namespace: ci
  name: gitea-runner-token
spec:
  secretStoreRef:
    kind: SecretStore
    name: gcp-secretmanager
  refreshInterval: 1h
  target:
    creationPolicy: Owner
    name: gitea-runner-token
  data:
    - secretKey: GITEA_RUNNER_REGISTRATION_TOKEN
      remoteRef: {key: gitea-runner-token}
@@ -1,8 +0,0 @@
# Namespace for CI workloads (the Gitea Actions runner).
apiVersion: v1
kind: Namespace
metadata:
  name: ci
  labels:
    # Privileged Pod Security level: Docker-in-Docker (DinD) needs
    # privileged pods.
    pod-security.kubernetes.io/enforce: privileged
    app.kubernetes.io/name: ci
@@ -1,11 +0,0 @@
# k8s ServiceAccount for the runner pod; also referenced by the `ci`
# SecretStore.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: ci
  name: gitea-runner
  annotations:
    # Workload Identity link to the gitea-runner-gke GCP SA. It lets ESO (and
    # optionally the runner pod itself) authenticate to GCP Secret Manager
    # without a JSON key file. The matching IAM binding is
    # `gitea_runner_gke_workload_identity` in servers/gcp/gitea-runner.tf.
    iam.gke.io/gcp-service-account: gitea-runner-gke@neuron-785695.iam.gserviceaccount.com
-104
View File
@@ -1,104 +0,0 @@
# Custom dark theme for Gitea, shipped as two ConfigMap entries:
#   header.tmpl — a <link> tag that loads /assets/css/custom.css
#   custom.css  — the stylesheet itself
# NOTE(review): how these keys are mounted into the Gitea container is not
# visible here — confirm custom.css is served at /assets/css/custom.css.
apiVersion: v1
kind: ConfigMap
metadata:
  name: gitea-custom-css
  namespace: gitea
data:
  header.tmpl: |
    <link rel="stylesheet" href="/assets/css/custom.css">
  custom.css: |
    /* ── Typography & base ── */
    :root {
      --color-primary: #6366f1;
      --color-primary-dark: #4f46e5;
      --color-secondary: #8b5cf6;
      --color-bg: #0f0f17;
      --color-surface: #16161f;
      --color-surface-2: #1e1e2e;
      --color-border: #2a2a3d;
      --color-text: #e2e2f0;
      --color-text-muted: #8888aa;
      --color-green: #22d3a5;
      --radius: 10px;
    }
    body {
      background: var(--color-bg) !important;
      color: var(--color-text) !important;
      font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif !important;
    }
    /* ── Top navbar ── */
    .navbar, #navbar {
      background: var(--color-surface) !important;
      border-bottom: 1px solid var(--color-border) !important;
      box-shadow: 0 1px 12px rgba(0,0,0,0.4) !important;
    }
    .navbar .brand svg, .navbar .brand img { filter: brightness(1.2); }
    /* ── Sidebar & panels ── */
    .repository, .ui.container, .ui.segment,
    .ui.card, .ui.cards > .card {
      background: var(--color-surface) !important;
      border: 1px solid var(--color-border) !important;
      border-radius: var(--radius) !important;
    }
    /* ── Buttons ── */
    .ui.primary.button, .ui.green.button {
      background: var(--color-primary) !important;
      border: none !important;
      border-radius: 8px !important;
    }
    .ui.primary.button:hover { background: var(--color-primary-dark) !important; }
    /* ── Inputs ── */
    input, textarea, select,
    .ui.input > input, .ui.dropdown {
      background: var(--color-surface-2) !important;
      border: 1px solid var(--color-border) !important;
      color: var(--color-text) !important;
      border-radius: 8px !important;
    }
    /* ── Sign-in page ── */
    .user.signin .ui.segment,
    .user.signup .ui.segment {
      background: var(--color-surface) !important;
      border: 1px solid var(--color-border) !important;
      border-radius: 16px !important;
      box-shadow: 0 8px 32px rgba(0,0,0,0.5) !important;
      padding: 2.5rem !important;
    }
    /* ── Repo file tree ── */
    .repository.file.list .file-list {
      background: var(--color-surface) !important;
      border-radius: var(--radius) !important;
      border: 1px solid var(--color-border) !important;
    }
    .repository.file.list .file-list tr:hover td {
      background: var(--color-surface-2) !important;
    }
    /* ── Labels & badges ── */
    .ui.label { border-radius: 6px !important; }
    /* ── Dashboard activity feed ── */
    .feeds .news { border-bottom: 1px solid var(--color-border) !important; }
    /* ── Code blocks ── */
    pre, code {
      background: var(--color-surface-2) !important;
      border: 1px solid var(--color-border) !important;
      border-radius: 6px !important;
    }
    /* ── Muted footer ── */
    #footer {
      background: var(--color-surface) !important;
      border-top: 1px solid var(--color-border) !important;
      color: var(--color-text-muted) !important;
    }
-163
View File
@@ -1,163 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: gitea
namespace: gitea
labels:
app: gitea
spec:
replicas: 1
# Recreate — only one pod can hold the RWO PVC at a time.
# Scale to multiple replicas only after adding shared storage (e.g. Filestore).
strategy:
type: Recreate
selector:
matchLabels:
app: gitea
template:
metadata:
labels:
app: gitea
spec:
serviceAccountName: gitea
containers:
- name: gitea
image: gitea/gitea:1.25.5
ports:
- name: http
containerPort: 3000
- name: ssh
containerPort: 22
env:
# Database — connect through the Cloud SQL Auth Proxy unix socket
- name: GITEA__database__DB_TYPE
value: postgres
# Unix socket path used by the Cloud SQL Auth Proxy sidecar
- name: GITEA__database__HOST
value: /cloudsql/neuron-785695:us-central1:neuron-prod-pg15
- name: GITEA__database__NAME
value: gitea
- name: GITEA__database__USER
value: gitea
- name: GITEA__database__PASSWD
valueFrom:
secretKeyRef:
name: gitea-db
key: password
# Server
- name: GITEA__server__DOMAIN
value: git.neuralplatform.ai
- name: GITEA__server__ROOT_URL
value: https://git.neuralplatform.ai
- name: GITEA__server__SSH_DOMAIN
value: git.neuralplatform.ai
- name: GITEA__server__SSH_PORT
value: "22"
- name: GITEA__server__START_SSH_SERVER
value: "false"
# Service
- name: GITEA__service__DISABLE_REGISTRATION
value: "true"
- name: GITEA__service__REQUIRE_SIGNIN_VIEW
value: "false"
# Security
- name: GITEA__security__INSTALL_LOCK
value: "true"
# Packages
- name: GITEA__packages__ENABLED
value: "true"
# Webhooks — allow calls back into the cluster and GKE VPC
- name: GITEA__webhook__ALLOWED_HOST_LIST
value: 10.0.0.0/8,172.16.0.0/12,192.168.0.0/16
# Actions
- name: GITEA__actions__ENABLED
value: "true"
- name: GITEA__actions__DEFAULT_ACTIONS_URL
value: https://code.forgejo.org
volumeMounts:
- name: data
mountPath: /data
- name: cloudsql-socket
mountPath: /cloudsql
- name: custom-css
mountPath: /data/gitea/custom/public/assets/css/custom.css
subPath: custom.css
- name: custom-css
mountPath: /data/gitea/custom/templates/custom/header.tmpl
subPath: header.tmpl
resources:
requests:
memory: 256Mi
cpu: 100m
limits:
memory: 512Mi
cpu: 500m
readinessProbe:
httpGet:
path: /
port: 3000
initialDelaySeconds: 10
periodSeconds: 10
failureThreshold: 3
livenessProbe:
httpGet:
path: /
port: 3000
initialDelaySeconds: 30
periodSeconds: 15
failureThreshold: 3
# Cloud SQL Auth Proxy sidecar — provides unix socket at /cloudsql/
# Authenticates to Cloud SQL using Workload Identity (no key file).
- name: cloud-sql-proxy
image: gcr.io/cloud-sql-connectors/cloud-sql-proxy:2
args:
- "--structured-logs"
- "--unix-socket=/cloudsql"
- "neuron-785695:us-central1:neuron-prod-pg15"
securityContext:
runAsNonRoot: true
volumeMounts:
- name: cloudsql-socket
mountPath: /cloudsql
resources:
requests:
memory: 32Mi
cpu: 10m
limits:
memory: 128Mi
cpu: 100m
volumes:
- name: data
persistentVolumeClaim:
claimName: gitea-data
- name: cloudsql-socket
emptyDir: {}
- name: custom-css
configMap:
name: gitea-custom-css
---
apiVersion: v1
kind: Service
metadata:
name: gitea
namespace: gitea
annotations:
# External GCP Network LB — Cloudflare proxies in front, provides TLS.
# After provisioning, get the external IP:
# kubectl --context=gke_neuron-785695_us-central1_neuron-platform -n gitea get svc gitea
# Update Cloudflare DNS A record for git.neuralplatform.ai to this IP.
cloud.google.com/load-balancer-type: "External"
spec:
selector:
app: gitea
ports:
- name: http
port: 80
targetPort: 3000
- name: ssh
port: 22
targetPort: 22
type: LoadBalancer
@@ -1,75 +0,0 @@
---
# SecretStore for GKE — uses GCP Secret Manager directly via Workload Identity.
# On GKE we use the GCP provider instead of a Vault-backed store, since
# Vault itself may be in the process of being migrated.
# The Gitea GCP SA has secretmanager.secretAccessor on its own secret (see cloud-sql.tf).
#
# Pre-requisite: install ESO on GKE before applying this:
# helm install external-secrets external-secrets/external-secrets \
# --namespace external-secrets --create-namespace
apiVersion: external-secrets.io/v1
kind: SecretStore
metadata:
name: gcp-secretmanager
namespace: gitea
spec:
provider:
gcpsm:
projectID: neuron-785695
# Workload Identity — ESO impersonates the gitea-gke GCP SA to access
# Secret Manager. The gitea SA has secretAccessor on gitea-db-password.
auth:
workloadIdentity:
clusterLocation: us-central1
clusterName: neuron-platform
serviceAccountRef:
name: gitea
namespace: gitea
---
# gitea-db — Gitea database password, pulled from GCP Secret Manager.
# The Secret Manager secret "gitea-database-url" stores the full DSN, but we
# extract just the password field for use in GITEA__database__PASSWD.
#
# The full DSN format from Terraform:
# host=/cloudsql/<conn> user=gitea password=<pw> dbname=gitea sslmode=disable
#
# ESO extracts the raw secret value. Since GCP Secret Manager stores the full
# DSN as a single string, we store the password separately as "gitea-db-password"
# so Gitea can receive it as a discrete env var.
#
# Bootstrap: after `terraform apply`, run:
# PASSWORD=$(gcloud secrets versions access latest --secret=gitea-database-url \
# | grep -oP '(?<=password=)\S+')
# echo -n "$PASSWORD" | gcloud secrets create gitea-db-password \
# --data-file=- --project=neuron-785695
#
# Or simpler — let Terraform write the password secret directly, alongside the
# gitea-database-url secret version, instead of running the bootstrap above.
#
# Alternatives considered:
#   - Pull the full DSN and pass it as GITEA__database__PASSWD: wrong, because
#     the value is a DSN, not a password.
#   - Use the full DSN secret and parse it in-pod.
#   - Use the Cloud SQL proxy unix socket with no password and configure Gitea
#     to use peer auth.
#
# ACTUAL APPROACH: the ExternalSecret below pulls only the raw password from the
# "gitea-db-password" Secret Manager secret into key "password" of the k8s Secret
# "gitea-db" (no "dsn" key is defined in this manifest). Terraform should output
# both the DSN and the password; add a gitea-db-password Secret Manager secret
# in cloud-sql.tf.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
name: gitea-db
namespace: gitea
spec:
refreshInterval: 1h
secretStoreRef:
name: gcp-secretmanager
kind: SecretStore
target:
name: gitea-db
creationPolicy: Owner
data:
- secretKey: password
remoteRef:
# This secret is populated by Terraform (gitea-db-password in cloud-sql.tf)
# It contains just the raw database password (no DSN prefix).
key: gitea-db-password
-6
View File
@@ -1,6 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: gitea
labels:
app.kubernetes.io/name: gitea
-14
View File
@@ -1,14 +0,0 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: gitea-data
namespace: gitea
spec:
# standard-rwo = pd-balanced on GKE Autopilot (ReadWriteOnce)
# Use premium-rwo (pd-ssd) if repo performance becomes a bottleneck.
storageClassName: standard-rwo
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 50Gi
-10
View File
@@ -1,10 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: gitea
namespace: gitea
annotations:
# Workload Identity — allows the Cloud SQL Auth Proxy sidecar to authenticate
# to Cloud SQL as the gitea GCP SA without a JSON key file.
# The GCP SA binding is in servers/gcp/gke.tf (gitea_workload_identity).
iam.gke.io/gcp-service-account: gitea-gke@neuron-785695.iam.gserviceaccount.com
-6
View File
@@ -1,6 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: vault
labels:
app.kubernetes.io/name: vault
+11 -3
View File
@@ -20,9 +20,17 @@ terraform {
}
}
backend "gcs" {
bucket = "neuron-785695-terraform-state"
prefix = "gcp"
backend "s3" {
bucket = "legion-terraform-state"
key = "gcp/terraform.tfstate"
region = "auto"
endpoint = "https://651161e0a3d321561b4c90b5bcd5f15b.r2.cloudflarestorage.com"
# R2 is S3-compatible but not AWS — skip AWS-specific checks
skip_credentials_validation = true
skip_metadata_api_check = true
skip_region_validation = true
force_path_style = true
}
}
+3 -39
View File
@@ -15,41 +15,9 @@ set -euxo pipefail
exec > >(tee /var/log/runner-bootstrap.log) 2>&1
apt-get update
# Core system tools and C build dependencies required by CI jobs.
# libcurl4-openssl-dev + build-essential are needed by the El compiler C build;
# libssl-dev/libsqlite3-dev/libpq-dev for downstream projects.
# python3 is for the inline label-rewrite step below.
apt-get install -y --no-install-recommends \
curl \
ca-certificates \
docker.io \
git \
jq \
python3 \
wget \
unzip \
zip \
xz-utils \
rsync \
file \
sudo \
make \
build-essential \
pkg-config \
gcc \
libcurl4-openssl-dev \
libssl-dev \
libsqlite3-dev \
libpq-dev \
libffi-dev \
zlib1g-dev
# Node.js 20 LTS via NodeSource — Ubuntu 24.04's bundled nodejs is 18.x
# which works but 20 LTS matches what our other CI images use.
curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
apt-get install -y --no-install-recommends nodejs
npm install -g yarn
# nodejs/npm needed for JavaScript actions like actions/checkout and
# google-github-actions/auth. python3 is for our inline label-rewrite below.
apt-get install -y curl ca-certificates docker.io git jq nodejs npm python3
# Make docker usable by the unprivileged runner user
systemctl enable --now docker
@@ -57,10 +25,6 @@ systemctl enable --now docker
useradd -m -s /bin/bash runner || true
usermod -aG docker runner
# Allow the runner user to install packages and run system commands
# in CI workflow steps without a password prompt.
echo "runner ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
# act_runner — pinned to a known-good release. Bump RUNNER_VERSION when
# upgrading. The project moved from gitea/act_runner to gitea/runner around
# the 0.6.x series; the binary inside the asset is still called act_runner.
+226 -45
View File
@@ -1,49 +1,204 @@
# ── Vault LB — GCP Global HTTPS LB ───────────────────────────────────────────
# ── Vault HA Cluster — GCE-based Raft ────────────────────────────────────────
#
# LB frontend is unchanged: forwarding rule → target HTTPS proxy → url map → backend.
# Backend uses GKE container-native NEGs (cut over from GCE instance groups).
# Three GCE e2-small VMs across us-central1-{a,b,c} running HashiCorp Vault
# in Raft HA mode. Auto-unseal via the existing GCP KMS key (vault-kms.tf).
#
# Architecture (GKE):
# - Vault Helm chart runs 3 pods in namespace: vault on GKE neuron-platform cluster
# - GKE auto-creates zonal container-native NEGs via Service annotation:
# cloud.google.com/neg: '{"exposed_ports":{"8200":{}}}'
# - NEG backends replace GCE instance group backends in the Global HTTPS LB
# - DNS unchanged: vault.neuralplatform.ai → 34.54.164.21 (same GCP Global LB IP)
# Architecture:
# - vault-node-1 (us-central1-a) — bootstrapped first, others join via retry_join
# - vault-node-2 (us-central1-b)
# - vault-node-3 (us-central1-c)
# - Each VM gets the vault-node SA attached for KMS auto-unseal + ADC
# - Internal traffic: VMs talk Raft over port 8201 on GCP internal IPs
# - External access: global external HTTPS LB → port 8200
# (vault.neuralplatform.ai Cloudflare DNS → GCP LB IP)
#
# GCE nodes (vault-node-1/2/3) and their instance groups have been removed.
# GKE Vault (via Workload Identity + vault-unseal SA) is the sole Vault backend.
# Ops SSH:
# gcloud compute ssh vault-node-1 --zone=us-central1-a --tunnel-through-iap
#
# After first boot, initialize Vault on node-1:
# vault operator init (save the root token + recovery keys securely)
# Nodes 2 and 3 auto-join via retry_join once they boot.
locals {
# Container-native NEG names created by GKE from the Vault Service annotation:
# cloud.google.com/neg: '{"exposed_ports":{"8200":{}}}'
# GKE creates one NEG per zone where Vault pods are scheduled.
# Confirmed with: gcloud compute network-endpoint-groups list --project neuron-785695
vault_neg_names = {
"us-central1-b" = "k8s1-bfbeff02-vault-vault-helm-gke-8200-db1a474f"
"us-central1-c" = "k8s1-bfbeff02-vault-vault-helm-gke-8200-db1a474f"
"us-central1-f" = "k8s1-bfbeff02-vault-vault-helm-gke-8200-db1a474f"
vault_version = "1.19.2"
vault_nodes = {
"vault-node-1" = { zone = "us-central1-a", node_id = "vault-node-1" }
"vault-node-2" = { zone = "us-central1-b", node_id = "vault-node-2" }
"vault-node-3" = { zone = "us-central1-c", node_id = "vault-node-3" }
}
}
# ── GKE container-native NEG data sources ────────────────────────────────────
# GKE auto-creates these when the Vault Service annotation is applied.
# ── Service Account for Vault nodes ──────────────────────────────────────────
# Reuses the vault-unseal SA from vault-kms.tf for KMS access.
# Additional roles: logging + metrics from ops agent.
data "google_compute_network_endpoint_group" "vault_gke" {
for_each = local.vault_neg_names
name = each.value
zone = each.key
project = var.project_id
resource "google_project_iam_member" "vault_node_log_writer" {
project = var.project_id
role = "roles/logging.logWriter"
member = "serviceAccount:${google_service_account.vault_unseal.email}"
}
# ── Firewall — GKE health checks ─────────────────────────────────────────────
# Allow GCP health check probers to reach pod IPs on port 8200.
# Container-native NEGs direct health checks to pod IPs (not node IPs).
# GKE Autopilot pod CIDR: 10.45.128.0/22 (from cluster ip_allocation_policy).
# Without this rule, GCP LB health checks fail → "no healthy upstream".
# No target_tags — applies to all instances/pods in the default network.
# Safe: GCP health check ranges (130.211.0.0/22, 35.191.0.0/16) are GCP-internal only.
resource "google_compute_firewall" "vault_api_gke" {
name = "vault-api-from-lb-gke"
resource "google_project_iam_member" "vault_node_metric_writer" {
project = var.project_id
role = "roles/monitoring.metricWriter"
member = "serviceAccount:${google_service_account.vault_unseal.email}"
}
# Grant Vault nodes read-only access to Compute Engine resources (roles/compute.viewer),
# e.g. to look up instance details — needed for the GCP auth method later
resource "google_project_iam_member" "vault_node_compute_viewer" {
project = var.project_id
role = "roles/compute.viewer"
member = "serviceAccount:${google_service_account.vault_unseal.email}"
}
# ── Startup script staged in GCS ─────────────────────────────────────────────
# Stored in the existing runner-assets bucket (reuse infrastructure).
# The script installs Vault, writes the config, and starts the systemd unit.
resource "google_storage_bucket_object" "vault_startup" {
name = "vault/startup.sh"
bucket = google_storage_bucket.runner_assets.name
source = "${path.module}/vault/startup.sh"
metadata = {
sha256 = filesha256("${path.module}/vault/startup.sh")
}
}
resource "google_storage_bucket_iam_member" "vault_node_bucket_read" {
bucket = google_storage_bucket.runner_assets.name
role = "roles/storage.objectViewer"
member = "serviceAccount:${google_service_account.vault_unseal.email}"
}
# ── Vault node VMs ────────────────────────────────────────────────────────────
# One GCE VM per entry in local.vault_nodes (VM name = map key, zone taken from
# the entry). Each VM runs as the vault-unseal service account, attaches its own
# vault_data persistent disk, and bootstraps Vault through a startup script
# fetched from the runner-assets GCS bucket.
resource "google_compute_instance" "vault_node" {
for_each = local.vault_nodes
name = each.key
machine_type = "e2-small"
zone = each.value.zone
project = var.project_id
# "vault-node" is the target tag matched by the Vault firewall rules in this file.
tags = ["vault-node", "allow-iap-ssh"]
boot_disk {
initialize_params {
image = "projects/debian-cloud/global/images/family/debian-12"
size = 20
type = "pd-balanced"
}
}
# Separate persistent disk for Raft data — survives VM recreation
attached_disk {
source = google_compute_disk.vault_data[each.key].self_link
device_name = "vault-data"
mode = "READ_WRITE"
}
network_interface {
network = "default"
# Ops access is via IAP SSH; the Vault API is published externally through
# the HTTPS load balancer defined in this file.
# NOTE(review): the empty access_config block below assigns an ephemeral
# external IP to every node, so these VMs are NOT free of external IPs.
# Presumably kept so the startup script can reach the apt repositories —
# confirm, or drop access_config and provide egress another way.
access_config {}
}
service_account {
email = google_service_account.vault_unseal.email
scopes = ["cloud-platform"]
}
metadata = {
# Pull and execute the real startup script from GCS, passing this node's
# ID and the pinned Vault version through the environment.
startup-script = <<-EOT
#!/usr/bin/env bash
set -euxo pipefail
apt-get update -y
apt-get install -y curl ca-certificates apt-transport-https gnupg google-cloud-cli
gsutil cat gs://${google_storage_bucket.runner_assets.name}/${google_storage_bucket_object.vault_startup.name} \
> /tmp/vault-startup.sh
chmod +x /tmp/vault-startup.sh
VAULT_NODE_ID="${each.value.node_id}" \
VAULT_VERSION="${local.vault_version}" \
/tmp/vault-startup.sh
EOT
enable-oslogin = "TRUE"
}
# Permit Terraform to stop the VM when a change requires it to be stopped.
allow_stopping_for_update = true
# Explicit ordering: the data disk, the staged startup script and the bucket
# read grant are listed so they exist before the VM is created (presumably so
# the first-boot gsutil fetch succeeds — confirm).
depends_on = [
google_compute_disk.vault_data,
google_storage_bucket_object.vault_startup,
google_storage_bucket_iam_member.vault_node_bucket_read,
]
}
# ── Persistent data disks ─────────────────────────────────────────────────────
# 10 GiB per node. Kept separate from the boot disk so Raft data
# survives a full VM deletion and recreation.
# Dedicated data disk for each entry in local.vault_nodes, created in the
# entry's zone and named "<node>-data". Referenced by the vault_node instances
# through attached_disk.
resource "google_compute_disk" "vault_data" {
for_each = local.vault_nodes
name = "${each.key}-data"
zone = each.value.zone
project = var.project_id
type = "pd-ssd"
size = 10
labels = {
managed-by = "terraform"
service = "vault"
node = each.key
}
lifecycle {
# Guard against accidental data loss: Terraform rejects any plan that would
# destroy one of these disks.
prevent_destroy = true
}
}
# ── Firewall rules ────────────────────────────────────────────────────────────
# IAP SSH — ops access without a public SSH port
resource "google_compute_firewall" "vault_iap_ssh" {
name = "vault-iap-ssh"
network = "default"
project = var.project_id
direction = "INGRESS"
priority = 1000
allow {
protocol = "tcp"
ports = ["22"]
}
source_ranges = ["35.235.240.0/20"] # GCP IAP CIDR
target_tags = ["vault-node"]
}
# Raft cluster traffic — node-to-node port 8201 (internal only)
resource "google_compute_firewall" "vault_raft" {
name = "vault-raft-internal"
network = "default"
project = var.project_id
direction = "INGRESS"
priority = 1000
allow {
protocol = "tcp"
ports = ["8201"]
}
source_tags = ["vault-node"]
target_tags = ["vault-node"]
}
# Vault API — allow from the GCP health check ranges and the LB
resource "google_compute_firewall" "vault_api" {
name = "vault-api-from-lb"
network = "default"
project = var.project_id
direction = "INGRESS"
@@ -54,15 +209,35 @@ resource "google_compute_firewall" "vault_api_gke" {
ports = ["8200"]
}
# GCP health check source ranges only — restricted port, low risk
source_ranges = ["130.211.0.0/22", "35.191.0.0/16"]
# GCP health check ranges (130.211.0.0/22, 35.191.0.0/16)
# and RFC1918 for any internal service access
source_ranges = ["130.211.0.0/22", "35.191.0.0/16", "10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"]
target_tags = ["vault-node"]
}
# ── Instance Groups (one unmanaged group per zone for the LB) ─────────────────
resource "google_compute_instance_group" "vault" {
for_each = local.vault_nodes
name = "vault-${each.key}"
zone = each.value.zone
project = var.project_id
instances = [google_compute_instance.vault_node[each.key].self_link]
named_port {
name = "vault-api"
port = 8200
}
}
# ── Global HTTPS LB for vault.neuralplatform.ai ───────────────────────────────
# Global external HTTPS LB with a Google-managed cert for vault.neuralplatform.ai.
# We use a global external HTTPS LB (same scheme as the prod marketing LB)
# so we can attach a Google-managed cert for vault.neuralplatform.ai.
#
# TLS is terminated at the LB. Vault listens on plain 8200 internally.
# Cloudflare DNS A record for vault.neuralplatform.ai → vault_lb_ip output below.
# The Cloudflare DNS A record for vault.neuralplatform.ai (neuralplatform zone)
# must point to vault_lb_ip output below — add it in Cloudflare dashboard
# or in the legion Terraform if you bring that zone under TF management.
resource "google_compute_global_address" "vault" {
name = "vault-ip"
@@ -100,17 +275,16 @@ resource "google_compute_backend_service" "vault" {
load_balancing_scheme = "EXTERNAL_MANAGED"
protocol = "HTTP" # Vault serves plain HTTP; TLS terminates at the LB
timeout_sec = 30
port_name = "vault-api"
health_checks = [google_compute_health_check.vault.self_link]
# GKE container-native NEG backends — one per zone where Vault pods are scheduled.
# RATE balancing mode is required for NEGs with EXTERNAL_MANAGED load balancers.
dynamic "backend" {
for_each = data.google_compute_network_endpoint_group.vault_gke
for_each = local.vault_nodes
content {
group = backend.value.self_link
balancing_mode = "RATE"
max_rate_per_endpoint = 100
group = google_compute_instance_group.vault[backend.key].self_link
balancing_mode = "UTILIZATION"
max_utilization = 0.8
}
}
@@ -174,3 +348,10 @@ output "vault_lb_ip" {
description = "Global IP for vault.neuralplatform.ai — set as DNS A record in Cloudflare (neuralplatform.ai zone)"
value = google_compute_global_address.vault.address
}
# Map of Vault node name → GCE zone, derived from local.vault_nodes.
output "vault_node_zones" {
description = "Zone placement of each Vault node"
value = {
for node_name, node in local.vault_nodes : node_name => node.zone
}
}
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/backup
destination:
+2 -2
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/github-runner
destination:
@@ -27,7 +27,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/gitea-runner
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: neuron-prod
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/dharma
destination:
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/external-secrets
destination:
+3 -3
View File
@@ -7,7 +7,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/fornax/coordinator
destination:
@@ -29,7 +29,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/fornax/ui
destination:
@@ -51,7 +51,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/fornax/grafana
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/gitea
destination:
+165
View File
@@ -0,0 +1,165 @@
---
# Gitea CI runner — general-purpose (legion)
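# Mounts the host's k3s containerd socket (/run/k3s/containerd/containerd.sock)
# at /var/run/docker.sock in the runner for container management and docker build/push.
# NOTE(review): config.yaml sets docker_host to the host-side socket path, which is
# not mounted at that path inside the runner container — confirm this resolves.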
apiVersion: apps/v1
kind: Deployment
metadata:
name: gitea-runner
namespace: ci
labels:
app: gitea-runner
spec:
replicas: 1
selector:
matchLabels:
app: gitea-runner
template:
metadata:
labels:
app: gitea-runner
annotations:
config-version: "2026-04-27-containerd-sock"
spec:
securityContext:
runAsNonRoot: false # act_runner needs root for container management
initContainers:
- name: register
image: registry.neuralplatform.ai/ci-base:latest
workingDir: /data
command: ["/bin/sh", "-c"]
args:
- |
act_runner register \
--instance "$GITEA_INSTANCE_URL" \
--token "$GITEA_RUNNER_REGISTRATION_TOKEN" \
--name legion \
--labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
--no-interactive
cat > /data/config.yaml << 'EOF'
runner:
capacity: 2
timeout: 3h
container:
network: host
docker_host: "unix:///run/k3s/containerd/containerd.sock"
force_pull: false
valid_volumes: []
default_image: "registry.neuralplatform.ai/ci-base:latest"
extra_hosts:
- "gitea.git.svc.cluster.local:10.43.1.53"
EOF
envFrom:
- secretRef:
name: gitea-runner-secret
volumeMounts:
- name: data
mountPath: /data
containers:
- name: runner
image: registry.neuralplatform.ai/ci-base:latest
workingDir: /data
command: ["act_runner", "daemon", "--config", "/data/config.yaml"]
envFrom:
- secretRef:
name: gitea-runner-secret
volumeMounts:
- name: data
mountPath: /data
- name: docker-sock
mountPath: /var/run/docker.sock
resources:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 4Gi
cpu: "4"
volumes:
- name: data
emptyDir: {}
- name: docker-sock
hostPath:
path: /run/k3s/containerd/containerd.sock
type: Socket
---
# Neuron Technologies CI runner
apiVersion: apps/v1
kind: Deployment
metadata:
name: neuron-technologies-runner
namespace: ci
labels:
app: neuron-technologies-runner
spec:
replicas: 1
selector:
matchLabels:
app: neuron-technologies-runner
template:
metadata:
labels:
app: neuron-technologies-runner
annotations:
config-version: "2026-04-27-containerd-sock"
spec:
securityContext:
runAsNonRoot: false
initContainers:
- name: register
image: registry.neuralplatform.ai/ci-base:latest
workingDir: /data
command: ["/bin/sh", "-c"]
args:
- |
act_runner register \
--instance "$GITEA_INSTANCE_URL" \
--token "$GITEA_RUNNER_REGISTRATION_TOKEN" \
--name neuron-technologies \
--labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
--no-interactive
cat > /data/config.yaml << 'EOF'
runner:
capacity: 2
timeout: 3h
container:
network: host
docker_host: "unix:///run/k3s/containerd/containerd.sock"
force_pull: false
valid_volumes: []
default_image: "registry.neuralplatform.ai/ci-base:latest"
extra_hosts:
- "gitea.git.svc.cluster.local:10.43.1.53"
EOF
envFrom:
- secretRef:
name: neuron-technologies-runner-secret
volumeMounts:
- name: data
mountPath: /data
containers:
- name: runner
image: registry.neuralplatform.ai/ci-base:latest
workingDir: /data
command: ["act_runner", "daemon", "--config", "/data/config.yaml"]
envFrom:
- secretRef:
name: neuron-technologies-runner-secret
volumeMounts:
- name: data
mountPath: /data
- name: docker-sock
mountPath: /var/run/docker.sock
resources:
requests:
memory: 512Mi
cpu: 250m
limits:
memory: 4Gi
cpu: "4"
volumes:
- name: data
emptyDir: {}
- name: docker-sock
hostPath:
path: /run/k3s/containerd/containerd.sock
type: Socket
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
labels:
app: gitea
spec:
replicas: 0 # Scaled down — Gitea has been migrated to GKE. DNS now points at GKE LB.
replicas: 1
strategy:
type: Recreate
selector:
-22
View File
@@ -1,22 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: gke-apps
namespace: argocd
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
targetRevision: main
path: servers/gcp/k8s/argocd-apps
destination:
# This Application lives on Legion Argo CD — it creates child Applications
# there that target the GKE cluster.
server: https://kubernetes.default.svc
namespace: argocd
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/monitoring
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/mudcraft
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/daemon
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/dev
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/marketing
destination:
+1 -1
View File
@@ -15,7 +15,7 @@ spec:
# Only allow deployments sourced from the infrastructure repo.
sourceRepos:
- "https://git.neuralplatform.ai/will/infrastructure.git"
- "http://gitea.git.svc.cluster.local:3000/will/infrastructure.git"
- "https://code.forgejo.org"
- "registry.neuralplatform.ai"
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/prod
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/stage
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/swarm
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/tim
destination:
+1 -1
View File
@@ -12,7 +12,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/neuron-technologies/web
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/ollama
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/packages
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/postgres
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/registry
destination:
+1 -1
View File
@@ -6,7 +6,7 @@ metadata:
spec:
project: default
source:
repoURL: https://git.neuralplatform.ai/will/infrastructure.git
repoURL: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
targetRevision: main
path: servers/legion/k8s/vault
destination:
+3 -3
View File
@@ -36,7 +36,7 @@ resource "helm_release" "argocd" {
server.insecure: true # TLS terminated at Traefik
repositories:
gitea-infrastructure:
url: https://git.neuralplatform.ai/will/infrastructure.git
url: http://gitea.git.svc.cluster.local:3000/will/infrastructure.git
name: infrastructure
type: git
YAML
@@ -96,7 +96,7 @@ resource "kubernetes_secret" "argocd_gitea_repo" {
data = {
type = "git"
url = "https://git.neuralplatform.ai/will/infrastructure.git"
url = "http://gitea.git.svc.cluster.local:3000/will/infrastructure.git"
username = "will"
password = var.gitea_api_token
}
@@ -136,7 +136,7 @@ resource "kubernetes_manifest" "argocd_root_app" {
spec = {
project = "default"
source = {
repoURL = "https://git.neuralplatform.ai/will/infrastructure.git"
repoURL = "http://gitea.git.svc.cluster.local:3000/will/infrastructure.git"
targetRevision = "main"
path = "servers/legion/apps"
}
-28
View File
@@ -23,31 +23,3 @@ resource "cloudflare_record" "np_web_stage" {
proxied = true
ttl = 1
}
# vault.neuralplatform.ai — GCP Global HTTPS LB with managed cert.
# DNS-only (not proxied) — GCP managed TLS cert terminates at the LB.
# Backend is now GKE container-native NEGs (cut over from GCE instance groups).
# IP: terraform output vault_lb_ip from servers/gcp workspace = 34.54.164.21
resource "cloudflare_record" "np_vault" {
zone_id = local.zone_neuralplatform_ai
name = "vault"
type = "A"
content = "34.54.164.21"
proxied = false
ttl = 60
}
# git.neuralplatform.ai — Gitea on GKE via GCP Network LB.
# Cloudflare proxied (provides TLS termination). Gitea listens on port 80 internally.
# IP: external IP of the GKE LoadBalancer service in namespace gitea.
# Get it with:
# kubectl --context=gke_neuron-785695_us-central1_neuron-platform -n gitea get svc gitea
# Update content below with the actual IP, then terraform apply.
resource "cloudflare_record" "np_gitea" {
zone_id = local.zone_neuralplatform_ai
name = "git"
type = "A"
content = "34.31.145.131" # GKE LoadBalancer external IP — gitea svc in namespace gitea
proxied = true
ttl = 1
}
-42
View File
@@ -1,42 +0,0 @@
# Cloudflare Zero Trust Access — git.neuralplatform.ai (Gitea)
#
# The Gitea Access application itself is currently managed in the Cloudflare
# dashboard, NOT in Terraform. This file only manages the *service token* the
# Gitea Actions runners use to authenticate through CF Access while still
# keeping the human Google-OAuth gate for browser users.
#
# Why not import the application here?
# - Importing the existing dashboard app risks drifting the human-auth
# policy (Google IdP, allowed emails) which is settled and working.
# - Service tokens can be added to a dashboard-managed app without
# importing the app itself; the token resource lives at the account
# level and is referenced from a policy.
# - We pay only the cost we need to. If we later want all Access apps
# in TF we can do a focused import pass.
#
# After `terraform apply` produces the token id/secret, Will must:
# 1. Run `vault kv put secret/gitea-runner-cf-access ...` (see outputs).
# 2. In the Cloudflare dashboard, edit the existing "Gitea" Access
# application's policies and add a new policy:
# Action: Service Auth (decision = non_identity)
# Include: Service Token = "gitea-runner"
# This grants the service token bypass through CF Access on
# git.neuralplatform.ai without changing the human-auth flow.
# Account-level Cloudflare Access service token named "gitea-runner".
# Its client_id / client_secret are exposed through the outputs below.
resource "cloudflare_zero_trust_access_service_token" "gitea_runner" {
account_id = var.cloudflare_account_id
name = "gitea-runner"
# The provider's default duration is "8760h" (1 year). It is overridden here
# with "forever", so this token never expires on its own.
# NOTE(review): with no expiry there is no time-driven rotation; rotating
# presumably means replacing this resource and re-storing the new secret in
# Vault — confirm the intended procedure.
duration = "forever"
}
output "gitea_runner_cf_access_client_id" {
description = "CF Access service token client ID for the Gitea Actions runner. Store in Vault at secret/gitea-runner-cf-access."
value = cloudflare_zero_trust_access_service_token.gitea_runner.client_id
}
output "gitea_runner_cf_access_client_secret" {
description = "CF Access service token client secret. Store in Vault at secret/gitea-runner-cf-access. Only emitted at creation time."
value = cloudflare_zero_trust_access_service_token.gitea_runner.client_secret
sensitive = true
}
+1 -29
View File
@@ -6,7 +6,7 @@ ENV DEBIAN_FRONTEND=noninteractive
COPY --from=runner-bin /bin/forgejo-runner /usr/local/bin/act_runner
# Core system tools + C build deps needed by El compiler and other CI jobs
# Core system tools
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
@@ -27,19 +27,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
pkg-config \
software-properties-common \
zstd \
sudo \
libcurl4-openssl-dev \
libssl-dev \
libsqlite3-dev \
libpq-dev \
libffi-dev \
zlib1g-dev \
dpkg-dev \
&& rm -rf /var/lib/apt/lists/*
# Allow any user to run sudo without a password (CI containers need apt-get etc.)
RUN echo "ALL ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
# Node.js 20 LTS via binary tarball (nodesource apt repo is unreliable on Ubuntu 24.04)
RUN NODE_VERSION=20.19.1 \
&& curl -fsSL "https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-linux-x64.tar.xz" \
@@ -96,20 +85,3 @@ RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
&& apt-get update \
&& apt-get install -y --no-install-recommends gh \
&& rm -rf /var/lib/apt/lists/*
# Cloudflare Access bootstrap for git clones to git.neuralplatform.ai.
# This script is sourced by bash in build containers via BASH_ENV (set by
# act_runner's container.env in deployment.yaml) so it runs before every
# step. It configures git insteadOf + CF Access extraHeaders from
# CF_ACCESS_CLIENT_ID / CF_ACCESS_CLIENT_SECRET env vars.
#
# We deliberately don't set ENTRYPOINT / CMD here — act_runner spawns
# build containers with its own entrypoint to keep them alive between
# steps, and overriding it breaks job execution.
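# SSH key bootstrap script for git operations inside build containers.
# Sourced before every CI step via BASH_ENV (set in deployment.yaml).
# Writes GITEA_SSH_PRIVATE_KEY to ~/.ssh/gitea_key and exports GIT_SSH_COMMAND
# so that SSH-based git operations use that key. It does NOT rewrite HTTPS
# Gitea URLs to SSH: Gitea's built-in SSH server is disabled, so HTTPS clones
# (including actions/checkout) are left as HTTPS.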
COPY git-ssh-init.sh /usr/local/bin/git-ssh-init.sh
RUN chmod +x /usr/local/bin/git-ssh-init.sh
+10 -24
View File
@@ -8,7 +8,7 @@ metadata:
labels:
app: gitea-runner
annotations:
config-version: "2026-05-05-extra-hosts"
config-version: "2026-05-04-docker-sock-fix"
spec:
replicas: 1
selector:
@@ -19,7 +19,7 @@ spec:
labels:
app: gitea-runner
annotations:
config-version: "2026-05-05-extra-hosts"
config-version: "2026-05-04-docker-sock-fix"
spec:
securityContext:
runAsNonRoot: false
@@ -35,25 +35,18 @@ spec:
--name legion \
--labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
--no-interactive
cat > /data/config.yaml << EOF
cat > /data/config.yaml << 'EOF'
runner:
capacity: 2
timeout: 3h
container:
network: host
docker_host: "unix:///var/run/docker.sock"
force_pull: true
force_pull: false
valid_volumes: []
default_image: "registry.neuralplatform.ai/ci-base:latest"
env:
GITEA_SSH_PRIVATE_KEY: "${GITEA_SSH_PRIVATE_KEY}"
BASH_ENV: "/usr/local/bin/git-ssh-init.sh"
# Build containers run with network: host, so k8s DNS names
# don't resolve. Inject the gitea-proxy ClusterIP so that
# actions/checkout can reach gitea-proxy.ci.svc.cluster.local
# (the URL Gitea passes as GITHUB_SERVER_URL to the runner).
extra_hosts:
- "gitea-proxy.ci.svc.cluster.local:10.43.88.7"
- "gitea.git.svc.cluster.local:10.43.1.53"
EOF
envFrom:
- secretRef:
@@ -99,7 +92,7 @@ metadata:
labels:
app: neuron-technologies-runner
annotations:
config-version: "2026-05-05-extra-hosts"
config-version: "2026-05-04-docker-sock-fix"
spec:
replicas: 2
selector:
@@ -110,7 +103,7 @@ spec:
labels:
app: neuron-technologies-runner
annotations:
config-version: "2026-05-05-extra-hosts"
config-version: "2026-05-04-docker-sock-fix"
spec:
securityContext:
runAsNonRoot: false
@@ -126,25 +119,18 @@ spec:
--name "legion-nt-$(hostname)" \
--labels "self-hosted:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-latest:docker://registry.neuralplatform.ai/ci-base:latest,ubuntu-24.04:docker://registry.neuralplatform.ai/ci-base:latest,linux,x64" \
--no-interactive
cat > /data/config.yaml << EOF
cat > /data/config.yaml << 'EOF'
runner:
capacity: 2
timeout: 3h
container:
network: host
docker_host: "unix:///var/run/docker.sock"
force_pull: true
force_pull: false
valid_volumes: []
default_image: "registry.neuralplatform.ai/ci-base:latest"
# Build containers run with network: host, so k8s DNS names
# don't resolve. Inject the gitea-proxy ClusterIP so that
# actions/checkout can reach gitea-proxy.ci.svc.cluster.local
# (the URL Gitea passes as GITHUB_SERVER_URL to the runner).
env:
GITEA_SSH_PRIVATE_KEY: "${GITEA_SSH_PRIVATE_KEY}"
BASH_ENV: "/usr/local/bin/git-ssh-init.sh"
extra_hosts:
- "gitea-proxy.ci.svc.cluster.local:10.43.88.7"
- "gitea.git.svc.cluster.local:10.43.1.53"
EOF
envFrom:
- secretRef:
@@ -1,19 +1,12 @@
---
# gitea-runner-secret — neural-platform org runner token
#
# GITEA_INSTANCE_URL: act_runner daemon polls git.neuralplatform.ai via HTTPS.
# GITEA_SSH_PRIVATE_KEY: ED25519 deploy key for SSH-based git clones inside
# build containers. git-ssh-init.sh (sourced via BASH_ENV) writes this key
# to ~/.ssh/gitea_key and rewrites HTTPS Gitea URLs to SSH so that
# actions/checkout and plain `git clone` both use SSH auth without any
# HTTPS credential or CF Access token.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: gitea-runner-secret
namespace: ci
annotations:
force-sync: "2026-05-05-gitea-proxy"
force-sync: "2026-04-23"
spec:
refreshInterval: 1h
secretStoreRef:
@@ -24,18 +17,13 @@ spec:
creationPolicy: Owner
template:
data:
GITEA_INSTANCE_URL: "http://gitea-proxy.ci.svc.cluster.local:3000"
GITEA_INSTANCE_URL: "http://gitea.git.svc.cluster.local:3000"
GITEA_RUNNER_REGISTRATION_TOKEN: "{{ .runner_token }}"
GITEA_SSH_PRIVATE_KEY: "{{ .ci_ssh_private_key }}"
data:
- secretKey: runner_token
remoteRef:
key: secret/data/gitea
property: runner_token
- secretKey: ci_ssh_private_key
remoteRef:
key: secret/data/gitea
property: ci_ssh_private_key
---
# neuron-technologies-runner-secret — neuron-technologies org runner token
apiVersion: external-secrets.io/v1beta1
@@ -43,8 +31,6 @@ kind: ExternalSecret
metadata:
name: neuron-technologies-runner-secret
namespace: ci
annotations:
force-sync: "2026-05-05-gitea-proxy"
spec:
refreshInterval: 1h
secretStoreRef:
@@ -55,15 +41,10 @@ spec:
creationPolicy: Owner
template:
data:
GITEA_INSTANCE_URL: "http://gitea-proxy.ci.svc.cluster.local:3000"
GITEA_INSTANCE_URL: "http://gitea.git.svc.cluster.local:3000"
GITEA_RUNNER_REGISTRATION_TOKEN: "{{ .runner_token }}"
GITEA_SSH_PRIVATE_KEY: "{{ .ci_ssh_private_key }}"
data:
- secretKey: runner_token
remoteRef:
key: secret/data/gitea
property: neuron_technologies_runner_token
- secretKey: ci_ssh_private_key
remoteRef:
key: secret/data/gitea
property: ci_ssh_private_key
@@ -1,50 +0,0 @@
#!/bin/sh
# git-cf-access-init.sh
#
# Configures git so any clone/fetch from Gitea ends up going to
# git.neuralplatform.ai with the runner's Cloudflare Access service-token
# headers attached.
#
# How this gets invoked:
# The forgejo-runner job execution path runs each step via a
# non-interactive bash invocation inside the build container. Setting
# BASH_ENV=/usr/local/bin/git-cf-access-init.sh in act_runner's
# container.env causes bash to source this script before any step's
# commands run. (See servers/legion/k8s/gitea-runner/deployment.yaml.)
#
# What it does:
# 1. Rewrites http://gitea.git.svc.cluster.local:3000/ → https://git.neuralplatform.ai/
# via insteadOf. The runner registered against the in-cluster URL (no
# CF Access on the daemon's polling loop), so act_runner advertises
# that URL to the build container as github.server_url. Build
# containers run with network: host and can't resolve
# *.svc.cluster.local, so we need to redirect to the public URL.
# 2. Adds the CF Access service-token headers to outbound requests to
# git.neuralplatform.ai so the clone authenticates through CF Access.
#
# Idempotent — re-runs replace any prior config keys without accumulating
# duplicate header entries.
#
# Known limitation: actions/checkout sets an Authorization extraheader
# keyed to the server URL it was given (the in-cluster URL). After
# insteadOf substitution the request goes to the public URL where git
# matches http.<public>.extraheader, and the in-cluster-keyed
# Authorization header is dropped. For public repos this is fine. For
# private repos the per-job token will not be sent — see the PR
# description for the follow-up plan if dharma-el's CI needs that token.
# Configure git only when BOTH halves of the CF Access service token are set;
# with either one missing or empty, the git config is left untouched.
#
# Every git command below ends in `2>/dev/null || true`: its stderr is
# discarded and its failure is ignored, so sourcing this file cannot fail a
# step or add noise to its output.
#
# NOTE(review): `--global` stores the CF-Access-Client-Secret header value in
# the user's global git config inside the build container — confirm that is
# acceptable for the container's lifetime.
if [ -n "${CF_ACCESS_CLIENT_ID:-}" ] && [ -n "${CF_ACCESS_CLIENT_SECRET:-}" ]; then
# Rewrite the in-cluster Gitea base URL to the public hostname.
# --replace-all keeps this a single entry across repeated runs.
git config --global --replace-all \
url."https://git.neuralplatform.ai/".insteadOf \
"http://gitea.git.svc.cluster.local:3000/" 2>/dev/null || true
# Reset extraHeader on the public URL, then add both CF Access headers.
# The unset must come first: --add appends, so without it each re-run
# would accumulate duplicate header entries.
git config --global --unset-all \
http."https://git.neuralplatform.ai/".extraHeader 2>/dev/null || true
git config --global --add \
http."https://git.neuralplatform.ai/".extraHeader \
"CF-Access-Client-Id: ${CF_ACCESS_CLIENT_ID}" 2>/dev/null || true
git config --global --add \
http."https://git.neuralplatform.ai/".extraHeader \
"CF-Access-Client-Secret: ${CF_ACCESS_CLIENT_SECRET}" 2>/dev/null || true
fi
@@ -1,48 +0,0 @@
#!/bin/sh
# git-ssh-init.sh
#
# Sets up SSH authentication for git operations inside CI build containers.
#
# How this gets invoked:
#   act_runner runs each step via a non-interactive bash invocation.
#   Setting BASH_ENV=/usr/local/bin/git-ssh-init.sh in act_runner's
#   container.env causes bash to source this before any step's commands.
#   (See servers/legion/k8s/gitea-runner/deployment.yaml.)
#
# What it does (only when GITEA_SSH_PRIVATE_KEY is set and non-empty):
#   1. Writes GITEA_SSH_PRIVATE_KEY (from the runner secret) to ~/.ssh/gitea_key
#   2. Writes a dedicated SSH config file, ~/.ssh/gitea_config, that selects
#      that key for git.neuralplatform.ai (the user's ~/.ssh/config is not
#      touched)
#   3. Exports GIT_SSH_COMMAND so git's SSH transport uses that config file
#
# What it deliberately does NOT do:
#   It does not install a url.insteadOf rule rewriting HTTPS Gitea URLs to
#   SSH; HTTPS clones (including actions/checkout) stay on HTTPS. See the
#   note at the end of the script.
#
# Idempotent — safe to re-run on every step.

if [ -n "${GITEA_SSH_PRIVATE_KEY:-}" ]; then
  mkdir -p ~/.ssh
  chmod 700 ~/.ssh

  # Write the private key. The write happens in a subshell with umask 077 so
  # the file is created owner-only from the start (no window in which it is
  # readable by others), without changing the umask of the step that sourced
  # this script. The chmod afterwards also tightens a pre-existing file.
  (
    umask 077
    printf '%s\n' "${GITEA_SSH_PRIVATE_KEY}" > ~/.ssh/gitea_key
  )
  chmod 600 ~/.ssh/gitea_key

  # SSH config — use this key for git.neuralplatform.ai, skip host key checking
  # (build containers are ephemeral; we don't persist a known_hosts file).
  # NOTE(review): disabling host key checking means the server's identity is
  # not verified; acceptable only as a deliberate trade-off.
  cat > ~/.ssh/gitea_config << 'EOF'
Host git.neuralplatform.ai
  HostName git.neuralplatform.ai
  User git
  IdentityFile ~/.ssh/gitea_key
  StrictHostKeyChecking no
  UserKnownHostsFile /dev/null
EOF
  chmod 600 ~/.ssh/gitea_config

  # Point SSH at our per-job config. `ssh -F` uses this file INSTEAD of the
  # default per-user config — it does not merge with an existing ~/.ssh/config.
  export GIT_SSH_COMMAND="ssh -F ~/.ssh/gitea_config"

  # NOTE: Do NOT add url.insteadOf SSH rewrite here.
  # Gitea's built-in SSH server is disabled (START_SSH_SERVER=false) so
  # SSH git clones would fail. HTTPS git operations work directly from
  # the build container host network — Cloudflare bypasses the CF Access
  # gate for git smart-HTTP and release asset paths.
fi
@@ -1,100 +0,0 @@
---
# gitea-proxy — plain nginx proxy to GKE Gitea LB IP
#
# Routes directly to the GKE Network LB IP, bypassing Cloudflare entirely.
# No CF Access service-token auth needed. The runners can't add custom HTTP
# headers, so going through Cloudflare Access is not viable for the gRPC
# Actions ping endpoint (/api/actions/*).
#
# Runners use:
# GITEA_INSTANCE_URL: http://gitea-proxy.ci.svc.cluster.local:3000
#
# The GKE Gitea LB IP (34.31.145.131) is the external IP of the
# gitea Service in the gitea namespace on the GKE cluster.
apiVersion: v1
kind: ConfigMap
metadata:
name: gitea-proxy-config
namespace: ci
data:
default.conf: |
server {
listen 3000;
location / {
# Direct to GKE Gitea LB IP — bypasses Cloudflare/CF Access entirely.
proxy_pass http://34.31.145.131;
# Tell Gitea which hostname it's being served as so self-referencing
# URLs (clone URLs, webhook URLs) are generated correctly.
proxy_set_header Host git.neuralplatform.ai;
# Forward all original request headers (auth tokens, etc).
proxy_pass_request_headers on;
# Rewrite Location headers in redirects back to the proxy URL.
proxy_redirect http://34.31.145.131/
http://gitea-proxy.ci.svc.cluster.local:3000/;
# Standard proxy headers.
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto http;
# Allow large request bodies (git push payloads).
client_max_body_size 512m;
}
}
---
# Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
name: gitea-proxy
namespace: ci
labels:
app: gitea-proxy
spec:
replicas: 1
selector:
matchLabels:
app: gitea-proxy
template:
metadata:
labels:
app: gitea-proxy
annotations:
config-version: "2026-05-05-direct-lb"
spec:
containers:
- name: nginx
image: nginx:alpine
ports:
- containerPort: 3000
volumeMounts:
- name: config
mountPath: /etc/nginx/conf.d
resources:
requests:
cpu: 50m
memory: 32Mi
limits:
cpu: 200m
memory: 64Mi
volumes:
- name: config
configMap:
name: gitea-proxy-config
---
# Service — ClusterIP reachable by all runner pods as gitea-proxy.ci:3000
apiVersion: v1
kind: Service
metadata:
name: gitea-proxy
namespace: ci
spec:
selector:
app: gitea-proxy
ports:
- port: 3000
targetPort: 3000
@@ -82,12 +82,7 @@ spec:
apiVersion: apps/v1
kind: Deployment
name: neuron-marketing
# minReplicas=1 to match the file's own convention (see header comment).
# Kubernetes only allows minReplicas=0 when at least one Object or External
# metric is configured (queue depth, custom signal, etc.); with only a
# Resource (CPU) metric, scale-to-zero is rejected and the whole HPA is
# invalid — which was blocking neuron-prod's Argo CD sync.
minReplicas: 1
minReplicas: 0
maxReplicas: 8
metrics:
- type: Resource
@@ -117,32 +117,6 @@ spec:
matchLabels:
kubernetes.io/metadata.name: neuron-prod
---
# ── dharma: accept from Traefik (kube-system) and neuron-prod namespace ──────
# The dharma pod was healthy and the IngressRoute was correct, but cross-
# namespace ingress from kube-system (Traefik) was denied by default-deny-all,
# so every external request landed at Traefik and bounced back as 502. This
# allow rule mirrors `allow-mcp-ingress` and brings dharma into line with the
# other neuron-prod services.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: allow-dharma-ingress
namespace: neuron-prod
spec:
podSelector:
matchLabels:
app: dharma
policyTypes:
- Ingress
ingress:
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: kube-system
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: neuron-prod
---
# ── Egress: all prod pods may reach platform (postgres/redis), vault,
# monitoring (alloy OTLP), kube-dns, and the internet (external APIs) ─
apiVersion: networking.k8s.io/v1
@@ -8,7 +8,7 @@
# routes still go to the Cloud Run prod URL until we cut over.
#
# Vault paths used:
# secret/neuron-technologies/anthropic api_key, web_demo_key
# secret/neuron-technologies/anthropic api_key
# secret/neuron-technologies/supabase anon_key, service_role_key, project_url
# secret/neuron-technologies/marketing-test stripe_*, etc.
# secret/neuron-technologies/notifications resend_api_key
@@ -38,7 +38,7 @@ spec:
- secretKey: NEURON_LLM_0_KEY
remoteRef:
key: secret/data/neuron-technologies/anthropic
property: web_demo_key
property: api_key
- secretKey: SUPABASE_ANON_KEY
remoteRef:
key: secret/data/neuron-technologies/supabase
+38 -8
View File
@@ -92,11 +92,37 @@ resource "cloudflare_zero_trust_tunnel_cloudflared_config" "legion" {
}
}
# vault.neuralplatform.ai — moved to GCP Global HTTPS LB (34.54.164.21)
# DNS is now a direct A record (not proxied) in dns-neuralplatform.tf
ingress_rule {
hostname = "vault.neuralplatform.ai"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
# watch.nook.family, jellyfin.nook.family, bazarr.nook.family — removed
# This infrastructure is focused on Neuron; nook.family media stack retired
ingress_rule {
hostname = "watch.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
ingress_rule {
hostname = "jellyfin.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
ingress_rule {
hostname = "bazarr.nook.family"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
# fornax.neuralplatform.ai — Fornax torrent coordinator (qBittorrent API proxy)
@@ -108,12 +134,16 @@ resource "cloudflare_zero_trust_tunnel_cloudflared_config" "legion" {
}
}
# git.neuralplatform.ai — REMOVED: Gitea is now on GKE.
# DNS is a direct Cloudflare A record (proxied) pointing at the GKE LoadBalancer IP.
# See: servers/legion/dns-neuralplatform.tf (cloudflare_record.np_gitea)
# git.neuralplatform.ai — Gitea web UI (HTTP via Traefik)
ingress_rule {
hostname = "git.neuralplatform.ai"
service = "https://traefik.kube-system.svc:443"
origin_request {
no_tls_verify = true
}
}
# ssh.git.neuralplatform.ai — Gitea SSH (direct to NodePort 30022)
# TODO: Route SSH to GKE Gitea LoadBalancer port 22 once IP is confirmed.
ingress_rule {
hostname = "ssh.git.neuralplatform.ai"
service = "ssh://localhost:30022"