Deploy soma to GCP Cloud Run at ai.neurontechnologies.ai

- Add cloud-run-soma.tf: soma-prod-us Cloud Run service in us-central1,
  neuron-soma-sa service account, soma Artifact Registry repo, Secret
  Manager secrets for HF token and operator key, serverless NEG, backend
  service, SSL cert
- Add dns-gcp.tf: Cloudflare A record for ai.neurontechnologies.ai pointing
  to GCP LB IP; Cloudflare provider added to main.tf/variables.tf
- Update load-balancer.tf: soma host rule + path matcher, soma SSL cert
  added to HTTPS proxy
- Update outputs.tf: soma service URL and artifact registry URL outputs
- Remove legion soma k8s manifests (Legion is gone)
- Update AGENTS.md to reflect GCP as primary production environment
This commit is contained in:
Will Anderson
2026-04-28 10:15:15 -05:00
parent 65dd9884ae
commit f2b025a433
11 changed files with 316 additions and 219 deletions
+249
View File
@@ -0,0 +1,249 @@
# ── Soma — AI Inference Gateway — Cloud Run ───────────────────────────────────
# Soma is a Rust/Axum inference proxy that speaks OpenAI-compatible API.
# Single region (us-central1) — inference latency matters, no need for global
# multi-region replication at this stage.
#
# Public endpoint: https://ai.neurontechnologies.ai/v1/chat/completions
# Auth: Bearer token (svc-* prefix) via require_api_key middleware.
# Tag of the Soma image deployed to Cloud Run. The default keeps the previous
# behaviour ("latest"); CI should pass an immutable tag (for example the git
# SHA) so that every new image shows up as a Terraform diff and rolls out a
# new revision instead of silently depending on a mutable tag.
variable "soma_image_tag" {
  description = "Tag of the soma image in Artifact Registry to deploy to Cloud Run"
  type        = string
  default     = "latest"
}

locals {
  # Labels attached to Soma resources (referenced as local.soma_labels).
  soma_labels = {
    "managed-by" = "terraform"
    "service"    = "neuron-soma"
  }

  # Fully qualified image reference inside the neuron-soma repository.
  soma_image = "us-central1-docker.pkg.dev/${var.project_id}/neuron-soma/soma:${var.soma_image_tag}"
}
# ── Artifact Registry ─────────────────────────────────────────────────────────
# Docker-format Artifact Registry repository that stores the Soma images.
resource "google_artifact_registry_repository" "soma" {
  project       = var.project_id
  location      = "us-central1"
  repository_id = "neuron-soma"
  format        = "DOCKER"
  description   = "Soma AI inference gateway (Rust) Docker images"

  # NOTE(review): this is the only cleanup policy on the repository and its
  # action is KEEP. Confirm whether a matching DELETE policy was intended;
  # a KEEP rule by itself presumably never removes old images.
  cleanup_policies {
    id     = "keep-last-10"
    action = "KEEP"

    most_recent_versions {
      keep_count = 10
    }
  }
}
# ── CI pusher access to soma repo ─────────────────────────────────────────────
# Lets the CI pusher service account write (push) images to the soma repo.
resource "google_artifact_registry_repository_iam_member" "ci_soma" {
  member     = "serviceAccount:${google_service_account.ci_pusher.email}"
  role       = "roles/artifactregistry.writer"
  repository = google_artifact_registry_repository.soma.name
  location   = "us-central1"
  project    = var.project_id
}
# Grants the CI pusher roles/iam.serviceAccountUser on the Soma runtime
# service account, i.e. permission to act as that account.
resource "google_service_account_iam_member" "ci_pusher_act_as_soma" {
  member             = "serviceAccount:${google_service_account.ci_pusher.email}"
  role               = "roles/iam.serviceAccountUser"
  service_account_id = google_service_account.soma.name
}
# ── Service Account ───────────────────────────────────────────────────────────
# Runtime identity used by the Soma Cloud Run service.
resource "google_service_account" "soma" {
  project      = var.project_id
  account_id   = "neuron-soma-sa"
  display_name = "Neuron Soma Cloud Run SA"
  description  = "Service account for the Soma AI inference gateway on Cloud Run"
}
# Allows the Soma service account to read Secret Manager secret payloads.
# NOTE(review): this is a project-level binding, so it covers every secret in
# the project rather than only the two soma-* secrets. Consider per-secret
# IAM bindings if least privilege is required.
resource "google_project_iam_member" "soma_secret_accessor" {
  member  = "serviceAccount:${google_service_account.soma.email}"
  role    = "roles/secretmanager.secretAccessor"
  project = var.project_id
}
# ── Secrets ───────────────────────────────────────────────────────────────────
# Secret container for the Hugging Face token (exposed to Soma as HF_TOKEN).
resource "google_secret_manager_secret" "soma_hf_token" {
  project   = var.project_id
  secret_id = "soma-hf-token"

  # Automatic replication: Google chooses the storage locations.
  replication {
    auto {}
  }
}
# Hugging Face access token stored as a version of the soma-hf-token secret.
#
# The value is supplied through a sensitive input variable instead of being
# written in this file. Provide it out of band, e.g. with the
# TF_VAR_soma_hf_token environment variable or an untracked *.tfvars file.
#
# SECURITY: a real token used to be hard-coded here and is therefore present
# in version-control history. Treat it as compromised and revoke/rotate it.
# The value is still recorded in Terraform state, so access to the state
# backend must stay restricted.
variable "soma_hf_token" {
  description = "Hugging Face access token stored in the soma-hf-token secret"
  type        = string
  sensitive   = true
}

resource "google_secret_manager_secret_version" "soma_hf_token" {
  secret      = google_secret_manager_secret.soma_hf_token.id
  secret_data = var.soma_hf_token
}
# Secret container for the operator key (exposed to Soma as SOMA_OPERATOR_KEY).
resource "google_secret_manager_secret" "soma_operator_key" {
  project   = var.project_id
  secret_id = "soma-operator-key"

  # Automatic replication: Google chooses the storage locations.
  replication {
    auto {}
  }
}
# Soma operator key stored as a version of the soma-operator-key secret.
#
# The value is supplied through a sensitive input variable instead of being
# written in this file. Provide it out of band, e.g. with the
# TF_VAR_soma_operator_key environment variable or an untracked *.tfvars file.
#
# SECURITY: a real key used to be hard-coded here and is therefore present in
# version-control history. Treat it as compromised and issue a new one.
# The value is still recorded in Terraform state, so access to the state
# backend must stay restricted.
variable "soma_operator_key" {
  description = "Operator API key stored in the soma-operator-key secret"
  type        = string
  sensitive   = true
}

resource "google_secret_manager_secret_version" "soma_operator_key" {
  secret      = google_secret_manager_secret.soma_operator_key.id
  secret_data = var.soma_operator_key
}
# ── Cloud Run Service — us-central1 ──────────────────────────────────────────
# Cloud Run v2 service that runs the Soma gateway in us-central1.
#
# Ingress is limited to internal + Cloud Load Balancing traffic. The service
# is published through the external HTTPS load balancer (soma-backend-prod via
# the soma-neg-us serverless NEG), and the Cloud Armor policy is attached
# there. With INGRESS_TRAFFIC_ALL and a public invoker binding, clients could
# call the default run.app URL directly and skip the load balancer entirely.
resource "google_cloud_run_v2_service" "soma_us" {
  name     = "soma-prod-us"
  location = "us-central1"
  project  = var.project_id
  labels   = local.soma_labels

  ingress = "INGRESS_TRAFFIC_INTERNAL_LOAD_BALANCER"

  template {
    service_account = google_service_account.soma.email

    scaling {
      # min=1: always warm — inference latency is unacceptable on cold start
      min_instance_count = 1
      max_instance_count = 10
    }

    containers {
      image = local.soma_image

      resources {
        limits = {
          cpu    = "2"
          memory = "2Gi"
        }

        # cpu_idle=false: keep CPU allocated — inference proxy needs fast response
        cpu_idle = false
      }

      # NOTE(review): no volume is mounted at /etc/soma in this service, so the
      # config file is presumably baked into the image — confirm.
      env {
        name  = "SOMA_CONFIG_PATH"
        value = "/etc/soma/soma.toml"
      }

      # Secret-backed variables resolve to the newest secret version.
      env {
        name = "HF_TOKEN"
        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.soma_hf_token.secret_id
            version = "latest"
          }
        }
      }

      env {
        name = "SOMA_OPERATOR_KEY"
        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.soma_operator_key.secret_id
            version = "latest"
          }
        }
      }

      ports {
        container_port = 8080
        name           = "http1"
      }

      # Startup: poll /health every 5s after a 2s delay, up to 10 failures.
      startup_probe {
        http_get {
          path = "/health"
          port = 8080
        }
        initial_delay_seconds = 2
        timeout_seconds       = 5
        period_seconds        = 5
        failure_threshold     = 10
      }

      # Liveness: poll /health every 30s; 3 consecutive failures count as dead.
      liveness_probe {
        http_get {
          path = "/health"
          port = 8080
        }
        timeout_seconds   = 5
        period_seconds    = 30
        failure_threshold = 3
      }
    }

    max_instance_request_concurrency = 100
  }

  # Route all traffic to the latest revision.
  traffic {
    type    = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
    percent = 100
  }

  # The IAM grant and both secret versions must exist before the first
  # revision is created, because the container reads the secrets at start.
  depends_on = [
    google_project_iam_member.soma_secret_accessor,
    google_secret_manager_secret_version.soma_hf_token,
    google_secret_manager_secret_version.soma_operator_key,
  ]
}
# ── Public Invoker IAM ────────────────────────────────────────────────────────
# Grants roles/run.invoker to allUsers, so Cloud Run itself does not require
# IAM authentication. Per the file header, callers authenticate to Soma with a
# bearer token instead.
resource "google_cloud_run_v2_service_iam_member" "soma_us_public" {
  member   = "allUsers"
  role     = "roles/run.invoker"
  name     = google_cloud_run_v2_service.soma_us.name
  location = "us-central1"
  project  = var.project_id
}
# ── Serverless NEG ────────────────────────────────────────────────────────────
# Serverless NEG that exposes the Cloud Run service as a load balancer backend.
resource "google_compute_region_network_endpoint_group" "soma_us" {
  project               = var.project_id
  region                = "us-central1"
  name                  = "soma-neg-us"
  network_endpoint_type = "SERVERLESS"

  cloud_run {
    service = google_cloud_run_v2_service.soma_us.name
  }
}
# ── Backend Service ───────────────────────────────────────────────────────────
# Backend service that the prod URL map routes ai.neurontechnologies.ai to.
resource "google_compute_backend_service" "soma" {
  project               = var.project_id
  name                  = "soma-backend-prod"
  protocol              = "HTTPS"
  load_balancing_scheme = "EXTERNAL_MANAGED"

  # CDN stays off: inference responses are always unique.
  enable_cdn = false

  # Reuses the Cloud Armor policy defined as "marketing"; per the original
  # note it is shared with other services (SQLi/XSS/rate-limit).
  security_policy = google_compute_security_policy.marketing.self_link

  backend {
    group = google_compute_region_network_endpoint_group.soma_us.self_link
  }

  # Request logging enabled for every request (sample rate 1.0).
  log_config {
    enable      = true
    sample_rate = 1.0
  }
}
# ── SSL Certificate ───────────────────────────────────────────────────────────
# Google-managed certificate for the Soma hostname.
resource "google_compute_managed_ssl_certificate" "soma" {
  project = var.project_id
  name    = "soma-cert-prod"

  managed {
    domains = ["ai.neurontechnologies.ai"]
  }
}
+17
View File
@@ -0,0 +1,17 @@
# ── Cloudflare DNS — GCP-backed services ─────────────────────────────────────
# A records pointing to the GCP global load balancer IP.
# All subdomains share the same anycast IP (google_compute_global_address.prod).
#
# Cloudflare provider credentials come from var.cloudflare_api_key and var.cloudflare_email (see main.tf).
# Zone ID for neurontechnologies.ai is set in terraform.tfvars.
# ── ai.neurontechnologies.ai → Soma inference gateway ────────────────────────
# A record "ai" in the neurontechnologies.ai zone, pointing at the address of
# the GCP global load balancer.
#
# The record is DNS-only (proxied = false). The Google-managed certificate for
# ai.neurontechnologies.ai can only be provisioned and renewed while the
# hostname resolves to the load balancer IP itself. Behind the Cloudflare
# proxy the name resolves to Cloudflare addresses and certificate validation
# fails.
resource "cloudflare_record" "soma_ai" {
  zone_id = var.cloudflare_zone_id_neurontechnologies
  name    = "ai"
  type    = "A"
  content = google_compute_global_address.prod.address
  proxied = false
  ttl     = 1 # 1 = "automatic" TTL in Cloudflare
}
+11
View File
@@ -205,6 +205,11 @@ resource "google_compute_url_map" "prod" {
path_matcher = "accounts"
}
host_rule {
hosts = ["ai.neurontechnologies.ai"]
path_matcher = "soma"
}
path_matcher {
name = "marketing"
default_service = google_compute_backend_service.prod.self_link
@@ -219,6 +224,11 @@ resource "google_compute_url_map" "prod" {
name = "accounts"
default_service = google_compute_backend_service.accounts.self_link
}
path_matcher {
name = "soma"
default_service = google_compute_backend_service.soma.self_link
}
}
# ── Prod HTTPS Target Proxy ───────────────────────────────────────────────────
@@ -231,6 +241,7 @@ resource "google_compute_target_https_proxy" "prod" {
google_compute_managed_ssl_certificate.prod.self_link,
google_compute_managed_ssl_certificate.accounts.self_link,
google_compute_managed_ssl_certificate.api.self_link,
google_compute_managed_ssl_certificate.soma.self_link,
]
}
+11
View File
@@ -14,6 +14,10 @@ terraform {
source = "hashicorp/random"
version = "~> 3.6"
}
cloudflare = {
source = "cloudflare/cloudflare"
version = "~> 4.0"
}
}
backend "s3" {
@@ -39,3 +43,10 @@ provider "google-beta" {
project = var.project_id
region = "us-central1"
}
# Cloudflare provider — authenticates with var.cloudflare_api_key + var.cloudflare_email.
# (Global API key, not a scoped API token.)
# Cloudflare provider, authenticated with the account e-mail plus API key
# taken from Terraform input variables.
provider "cloudflare" {
  email   = var.cloudflare_email
  api_key = var.cloudflare_api_key
}
+17
View File
@@ -70,6 +70,7 @@ output "cloud_run_services" {
api_us = google_cloud_run_v2_service.api_us.uri
api_eu = google_cloud_run_v2_service.api_eu.uri
api_apac = google_cloud_run_v2_service.api_apac.uri
soma_us = google_cloud_run_v2_service.soma_us.uri
}
}
@@ -79,5 +80,21 @@ output "artifact_registry_urls" {
marketing = "us-central1-docker.pkg.dev/${var.project_id}/neuron-marketing/marketing"
accounts = "us-central1-docker.pkg.dev/${var.project_id}/neuron-accounts/accounts"
api = "us-central1-docker.pkg.dev/${var.project_id}/neuron-api/api"
soma = "us-central1-docker.pkg.dev/${var.project_id}/neuron-soma/soma"
}
}
# URI reported by the soma_us Cloud Run service.
output "soma_service_url" {
  value       = google_cloud_run_v2_service.soma_us.uri
  description = "Soma Cloud Run service URL (us-central1)"
}
# Untagged image path for Soma in Artifact Registry.
output "soma_artifact_registry_url" {
  value       = "us-central1-docker.pkg.dev/${var.project_id}/neuron-soma/soma"
  description = "Soma Docker image base URL in Artifact Registry"
}
# Name of the managed certificate resource for the Soma hostname.
output "soma_ssl_cert_name" {
  value       = google_compute_managed_ssl_certificate.soma.name
  description = "Soma SSL cert (check provisioning status in GCP console)"
}
+11
View File
@@ -30,6 +30,17 @@ variable "cloudflare_zone_id_neurontechnologies" {
# or: curl -s -X GET "https://api.cloudflare.com/client/v4/zones?name=neurontechnologies.ai" -H "X-Auth-Email: <email>" -H "X-Auth-Key: <key>"
}
# API key consumed by the cloudflare provider block; marked sensitive so
# Terraform redacts it in plan/apply output.
variable "cloudflare_api_key" {
  type        = string
  sensitive   = true
  description = "Cloudflare global API key (from Vault: secret/cloudflare api_key)"
}
# Account e-mail consumed by the cloudflare provider block.
variable "cloudflare_email" {
  type        = string
  description = "Cloudflare account email (from Vault: secret/cloudflare email)"
}
locals {
project_id = var.project_id
@@ -1,96 +0,0 @@
---
# ConfigMap holding the Soma configuration file (soma.toml) for neuron-prod.
# The TOML payload is a literal block scalar and is kept verbatim.
apiVersion: v1
kind: ConfigMap
metadata:
  name: soma-config
  namespace: neuron-prod
data:
  soma.toml: |
    [soma]
    name = "neuron-prod"
    version = "0.1.0"
    region_primary = "us-central1"
    [api]
    port = 8080
    default_rate_limit_rpm = 500
    burst_rate_limit_rpm = 1000
    [routing]
    anti_concentration_limit = 0.60
    idle_drain_minutes = 15
    pre_warm_load_threshold = 0.70
    cost_oracle_poll_seconds = 60
    [warm_pool]
    llm_min_warm = 1
    image_gen_min_warm = 1
    video_min_warm = 1
    [providers]
    priority = ["legion"]
    [providers.gcp]
    enabled = false
    project_id = ""
    zones = []
    service_account_key_path = ""
    [providers.runpod]
    enabled = false
    api_key_env = "RUNPOD_API_KEY"
    preferred_gpu = "H100_SXM"
    [providers.legion]
    enabled = true
    host = "legion.neuralplatform.ai"
    ssh_key_env = "LEGION_SSH_KEY"
    cost_per_hour = 0.40
    [providers.aws]
    enabled = false
    region = "us-east-1"
    access_key_env = "AWS_ACCESS_KEY_ID"
    secret_key_env = "AWS_SECRET_ACCESS_KEY"
    [providers.azure]
    enabled = false
    subscription_id = ""
    [storage]
    primary = "r2"
    replicate_to = []
    [storage.gcs]
    project = ""
    buckets = []
    [storage.r2]
    account_id_env = "CF_R2_ACCOUNT_ID"
    access_key_env = "CF_R2_ACCESS_KEY"
    [identity]
    vault_addr = "https://vault.neuralplatform.ai"
    vault_token_env = "VAULT_TOKEN"
    secret_rotation_days = 90
    [inference.llm]
    default_model = "NeuronTechnologiesAI/Neuron"
    default_backend = "huggingface"
    [inference.image_gen]
    default_model = "lustify"
    default_backend = "sd-forge"
    default_width = 1024
    default_height = 1024
    [email]
    provider = "smtp"
    from_address = "noreply@neurontechnologies.ai"
    from_name = "Neuron Technologies"
    smtp_host = "smtp.postmarkapp.com"
    smtp_port_env = "SMTP_PORT"
    [telemetry]
    otlp_endpoint = "http://alloy.monitoring.svc.cluster.local:4318"
    otlp_tenant = "legion"
    log_level = "info"
@@ -1,68 +0,0 @@
---
# Single-replica Deployment of the Soma gateway in neuron-prod.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: soma
  namespace: neuron-prod
  labels:
    app: soma
    env: prod
spec:
  replicas: 1
  selector:
    matchLabels:
      app: soma
  template:
    metadata:
      labels:
        app: soma
        env: prod
    spec:
      # Pod-level security settings: fixed UID/fsGroup, default seccomp profile.
      securityContext:
        runAsUser: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: soma
          image: registry.neuralplatform.ai/soma:latest
          imagePullPolicy: Always
          ports:
            - name: http
              containerPort: 8080
          env:
            # Points at the file provided by the "config" volume below.
            - name: SOMA_CONFIG_PATH
              value: /etc/soma/soma.toml
          # All keys of the soma-secrets Secret become environment variables.
          envFrom:
            - secretRef:
                name: soma-secrets
          volumeMounts:
            - name: config
              mountPath: /etc/soma
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: false
            capabilities:
              drop:
                - "ALL"
          resources:
            requests:
              cpu: 500m
              memory: 512Mi
            limits:
              cpu: 2000m
              memory: 1Gi
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 10
      volumes:
        # soma-config ConfigMap mounted at /etc/soma.
        - name: config
          configMap:
            name: soma-config
@@ -1,22 +0,0 @@
---
# ExternalSecret that materialises the soma-secrets Secret from the "vault"
# ClusterSecretStore, refreshed hourly.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: soma-secrets
  namespace: neuron-prod
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault
    kind: ClusterSecretStore
  target:
    name: soma-secrets
    creationPolicy: Owner
  # Each entry maps one property of secret/data/soma to a key of the Secret.
  data:
    - secretKey: SOMA_OPERATOR_KEY
      remoteRef:
        key: secret/data/soma
        property: operator_key
    - secretKey: HF_TOKEN
      remoteRef:
        key: secret/data/soma
        property: hf_token
@@ -1,18 +0,0 @@
---
# Traefik IngressRoute for the Neuron AI inference gateway.
# Routes Host ai.neurontechnologies.ai on the websecure entry point to the
# soma Service (port 8080).
# Endpoint: https://ai.neurontechnologies.ai/v1/chat/completions
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: soma-ai
  namespace: neuron-prod
spec:
  entryPoints:
    - websecure
  routes:
    - match: Host(`ai.neurontechnologies.ai`)
      kind: Rule
      services:
        - name: soma
          port: 8080
@@ -1,15 +0,0 @@
---
# ClusterIP Service in front of the pods labelled app=soma (port 8080 → 8080).
apiVersion: v1
kind: Service
metadata:
  name: soma
  namespace: neuron-prod
  labels:
    app: soma
spec:
  type: ClusterIP
  selector:
    app: soma
  ports:
    - name: http
      port: 8080
      targetPort: 8080