Deploy soma to GCP Cloud Run at ai.neurontechnologies.ai
- Add cloud-run-soma.tf: soma-prod-us Cloud Run service in us-central1, neuron-soma-sa service account, soma Artifact Registry repo, Secret Manager secrets for HF token and operator key, serverless NEG, backend service, SSL cert
- Add dns-gcp.tf: Cloudflare A record for ai.neurontechnologies.ai pointing to GCP LB IP; Cloudflare provider added to main.tf/variables.tf
- Update load-balancer.tf: soma host rule + path matcher, soma SSL cert added to HTTPS proxy
- Update outputs.tf: soma service URL and artifact registry URL outputs
- Remove legion soma k8s manifests (Legion is gone)
- Update AGENTS.md to reflect GCP as primary production environment
This commit is contained in:
@@ -0,0 +1,249 @@
|
||||
# ── Soma — AI Inference Gateway — Cloud Run ───────────────────────────────────
|
||||
# Soma is a Rust/Axum inference proxy that speaks OpenAI-compatible API.
|
||||
# Single region (us-central1) — inference latency matters, no need for global
|
||||
# multi-region replication at this stage.
|
||||
#
|
||||
# Public endpoint: https://ai.neurontechnologies.ai/v1/chat/completions
|
||||
# Auth: Bearer token (svc-* prefix) via require_api_key middleware.
|
||||
|
||||
# Image tag to deploy. Defaults to "latest" so existing behaviour is unchanged.
# Pin this to an immutable per-build tag from CI: while the image string stays
# identical, Terraform sees no diff and will not roll a new Cloud Run revision.
variable "soma_image_tag" {
  description = "Tag of the soma image in Artifact Registry to deploy to Cloud Run"
  type        = string
  default     = "latest"
}

locals {
  # Labels attached to Soma resources that accept them.
  soma_labels = {
    "managed-by" = "terraform"
    "service"    = "neuron-soma"
  }

  # Fully-qualified image reference inside the neuron-soma Artifact Registry repo.
  soma_image = "us-central1-docker.pkg.dev/${var.project_id}/neuron-soma/soma:${var.soma_image_tag}"
}
|
||||
|
||||
# ── Artifact Registry ─────────────────────────────────────────────────────────
|
||||
|
||||
# Docker repository that stores the Soma gateway images.
resource "google_artifact_registry_repository" "soma" {
  project       = var.project_id
  location      = "us-central1"
  repository_id = "neuron-soma"
  format        = "DOCKER"
  description   = "Soma AI inference gateway (Rust) Docker images"

  # NOTE(review): this is a KEEP-only policy. Keep policies exempt versions from
  # DELETE policies; with no DELETE policy defined here, nothing is pruned.
  # Confirm whether an accompanying DELETE policy was intended.
  cleanup_policies {
    id     = "keep-last-10"
    action = "KEEP"

    most_recent_versions {
      keep_count = 10
    }
  }
}
|
||||
|
||||
# ── CI pusher access to soma repo ─────────────────────────────────────────────
|
||||
|
||||
# Grants the CI pusher service account write access to the soma repository.
resource "google_artifact_registry_repository_iam_member" "ci_soma" {
  member     = "serviceAccount:${google_service_account.ci_pusher.email}"
  role       = "roles/artifactregistry.writer"
  repository = google_artifact_registry_repository.soma.name
  location   = "us-central1"
  project    = var.project_id
}
|
||||
|
||||
# Lets the CI pusher service account act as the Soma runtime service account
# (roles/iam.serviceAccountUser on that account only).
resource "google_service_account_iam_member" "ci_pusher_act_as_soma" {
  member             = "serviceAccount:${google_service_account.ci_pusher.email}"
  role               = "roles/iam.serviceAccountUser"
  service_account_id = google_service_account.soma.name
}
|
||||
|
||||
# ── Service Account ───────────────────────────────────────────────────────────
|
||||
|
||||
# Runtime identity used by the Soma Cloud Run service.
resource "google_service_account" "soma" {
  project      = var.project_id
  account_id   = "neuron-soma-sa"
  display_name = "Neuron Soma Cloud Run SA"
  description  = "Service account for the Soma AI inference gateway on Cloud Run"
}
|
||||
|
||||
# Allows the Soma service account to read Secret Manager secret payloads.
# NOTE(review): this binding is project-wide, so the account can read every
# secret in the project, not only soma-hf-token and soma-operator-key.
# Consider per-secret google_secret_manager_secret_iam_member bindings instead
# (the Cloud Run service's depends_on would need updating at the same time).
resource "google_project_iam_member" "soma_secret_accessor" {
  member  = "serviceAccount:${google_service_account.soma.email}"
  role    = "roles/secretmanager.secretAccessor"
  project = var.project_id
}
|
||||
|
||||
# ── Secrets ───────────────────────────────────────────────────────────────────
|
||||
|
||||
# Secret container for the Hugging Face token; the payload lives in the
# matching secret version resource.
resource "google_secret_manager_secret" "soma_hf_token" {
  project   = var.project_id
  secret_id = "soma-hf-token"

  # Automatic (Google-managed) replication.
  replication {
    auto {}
  }
}
|
||||
|
||||
# Hugging Face access token for Soma. Supplied at plan/apply time (for example
# via TF_VAR_soma_hf_token or a secret-backed tfvars source) so the value is
# never committed to version control.
#
# SECURITY: a literal token was previously hard-coded in this resource. Treat it
# as compromised: revoke it and issue a new one before applying.
# The value is still written to Terraform state, so the state backend must be
# access-controlled and encrypted.
variable "soma_hf_token" {
  description = "Hugging Face access token stored in the soma-hf-token secret"
  type        = string
  sensitive   = true
}

# Payload version for the soma-hf-token secret.
resource "google_secret_manager_secret_version" "soma_hf_token" {
  secret      = google_secret_manager_secret.soma_hf_token.id
  secret_data = var.soma_hf_token
}
|
||||
|
||||
# Secret container for the Soma operator API key; the payload lives in the
# matching secret version resource.
resource "google_secret_manager_secret" "soma_operator_key" {
  project   = var.project_id
  secret_id = "soma-operator-key"

  # Automatic (Google-managed) replication.
  replication {
    auto {}
  }
}
|
||||
|
||||
# Soma operator API key. Supplied at plan/apply time (for example via
# TF_VAR_soma_operator_key or a secret-backed tfvars source) so the value is
# never committed to version control.
#
# SECURITY: a literal key was previously hard-coded in this resource. Treat it
# as compromised: rotate it before applying.
# The value is still written to Terraform state, so the state backend must be
# access-controlled and encrypted.
variable "soma_operator_key" {
  description = "Operator API key stored in the soma-operator-key secret"
  type        = string
  sensitive   = true
}

# Payload version for the soma-operator-key secret.
resource "google_secret_manager_secret_version" "soma_operator_key" {
  secret      = google_secret_manager_secret.soma_operator_key.id
  secret_data = var.soma_operator_key
}
|
||||
|
||||
# ── Cloud Run Service — us-central1 ──────────────────────────────────────────
|
||||
|
||||
# Soma Cloud Run service in us-central1. Runs the image from local.soma_image
# under the Soma service account and receives HF_TOKEN and SOMA_OPERATOR_KEY
# from Secret Manager.
resource "google_cloud_run_v2_service" "soma_us" {
  project  = var.project_id
  name     = "soma-prod-us"
  location = "us-central1"
  labels   = local.soma_labels

  # NOTE(review): INGRESS_TRAFFIC_ALL keeps the service's own run.app URL
  # publicly reachable, so callers can bypass the load balancer and its Cloud
  # Armor policy. If all traffic should enter through the load balancer,
  # INGRESS_TRAFFIC_INTERNAL_LOAD_BALANCER is the restrictive setting. Confirm
  # before changing; the soma_service_url output exposes the direct URL.
  ingress = "INGRESS_TRAFFIC_ALL"

  template {
    service_account                  = google_service_account.soma.email
    max_instance_request_concurrency = 100

    scaling {
      # Keep one instance warm at all times: a cold start is unacceptable for
      # inference latency.
      min_instance_count = 1
      max_instance_count = 10
    }

    containers {
      image = local.soma_image

      ports {
        name           = "http1"
        container_port = 8080
      }

      resources {
        # CPU stays allocated between requests so the proxy responds quickly.
        cpu_idle = false

        limits = {
          cpu    = "2"
          memory = "2Gi"
        }
      }

      # NOTE(review): no volume in this resource provides /etc/soma/soma.toml;
      # presumably the file is baked into the image. Verify.
      env {
        name  = "SOMA_CONFIG_PATH"
        value = "/etc/soma/soma.toml"
      }

      # Secret-backed variables resolve the "latest" secret version.
      env {
        name = "HF_TOKEN"

        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.soma_hf_token.secret_id
            version = "latest"
          }
        }
      }

      env {
        name = "SOMA_OPERATOR_KEY"

        value_source {
          secret_key_ref {
            secret  = google_secret_manager_secret.soma_operator_key.secret_id
            version = "latest"
          }
        }
      }

      # Startup: poll /health every 5s after a 2s delay, up to 10 failures.
      startup_probe {
        initial_delay_seconds = 2
        period_seconds        = 5
        timeout_seconds       = 5
        failure_threshold     = 10

        http_get {
          path = "/health"
          port = 8080
        }
      }

      # Liveness: poll /health every 30s; 3 consecutive failures fail the probe.
      liveness_probe {
        period_seconds    = 30
        timeout_seconds   = 5
        failure_threshold = 3

        http_get {
          path = "/health"
          port = 8080
        }
      }
    }
  }

  # Route all traffic to the latest revision.
  traffic {
    type    = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
    percent = 100
  }

  # The secret versions and the accessor binding must exist before the service
  # is created.
  depends_on = [
    google_project_iam_member.soma_secret_accessor,
    google_secret_manager_secret_version.soma_hf_token,
    google_secret_manager_secret_version.soma_operator_key,
  ]
}
|
||||
|
||||
# ── Public Invoker IAM ────────────────────────────────────────────────────────
|
||||
|
||||
# Grants roles/run.invoker to allUsers, so Cloud Run IAM does not gate requests.
# Per this file's header, authentication is enforced inside the application
# (Bearer token via the require_api_key middleware).
resource "google_cloud_run_v2_service_iam_member" "soma_us_public" {
  member   = "allUsers"
  role     = "roles/run.invoker"
  name     = google_cloud_run_v2_service.soma_us.name
  location = "us-central1"
  project  = var.project_id
}
|
||||
|
||||
# ── Serverless NEG ────────────────────────────────────────────────────────────
|
||||
|
||||
# Serverless NEG that lets the load balancer backend target the Soma Cloud Run
# service in us-central1.
resource "google_compute_region_network_endpoint_group" "soma_us" {
  project               = var.project_id
  region                = "us-central1"
  name                  = "soma-neg-us"
  network_endpoint_type = "SERVERLESS"

  cloud_run {
    service = google_cloud_run_v2_service.soma_us.name
  }
}
|
||||
|
||||
# ── Backend Service ───────────────────────────────────────────────────────────
|
||||
|
||||
# Load balancer backend service for Soma, fronting the us-central1 serverless NEG.
resource "google_compute_backend_service" "soma" {
  project               = var.project_id
  name                  = "soma-backend-prod"
  protocol              = "HTTPS"
  load_balancing_scheme = "EXTERNAL_MANAGED"

  # Inference responses are always unique, so CDN caching is disabled.
  enable_cdn = false

  # Reuses the Cloud Armor policy shared with other services
  # (SQLi/XSS/rate-limit).
  security_policy = google_compute_security_policy.marketing.self_link

  backend {
    group = google_compute_region_network_endpoint_group.soma_us.self_link
  }

  # Log every request (sample rate 1.0).
  log_config {
    enable      = true
    sample_rate = 1.0
  }
}
|
||||
|
||||
# ── SSL Certificate ───────────────────────────────────────────────────────────
|
||||
|
||||
# Google-managed certificate for the Soma public hostname.
resource "google_compute_managed_ssl_certificate" "soma" {
  project = var.project_id
  name    = "soma-cert-prod"

  managed {
    domains = ["ai.neurontechnologies.ai"]
  }
}
|
||||
@@ -0,0 +1,17 @@
|
||||
# ── Cloudflare DNS — GCP-backed services ─────────────────────────────────────
|
||||
# A records pointing to the GCP global load balancer IP.
|
||||
# All subdomains share the same anycast IP (google_compute_global_address.prod).
|
||||
#
|
||||
# Cloudflare provider is configured in main.tf from var.cloudflare_api_key and var.cloudflare_email.
|
||||
# Zone ID for neurontechnologies.ai is set in terraform.tfvars.
|
||||
|
||||
# ── ai.neurontechnologies.ai → Soma inference gateway ────────────────────────
|
||||
|
||||
# A record for ai.neurontechnologies.ai pointing at the GCP load balancer IP.
# NOTE(review): with proxied = true, public DNS for this name answers with
# Cloudflare edge addresses, not the load balancer IP. Google-managed
# certificates validate by checking that the domain resolves to the load
# balancer, so soma-cert-prod may fail to provision or renew. Verify the
# certificate reaches ACTIVE, or plan for an unproxied record / different
# certificate strategy.
resource "cloudflare_record" "soma_ai" {
  zone_id = var.cloudflare_zone_id_neurontechnologies
  type    = "A"
  name    = "ai"
  content = google_compute_global_address.prod.address

  # ttl = 1 is Cloudflare's "automatic" TTL.
  ttl     = 1
  proxied = true
}
|
||||
@@ -205,6 +205,11 @@ resource "google_compute_url_map" "prod" {
|
||||
path_matcher = "accounts"
|
||||
}
|
||||
|
||||
host_rule {
|
||||
hosts = ["ai.neurontechnologies.ai"]
|
||||
path_matcher = "soma"
|
||||
}
|
||||
|
||||
path_matcher {
|
||||
name = "marketing"
|
||||
default_service = google_compute_backend_service.prod.self_link
|
||||
@@ -219,6 +224,11 @@ resource "google_compute_url_map" "prod" {
|
||||
name = "accounts"
|
||||
default_service = google_compute_backend_service.accounts.self_link
|
||||
}
|
||||
|
||||
path_matcher {
|
||||
name = "soma"
|
||||
default_service = google_compute_backend_service.soma.self_link
|
||||
}
|
||||
}
|
||||
|
||||
# ── Prod HTTPS Target Proxy ───────────────────────────────────────────────────
|
||||
@@ -231,6 +241,7 @@ resource "google_compute_target_https_proxy" "prod" {
|
||||
google_compute_managed_ssl_certificate.prod.self_link,
|
||||
google_compute_managed_ssl_certificate.accounts.self_link,
|
||||
google_compute_managed_ssl_certificate.api.self_link,
|
||||
google_compute_managed_ssl_certificate.soma.self_link,
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,10 @@ terraform {
|
||||
source = "hashicorp/random"
|
||||
version = "~> 3.6"
|
||||
}
|
||||
cloudflare = {
|
||||
source = "cloudflare/cloudflare"
|
||||
version = "~> 4.0"
|
||||
}
|
||||
}
|
||||
|
||||
backend "s3" {
|
||||
@@ -39,3 +43,10 @@ provider "google-beta" {
|
||||
project = var.project_id
|
||||
region = "us-central1"
|
||||
}
|
||||
|
||||
# Cloudflare provider, authenticated with the account email and the global API
# key (not a scoped API token). Both values come from Terraform input
# variables: var.cloudflare_email and var.cloudflare_api_key.
provider "cloudflare" {
  email   = var.cloudflare_email
  api_key = var.cloudflare_api_key
}
|
||||
|
||||
@@ -70,6 +70,7 @@ output "cloud_run_services" {
|
||||
api_us = google_cloud_run_v2_service.api_us.uri
|
||||
api_eu = google_cloud_run_v2_service.api_eu.uri
|
||||
api_apac = google_cloud_run_v2_service.api_apac.uri
|
||||
soma_us = google_cloud_run_v2_service.soma_us.uri
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,5 +80,21 @@ output "artifact_registry_urls" {
|
||||
marketing = "us-central1-docker.pkg.dev/${var.project_id}/neuron-marketing/marketing"
|
||||
accounts = "us-central1-docker.pkg.dev/${var.project_id}/neuron-accounts/accounts"
|
||||
api = "us-central1-docker.pkg.dev/${var.project_id}/neuron-api/api"
|
||||
soma = "us-central1-docker.pkg.dev/${var.project_id}/neuron-soma/soma"
|
||||
}
|
||||
}
|
||||
|
||||
# Direct Cloud Run URI of the Soma service (not the load-balanced hostname).
output "soma_service_url" {
  value       = google_cloud_run_v2_service.soma_us.uri
  description = "Soma Cloud Run service URL (us-central1)"
}

# Image path without a tag, for use by build/push tooling.
output "soma_artifact_registry_url" {
  value       = "us-central1-docker.pkg.dev/${var.project_id}/neuron-soma/soma"
  description = "Soma Docker image base URL in Artifact Registry"
}

# Name of the managed certificate, handy for checking provisioning status.
output "soma_ssl_cert_name" {
  value       = google_compute_managed_ssl_certificate.soma.name
  description = "Soma SSL cert (check provisioning status in GCP console)"
}
|
||||
|
||||
@@ -30,6 +30,17 @@ variable "cloudflare_zone_id_neurontechnologies" {
|
||||
# or: curl -s -X GET "https://api.cloudflare.com/client/v4/zones?name=neurontechnologies.ai" -H "X-Auth-Email: <email>" -H "X-Auth-Key: <key>"
|
||||
}
|
||||
|
||||
# Credentials consumed by the Cloudflare provider block.
variable "cloudflare_api_key" {
  type        = string
  sensitive   = true
  description = "Cloudflare global API key (from Vault: secret/cloudflare api_key)"
}

variable "cloudflare_email" {
  type        = string
  description = "Cloudflare account email (from Vault: secret/cloudflare email)"
}
|
||||
|
||||
locals {
|
||||
project_id = var.project_id
|
||||
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: soma-config
|
||||
namespace: neuron-prod
|
||||
data:
|
||||
soma.toml: |
|
||||
[soma]
|
||||
name = "neuron-prod"
|
||||
version = "0.1.0"
|
||||
region_primary = "us-central1"
|
||||
|
||||
[api]
|
||||
port = 8080
|
||||
default_rate_limit_rpm = 500
|
||||
burst_rate_limit_rpm = 1000
|
||||
|
||||
[routing]
|
||||
anti_concentration_limit = 0.60
|
||||
idle_drain_minutes = 15
|
||||
pre_warm_load_threshold = 0.70
|
||||
cost_oracle_poll_seconds = 60
|
||||
|
||||
[warm_pool]
|
||||
llm_min_warm = 1
|
||||
image_gen_min_warm = 1
|
||||
video_min_warm = 1
|
||||
|
||||
[providers]
|
||||
priority = ["legion"]
|
||||
|
||||
[providers.gcp]
|
||||
enabled = false
|
||||
project_id = ""
|
||||
zones = []
|
||||
service_account_key_path = ""
|
||||
|
||||
[providers.runpod]
|
||||
enabled = false
|
||||
api_key_env = "RUNPOD_API_KEY"
|
||||
preferred_gpu = "H100_SXM"
|
||||
|
||||
[providers.legion]
|
||||
enabled = true
|
||||
host = "legion.neuralplatform.ai"
|
||||
ssh_key_env = "LEGION_SSH_KEY"
|
||||
cost_per_hour = 0.40
|
||||
|
||||
[providers.aws]
|
||||
enabled = false
|
||||
region = "us-east-1"
|
||||
access_key_env = "AWS_ACCESS_KEY_ID"
|
||||
secret_key_env = "AWS_SECRET_ACCESS_KEY"
|
||||
|
||||
[providers.azure]
|
||||
enabled = false
|
||||
subscription_id = ""
|
||||
|
||||
[storage]
|
||||
primary = "r2"
|
||||
replicate_to = []
|
||||
|
||||
[storage.gcs]
|
||||
project = ""
|
||||
buckets = []
|
||||
|
||||
[storage.r2]
|
||||
account_id_env = "CF_R2_ACCOUNT_ID"
|
||||
access_key_env = "CF_R2_ACCESS_KEY"
|
||||
|
||||
[identity]
|
||||
vault_addr = "https://vault.neuralplatform.ai"
|
||||
vault_token_env = "VAULT_TOKEN"
|
||||
secret_rotation_days = 90
|
||||
|
||||
[inference.llm]
|
||||
default_model = "NeuronTechnologiesAI/Neuron"
|
||||
default_backend = "huggingface"
|
||||
|
||||
[inference.image_gen]
|
||||
default_model = "lustify"
|
||||
default_backend = "sd-forge"
|
||||
default_width = 1024
|
||||
default_height = 1024
|
||||
|
||||
[email]
|
||||
provider = "smtp"
|
||||
from_address = "noreply@neurontechnologies.ai"
|
||||
from_name = "Neuron Technologies"
|
||||
smtp_host = "smtp.postmarkapp.com"
|
||||
smtp_port_env = "SMTP_PORT"
|
||||
|
||||
[telemetry]
|
||||
otlp_endpoint = "http://alloy.monitoring.svc.cluster.local:4318"
|
||||
otlp_tenant = "legion"
|
||||
log_level = "info"
|
||||
@@ -1,68 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: soma
|
||||
namespace: neuron-prod
|
||||
labels:
|
||||
app: soma
|
||||
env: prod
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: soma
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: soma
|
||||
env: prod
|
||||
spec:
|
||||
securityContext:
|
||||
runAsUser: 1000
|
||||
fsGroup: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
containers:
|
||||
- name: soma
|
||||
image: registry.neuralplatform.ai/soma:latest
|
||||
imagePullPolicy: Always
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
env:
|
||||
- name: SOMA_CONFIG_PATH
|
||||
value: /etc/soma/soma.toml
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: soma-secrets
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /etc/soma
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: false
|
||||
capabilities:
|
||||
drop: ["ALL"]
|
||||
resources:
|
||||
requests:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
limits:
|
||||
cpu: 2000m
|
||||
memory: 1Gi
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
name: soma-config
|
||||
@@ -1,22 +0,0 @@
|
||||
apiVersion: external-secrets.io/v1beta1
|
||||
kind: ExternalSecret
|
||||
metadata:
|
||||
name: soma-secrets
|
||||
namespace: neuron-prod
|
||||
spec:
|
||||
refreshInterval: 1h
|
||||
secretStoreRef:
|
||||
name: vault
|
||||
kind: ClusterSecretStore
|
||||
target:
|
||||
name: soma-secrets
|
||||
creationPolicy: Owner
|
||||
data:
|
||||
- secretKey: SOMA_OPERATOR_KEY
|
||||
remoteRef:
|
||||
key: secret/data/soma
|
||||
property: operator_key
|
||||
- secretKey: HF_TOKEN
|
||||
remoteRef:
|
||||
key: secret/data/soma
|
||||
property: hf_token
|
||||
@@ -1,18 +0,0 @@
|
||||
# Traefik IngressRoute: ai.neurontechnologies.ai → soma service
|
||||
# Exposes soma as the Neuron AI inference gateway.
|
||||
# Endpoint: https://ai.neurontechnologies.ai/v1/chat/completions
|
||||
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: soma-ai
|
||||
namespace: neuron-prod
|
||||
spec:
|
||||
entryPoints:
|
||||
- websecure
|
||||
routes:
|
||||
- match: Host(`ai.neurontechnologies.ai`)
|
||||
kind: Rule
|
||||
services:
|
||||
- name: soma
|
||||
port: 8080
|
||||
@@ -1,15 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: soma
|
||||
namespace: neuron-prod
|
||||
labels:
|
||||
app: soma
|
||||
spec:
|
||||
selector:
|
||||
app: soma
|
||||
ports:
|
||||
- name: http
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
type: ClusterIP
|
||||
Reference in New Issue
Block a user