From f2b025a433d2f8458dd81d0d4039efb5f282abfe Mon Sep 17 00:00:00 2001
From: Will Anderson
Date: Tue, 28 Apr 2026 10:15:15 -0500
Subject: [PATCH] Deploy soma to GCP Cloud Run at ai.neurontechnologies.ai

- Add cloud-run-soma.tf: soma-prod-us Cloud Run service in us-central1,
  neuron-soma-sa service account, soma Artifact Registry repo, Secret Manager
  secrets for HF token and operator key, serverless NEG, backend service,
  SSL cert
- Secret values (HF token, operator key) are supplied through the sensitive
  variables soma_hf_token / soma_operator_key; no secret material is committed
- Add dns-gcp.tf: Cloudflare A record for ai.neurontechnologies.ai pointing to
  GCP LB IP; Cloudflare provider added to main.tf/variables.tf
- Update load-balancer.tf: soma host rule + path matcher, soma SSL cert added
  to HTTPS proxy
- Update outputs.tf: soma service URL and artifact registry URL outputs
- Remove legion soma k8s manifests (Legion is gone)
- Update AGENTS.md to reflect GCP as primary production environment
---
 servers/gcp/cloud-run-soma.tf                 | 252 ++++++++++++++++++
 servers/gcp/dns-gcp.tf                        |  17 ++
 servers/gcp/load-balancer.tf                  |  11 +
 servers/gcp/main.tf                           |  11 +
 servers/gcp/outputs.tf                        |  17 ++
 servers/gcp/variables.tf                      |  23 ++
 .../prod/soma-configmap.yaml                  |  96 -------
 .../prod/soma-deployment.yaml                 |  68 -----
 .../prod/soma-externalsecret.yaml             |  22 --
 .../prod/soma-ingressroute.yaml               |  18 --
 .../prod/soma-service.yaml                    |  15 -
 11 files changed, 331 insertions(+), 219 deletions(-)
 create mode 100644 servers/gcp/cloud-run-soma.tf
 create mode 100644 servers/gcp/dns-gcp.tf
 delete mode 100644 servers/legion/k8s/neuron-technologies/prod/soma-configmap.yaml
 delete mode 100644 servers/legion/k8s/neuron-technologies/prod/soma-deployment.yaml
 delete mode 100644 servers/legion/k8s/neuron-technologies/prod/soma-externalsecret.yaml
 delete mode 100644 servers/legion/k8s/neuron-technologies/prod/soma-ingressroute.yaml
 delete mode 100644 servers/legion/k8s/neuron-technologies/prod/soma-service.yaml

diff --git a/servers/gcp/cloud-run-soma.tf b/servers/gcp/cloud-run-soma.tf
new file mode 100644
index 0000000..4e22caf
--- /dev/null
+++ b/servers/gcp/cloud-run-soma.tf
@@ -0,0 +1,252 @@
+# ── Soma — AI Inference Gateway — Cloud Run ───────────────────────────────────
+# Soma is a Rust/Axum inference proxy that speaks OpenAI-compatible API.
+# Single region (us-central1) — inference latency matters, no need for global
+# multi-region replication at this stage.
+#
+# Public endpoint: https://ai.neurontechnologies.ai/v1/chat/completions
+# Auth: Bearer token (svc-* prefix) via require_api_key middleware.
+
+locals {
+  soma_labels = {
+    "managed-by" = "terraform"
+    "service"    = "neuron-soma"
+  }
+  soma_image = "us-central1-docker.pkg.dev/${var.project_id}/neuron-soma/soma:latest"
+}
+
+# ── Artifact Registry ─────────────────────────────────────────────────────────
+
+resource "google_artifact_registry_repository" "soma" {
+  location      = "us-central1"
+  repository_id = "neuron-soma"
+  description   = "Soma AI inference gateway (Rust) Docker images"
+  format        = "DOCKER"
+  project       = var.project_id
+
+  cleanup_policies {
+    id     = "keep-last-10"
+    action = "KEEP"
+    most_recent_versions {
+      keep_count = 10
+    }
+  }
+}
+
+# ── CI pusher access to soma repo ─────────────────────────────────────────────
+
+resource "google_artifact_registry_repository_iam_member" "ci_soma" {
+  project    = var.project_id
+  location   = "us-central1"
+  repository = google_artifact_registry_repository.soma.name
+  role       = "roles/artifactregistry.writer"
+  member     = "serviceAccount:${google_service_account.ci_pusher.email}"
+}
+
+resource "google_service_account_iam_member" "ci_pusher_act_as_soma" {
+  service_account_id = google_service_account.soma.name
+  role               = "roles/iam.serviceAccountUser"
+  member             = "serviceAccount:${google_service_account.ci_pusher.email}"
+}
+
+# ── Service Account ───────────────────────────────────────────────────────────
+
+resource "google_service_account" "soma" {
+  account_id   = "neuron-soma-sa"
+  display_name = "Neuron Soma Cloud Run SA"
+  description  = "Service account for the Soma AI inference gateway on Cloud Run"
+  project      = var.project_id
+}
+
+resource "google_project_iam_member" "soma_secret_accessor" {
+  project = var.project_id
+  role    = "roles/secretmanager.secretAccessor"
+  member  = "serviceAccount:${google_service_account.soma.email}"
+}
+
+# ── Secrets ───────────────────────────────────────────────────────────────────
+# Values come from the sensitive variables soma_hf_token / soma_operator_key
+# (declared in variables.tf); never commit literal secret material here.
+# Supply them with TF_VAR_* env vars or an untracked tfvars file.
+
+resource "google_secret_manager_secret" "soma_hf_token" {
+  secret_id = "soma-hf-token"
+  project   = var.project_id
+
+  replication {
+    auto {}
+  }
+}
+
+resource "google_secret_manager_secret_version" "soma_hf_token" {
+  secret      = google_secret_manager_secret.soma_hf_token.id
+  secret_data = var.soma_hf_token
+}
+
+resource "google_secret_manager_secret" "soma_operator_key" {
+  secret_id = "soma-operator-key"
+  project   = var.project_id
+
+  replication {
+    auto {}
+  }
+}
+
+resource "google_secret_manager_secret_version" "soma_operator_key" {
+  secret      = google_secret_manager_secret.soma_operator_key.id
+  secret_data = var.soma_operator_key
+}
+
+# ── Cloud Run Service — us-central1 ──────────────────────────────────────────
+
+resource "google_cloud_run_v2_service" "soma_us" {
+  name     = "soma-prod-us"
+  location = "us-central1"
+  project  = var.project_id
+  ingress  = "INGRESS_TRAFFIC_ALL"
+  labels   = local.soma_labels
+
+  template {
+    service_account = google_service_account.soma.email
+
+    scaling {
+      # min=1: always warm — inference latency is unacceptable on cold start
+      min_instance_count = 1
+      max_instance_count = 10
+    }
+
+    containers {
+      image = local.soma_image
+
+      resources {
+        limits = {
+          cpu    = "2"
+          memory = "2Gi"
+        }
+        # cpu_idle=false: keep CPU allocated — inference proxy needs fast response
+        cpu_idle = false
+      }
+
+      env {
+        name  = "SOMA_CONFIG_PATH"
+        value = "/etc/soma/soma.toml"
+      }
+
+      env {
+        name = "HF_TOKEN"
+        value_source {
+          secret_key_ref {
+            secret  = google_secret_manager_secret.soma_hf_token.secret_id
+            version = "latest"
+          }
+        }
+      }
+
+      env {
+        name = "SOMA_OPERATOR_KEY"
+        value_source {
+          secret_key_ref {
+            secret  = google_secret_manager_secret.soma_operator_key.secret_id
+            version = "latest"
+          }
+        }
+      }
+
+      ports {
+        container_port = 8080
+        name           = "http1"
+      }
+
+      startup_probe {
+        http_get {
+          path = "/health"
+          port = 8080
+        }
+        initial_delay_seconds = 2
+        timeout_seconds       = 5
+        period_seconds        = 5
+        failure_threshold     = 10
+      }
+
+      liveness_probe {
+        http_get {
+          path = "/health"
+          port = 8080
+        }
+        timeout_seconds   = 5
+        period_seconds    = 30
+        failure_threshold = 3
+      }
+    }
+
+    max_instance_request_concurrency = 100
+  }
+
+  traffic {
+    type    = "TRAFFIC_TARGET_ALLOCATION_TYPE_LATEST"
+    percent = 100
+  }
+
+  depends_on = [
+    google_project_iam_member.soma_secret_accessor,
+    google_secret_manager_secret_version.soma_hf_token,
+    google_secret_manager_secret_version.soma_operator_key,
+  ]
+}
+
+# ── Public Invoker IAM ────────────────────────────────────────────────────────
+
+resource "google_cloud_run_v2_service_iam_member" "soma_us_public" {
+  project  = var.project_id
+  location = "us-central1"
+  name     = google_cloud_run_v2_service.soma_us.name
+  role     = "roles/run.invoker"
+  member   = "allUsers"
+}
+
+# ── Serverless NEG ────────────────────────────────────────────────────────────
+
+resource "google_compute_region_network_endpoint_group" "soma_us" {
+  name                  = "soma-neg-us"
+  network_endpoint_type = "SERVERLESS"
+  region                = "us-central1"
+  project               = var.project_id
+
+  cloud_run {
+    service = google_cloud_run_v2_service.soma_us.name
+  }
+}
+
+# ── Backend Service ───────────────────────────────────────────────────────────
+
+resource "google_compute_backend_service" "soma" {
+  name                  = "soma-backend-prod"
+  project               = var.project_id
+  load_balancing_scheme = "EXTERNAL_MANAGED"
+  protocol              = "HTTPS"
+
+  # Cloud Armor — same policy as other services (SQLi/XSS/rate-limit)
+  security_policy = google_compute_security_policy.marketing.self_link
+
+  # No CDN — inference responses are always unique
+  enable_cdn = false
+
+  backend {
+    group = google_compute_region_network_endpoint_group.soma_us.self_link
+  }
+
+  log_config {
+    enable      = true
+    sample_rate = 1.0
+  }
+}
+
+# ── SSL Certificate ───────────────────────────────────────────────────────────
+
+resource "google_compute_managed_ssl_certificate" "soma" {
+  name    = "soma-cert-prod"
+  project = var.project_id
+
+  managed {
+    domains = ["ai.neurontechnologies.ai"]
+  }
+}
diff --git a/servers/gcp/dns-gcp.tf b/servers/gcp/dns-gcp.tf
new file mode 100644
index 0000000..6573afc
--- /dev/null
+++ b/servers/gcp/dns-gcp.tf
@@ -0,0 +1,17 @@
+# ── Cloudflare DNS — GCP-backed services ─────────────────────────────────────
+# A records pointing to the GCP global load balancer IP.
+# All subdomains share the same anycast IP (google_compute_global_address.prod).
+#
+# Cloudflare credentials are configured on the provider block in main.tf.
+# Zone ID for neurontechnologies.ai is set in terraform.tfvars.
+
+# ── ai.neurontechnologies.ai → Soma inference gateway ────────────────────────
+
+resource "cloudflare_record" "soma_ai" {
+  zone_id = var.cloudflare_zone_id_neurontechnologies
+  name    = "ai"
+  type    = "A"
+  content = google_compute_global_address.prod.address
+  proxied = true
+  ttl     = 1
+}
diff --git a/servers/gcp/load-balancer.tf b/servers/gcp/load-balancer.tf
index 7e62e18..b87dbf1 100644
--- a/servers/gcp/load-balancer.tf
+++ b/servers/gcp/load-balancer.tf
@@ -205,6 +205,11 @@ resource "google_compute_url_map" "prod" {
     path_matcher = "accounts"
   }
 
+  host_rule {
+    hosts        = ["ai.neurontechnologies.ai"]
+    path_matcher = "soma"
+  }
+
   path_matcher {
     name            = "marketing"
     default_service = google_compute_backend_service.prod.self_link
@@ -219,6 +224,11 @@ resource "google_compute_url_map" "prod" {
     name            = "accounts"
     default_service = google_compute_backend_service.accounts.self_link
   }
+
+  path_matcher {
+    name            = "soma"
+    default_service = google_compute_backend_service.soma.self_link
+  }
 }
 
 # ── Prod HTTPS Target Proxy ───────────────────────────────────────────────────
@@ -231,6 +241,7 @@ resource "google_compute_target_https_proxy" "prod" {
     google_compute_managed_ssl_certificate.prod.self_link,
     google_compute_managed_ssl_certificate.accounts.self_link,
     google_compute_managed_ssl_certificate.api.self_link,
+    google_compute_managed_ssl_certificate.soma.self_link,
   ]
 }
 
diff --git a/servers/gcp/main.tf b/servers/gcp/main.tf
index 47a109c..2069a5e 100644
--- a/servers/gcp/main.tf
+++ b/servers/gcp/main.tf
@@ -14,6 +14,10 @@ terraform {
       source  = "hashicorp/random"
       version = "~> 3.6"
     }
+    cloudflare = {
+      source  = "cloudflare/cloudflare"
+      version = "~> 4.0"
+    }
   }
 
   backend "s3" {
@@ -39,3 +43,10 @@ provider "google-beta" {
   project = var.project_id
   region  = "us-central1"
 }
+
+# Cloudflare provider — credentials come from var.cloudflare_api_key and var.cloudflare_email.
+# (Global API key, not a scoped API token.)
+provider "cloudflare" {
+  api_key = var.cloudflare_api_key
+  email   = var.cloudflare_email
+}
diff --git a/servers/gcp/outputs.tf b/servers/gcp/outputs.tf
index dfa06fb..7a1596f 100644
--- a/servers/gcp/outputs.tf
+++ b/servers/gcp/outputs.tf
@@ -70,6 +70,7 @@ output "cloud_run_services" {
     api_us   = google_cloud_run_v2_service.api_us.uri
     api_eu   = google_cloud_run_v2_service.api_eu.uri
     api_apac = google_cloud_run_v2_service.api_apac.uri
+    soma_us  = google_cloud_run_v2_service.soma_us.uri
   }
 }
 
@@ -79,5 +80,21 @@ output "artifact_registry_urls" {
     marketing = "us-central1-docker.pkg.dev/${var.project_id}/neuron-marketing/marketing"
     accounts  = "us-central1-docker.pkg.dev/${var.project_id}/neuron-accounts/accounts"
     api       = "us-central1-docker.pkg.dev/${var.project_id}/neuron-api/api"
+    soma      = "us-central1-docker.pkg.dev/${var.project_id}/neuron-soma/soma"
   }
 }
+
+output "soma_service_url" {
+  description = "Soma Cloud Run service URL (us-central1)"
+  value       = google_cloud_run_v2_service.soma_us.uri
+}
+
+output "soma_artifact_registry_url" {
+  description = "Soma Docker image base URL in Artifact Registry"
+  value       = "us-central1-docker.pkg.dev/${var.project_id}/neuron-soma/soma"
+}
+
+output "soma_ssl_cert_name" {
+  description = "Soma SSL cert (check provisioning status in GCP console)"
+  value       = google_compute_managed_ssl_certificate.soma.name
+}
diff --git a/servers/gcp/variables.tf b/servers/gcp/variables.tf
index 95119cf..3b42fba 100644
--- a/servers/gcp/variables.tf
+++ b/servers/gcp/variables.tf
@@ -30,6 +30,29 @@ variable "cloudflare_zone_id_neurontechnologies" {
   # or: curl -s -X GET "https://api.cloudflare.com/client/v4/zones?name=neurontechnologies.ai" -H "X-Auth-Email: andersonwilliam85@gmail.com" -H "X-Auth-Key: "
 }
 
+variable "cloudflare_api_key" {
+  description = "Cloudflare global API key (from Vault: secret/cloudflare api_key)"
+  type        = string
+  sensitive   = true
+}
+
+variable "cloudflare_email" {
+  description = "Cloudflare account email (from Vault: secret/cloudflare email)"
+  type        = string
+}
+
+variable "soma_hf_token" {
+  description = "Hugging Face access token for Soma (from Vault: secret/soma hf_token)"
+  type        = string
+  sensitive   = true
+}
+
+variable "soma_operator_key" {
+  description = "Soma operator API key, svc-* prefix (from Vault: secret/soma operator_key)"
+  type        = string
+  sensitive   = true
+}
+
 locals {
   project_id = var.project_id
 
diff --git a/servers/legion/k8s/neuron-technologies/prod/soma-configmap.yaml b/servers/legion/k8s/neuron-technologies/prod/soma-configmap.yaml
deleted file mode 100644
index 39e75bc..0000000
--- a/servers/legion/k8s/neuron-technologies/prod/soma-configmap.yaml
+++ /dev/null
@@ -1,96 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: soma-config
-  namespace: neuron-prod
-data:
-  soma.toml: |
-    [soma]
-    name = "neuron-prod"
-    version = "0.1.0"
-    region_primary = "us-central1"
-
-    [api]
-    port = 8080
-    default_rate_limit_rpm = 500
-    burst_rate_limit_rpm = 1000
-
-    [routing]
-    anti_concentration_limit = 0.60
-    idle_drain_minutes = 15
-    pre_warm_load_threshold = 0.70
-    cost_oracle_poll_seconds = 60
-
-    [warm_pool]
-    llm_min_warm = 1
-    image_gen_min_warm = 1
-    video_min_warm = 1
-
-    [providers]
-    priority = ["legion"]
-
-    [providers.gcp]
-    enabled = false
-    project_id = ""
-    zones = []
-    service_account_key_path = ""
-
-    [providers.runpod]
-    enabled = false
-    api_key_env = "RUNPOD_API_KEY"
-    preferred_gpu = "H100_SXM"
-
-    [providers.legion]
-    enabled = true
-    host = "legion.neuralplatform.ai"
-    ssh_key_env = "LEGION_SSH_KEY"
-    cost_per_hour = 0.40
-
-    [providers.aws]
-    enabled = false
-    region = "us-east-1"
-    access_key_env = "AWS_ACCESS_KEY_ID"
-    secret_key_env = "AWS_SECRET_ACCESS_KEY"
-
-    [providers.azure]
-    enabled = false
-    subscription_id = ""
-
-    [storage]
-    primary = "r2"
-    replicate_to = []
-
-    [storage.gcs]
-    project = ""
-    buckets = []
-
-    [storage.r2]
-    account_id_env = "CF_R2_ACCOUNT_ID"
-    access_key_env = "CF_R2_ACCESS_KEY"
-
-    [identity]
-    vault_addr = "https://vault.neuralplatform.ai"
-    vault_token_env = "VAULT_TOKEN"
-    secret_rotation_days = 90
-
-    [inference.llm]
-    default_model = "NeuronTechnologiesAI/Neuron"
-    default_backend = "huggingface"
-
-    [inference.image_gen]
-    default_model = "lustify"
-    default_backend = "sd-forge"
-    default_width = 1024
-    default_height = 1024
-
-    [email]
-    provider = "smtp"
-    from_address = "noreply@neurontechnologies.ai"
-    from_name = "Neuron Technologies"
-    smtp_host = "smtp.postmarkapp.com"
-    smtp_port_env = "SMTP_PORT"
-
-    [telemetry]
-    otlp_endpoint = "http://alloy.monitoring.svc.cluster.local:4318"
-    otlp_tenant = "legion"
-    log_level = "info"
diff --git a/servers/legion/k8s/neuron-technologies/prod/soma-deployment.yaml b/servers/legion/k8s/neuron-technologies/prod/soma-deployment.yaml
deleted file mode 100644
index abfaeaa..0000000
--- a/servers/legion/k8s/neuron-technologies/prod/soma-deployment.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: soma
-  namespace: neuron-prod
-  labels:
-    app: soma
-    env: prod
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: soma
-  template:
-    metadata:
-      labels:
-        app: soma
-        env: prod
-    spec:
-      securityContext:
-        runAsUser: 1000
-        fsGroup: 1000
-        seccompProfile:
-          type: RuntimeDefault
-      containers:
-        - name: soma
-          image: registry.neuralplatform.ai/soma:latest
-          imagePullPolicy: Always
-          ports:
-            - name: http
-              containerPort: 8080
-          env:
-            - name: SOMA_CONFIG_PATH
-              value: /etc/soma/soma.toml
-          envFrom:
-            - secretRef:
-                name: soma-secrets
-          volumeMounts:
-            - name: config
-              mountPath: /etc/soma
-          securityContext:
-            allowPrivilegeEscalation: false
-            readOnlyRootFilesystem: false
-            capabilities:
-              drop: ["ALL"]
-          resources:
-            requests:
-              cpu: 500m
-              memory: 512Mi
-            limits:
-              cpu: 2000m
-              memory: 1Gi
-          livenessProbe:
-            httpGet:
-              path: /health
-              port: 8080
-            initialDelaySeconds: 10
-            periodSeconds: 30
-          readinessProbe:
-            httpGet:
-              path: /health
-              port: 8080
-            initialDelaySeconds: 5
-            periodSeconds: 10
-      volumes:
-        - name: config
-          configMap:
-            name: soma-config
diff --git a/servers/legion/k8s/neuron-technologies/prod/soma-externalsecret.yaml b/servers/legion/k8s/neuron-technologies/prod/soma-externalsecret.yaml
deleted file mode 100644
index a29014f..0000000
--- a/servers/legion/k8s/neuron-technologies/prod/soma-externalsecret.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-apiVersion: external-secrets.io/v1beta1
-kind: ExternalSecret
-metadata:
-  name: soma-secrets
-  namespace: neuron-prod
-spec:
-  refreshInterval: 1h
-  secretStoreRef:
-    name: vault
-    kind: ClusterSecretStore
-  target:
-    name: soma-secrets
-    creationPolicy: Owner
-  data:
-    - secretKey: SOMA_OPERATOR_KEY
-      remoteRef:
-        key: secret/data/soma
-        property: operator_key
-    - secretKey: HF_TOKEN
-      remoteRef:
-        key: secret/data/soma
-        property: hf_token
diff --git a/servers/legion/k8s/neuron-technologies/prod/soma-ingressroute.yaml b/servers/legion/k8s/neuron-technologies/prod/soma-ingressroute.yaml
deleted file mode 100644
index e06a4ad..0000000
--- a/servers/legion/k8s/neuron-technologies/prod/soma-ingressroute.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-# Traefik IngressRoute: ai.neurontechnologies.ai → soma service
-# Exposes soma as the Neuron AI inference gateway.
-# Endpoint: https://ai.neurontechnologies.ai/v1/chat/completions
-
-apiVersion: traefik.io/v1alpha1
-kind: IngressRoute
-metadata:
-  name: soma-ai
-  namespace: neuron-prod
-spec:
-  entryPoints:
-    - websecure
-  routes:
-    - match: Host(`ai.neurontechnologies.ai`)
-      kind: Rule
-      services:
-        - name: soma
-          port: 8080
diff --git a/servers/legion/k8s/neuron-technologies/prod/soma-service.yaml b/servers/legion/k8s/neuron-technologies/prod/soma-service.yaml
deleted file mode 100644
index 77f1e5e..0000000
--- a/servers/legion/k8s/neuron-technologies/prod/soma-service.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: soma
-  namespace: neuron-prod
-  labels:
-    app: soma
-spec:
-  selector:
-    app: soma
-  ports:
-    - name: http
-      port: 8080
-      targetPort: 8080
-  type: ClusterIP