Files
neuron/.gitea/workflows/deploy-gke.yaml
T
will.anderson 996dd3860a
Neuron Soul CI / build (push) Successful in 7m6s
Deploy Soul to GKE / deploy (push) Failing after 8m11s
fix: replace embedded python with sed in deploy-gke manifest update step
2026-06-19 15:25:22 -05:00

247 lines
10 KiB
YAML

name: Deploy Soul to GKE
# Triggers on push to main — after the soul binary is built and published
# by ci.yaml, this workflow builds the Docker image and blue-green deploys
# to the neuron-prod namespace on GKE.
#
# This workflow runs AFTER ci.yaml has published the neuron-soul generic
# artifact to Artifact Registry. The Docker build downloads that binary.
on:
push:
branches:
- main
workflow_dispatch:
inputs:
slot:
description: "Target blue-green slot (blue or green)"
required: false
default: "green"
# Serialize all builds on this runner — concurrent jobs exhaust the Docker daemon.
# A queued deploy runs after the in-progress build finishes.
concurrency:
group: neuron-runner
cancel-in-progress: false
jobs:
deploy:
runs-on: ubuntu-latest
env:
USE_GKE_GCLOUD_AUTH_PLUGIN: "True"
steps:
- name: Free disk space
run: |
df -h /
docker system prune -af --volumes 2>/dev/null || true
rm -rf /tmp/.act-* /tmp/act-* 2>/dev/null || true
df -h /
- name: Checkout
uses: actions/checkout@v4
- name: Install dependencies
run: |
apt-get update -qq
apt-get install -y --no-install-recommends \
ca-certificates curl apt-transport-https kubectl
echo "deb [trusted=yes] https://packages.cloud.google.com/apt cloud-sdk main" \
> /etc/apt/sources.list.d/google-cloud-sdk.list
apt-get update -qq && apt-get install -y google-cloud-cli google-cloud-cli-gke-gcloud-auth-plugin
- name: Authenticate to GCP
env:
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
run: |
echo "${GCP_SA_KEY}" > /tmp/gcp-key.json
gcloud auth activate-service-account --key-file=/tmp/gcp-key.json
gcloud config set project neuron-785695
gcloud auth configure-docker us-central1-docker.pkg.dev --quiet
- name: Get GKE credentials
run: |
gcloud container clusters get-credentials neuron-platform \
--region=us-central1 \
--project=neuron-785695
- name: Determine image tag and slot
id: vars
run: |
# GITEA_SHA is set by the Gitea runner; fall back to GITHUB_SHA for
# compatibility with older Forgejo/Gitea versions.
RAW_SHA="${GITEA_SHA:-${GITHUB_SHA:-}}"
SHA="${RAW_SHA:0:8}"
if [ -z "$SHA" ]; then
# Last resort: read from git directly
SHA=$(git rev-parse --short=8 HEAD 2>/dev/null || echo "unknown")
fi
IMAGE="us-central1-docker.pkg.dev/neuron-785695/neuron-api/neuron-soul:${SHA}"
echo "sha=${SHA}" >> "$GITEA_OUTPUT"
echo "image=${IMAGE}" >> "$GITEA_OUTPUT"
# Determine which slot is currently idle (0 replicas = idle slot)
# If both are at 0 (fresh deploy), default to blue
BLUE_REPLICAS=$(kubectl get deployment/neuron-mcp-blue \
-n neuron-prod \
-o jsonpath='{.spec.replicas}' 2>/dev/null || echo "0")
GREEN_REPLICAS=$(kubectl get deployment/neuron-mcp-green \
-n neuron-prod \
-o jsonpath='{.spec.replicas}' 2>/dev/null || echo "0")
echo " Blue replicas: ${BLUE_REPLICAS}"
echo " Green replicas: ${GREEN_REPLICAS}"
# Use workflow_dispatch override if provided, otherwise pick the idle slot
if [ "${{ github.event.inputs.slot }}" != "" ]; then
SLOT="${{ github.event.inputs.slot }}"
elif [ "${GREEN_REPLICAS}" -eq 0 ] && [ "${BLUE_REPLICAS}" -gt 0 ]; then
SLOT="green"
elif [ "${BLUE_REPLICAS}" -eq 0 ] && [ "${GREEN_REPLICAS}" -gt 0 ]; then
SLOT="blue"
else
# Fresh cluster — deploy to blue first
SLOT="blue"
fi
echo "slot=${SLOT}" >> "$GITEA_OUTPUT"
echo " Deploying to slot: ${SLOT}"
- name: Prepare build artifacts
run: |
# Pre-download soul binary and El SDK so the Dockerfile can COPY them
# from the build context instead of authenticating inside the build.
mkdir -p build-artifacts
# ── soul binary ────────────────────────────────────────────────────────
# ci.yaml publishes the soul binary to foundation-prod on every push.
# Download the latest version (the one just built by ci.yaml).
SOUL_VER=$(gcloud artifacts versions list \
--repository=foundation-prod \
--location=us-central1 \
--project=neuron-785695 \
--package=neuron-soul \
--sort-by="~createTime" \
--limit=1 \
--format="value(name)" 2>/dev/null | awk -F/ '{print $NF}')
echo "Downloading neuron-soul@${SOUL_VER}"
gcloud artifacts generic download \
--repository=foundation-prod \
--location=us-central1 \
--project=neuron-785695 \
--package=neuron-soul \
--version="${SOUL_VER}" \
--destination=build-artifacts/
mv build-artifacts/neuron* build-artifacts/neuron 2>/dev/null || true
chmod +x build-artifacts/neuron
# ── El SDK (for engram source compilation inside the build) ────────────
ELC_VER=$(gcloud artifacts versions list \
--repository=foundation-prod --location=us-central1 --project=neuron-785695 \
--package=el-elc --sort-by="~createTime" --limit=1 \
--format="value(name)" 2>/dev/null | awk -F/ '{print $NF}')
gcloud artifacts generic download \
--repository=foundation-prod --location=us-central1 --project=neuron-785695 \
--package=el-elc --version="${ELC_VER}" --destination=build-artifacts/
mv build-artifacts/elc* build-artifacts/elc 2>/dev/null || true
chmod +x build-artifacts/elc
RC_VER=$(gcloud artifacts versions list \
--repository=foundation-prod --location=us-central1 --project=neuron-785695 \
--package=el-runtime-c --sort-by="~createTime" --limit=1 \
--format="value(name)" 2>/dev/null | awk -F/ '{print $NF}')
gcloud artifacts generic download \
--repository=foundation-prod --location=us-central1 --project=neuron-785695 \
--package=el-runtime-c --version="${RC_VER}" --destination=build-artifacts/
mv build-artifacts/el_runtime.c* build-artifacts/el_runtime.c 2>/dev/null || true
RH_VER=$(gcloud artifacts versions list \
--repository=foundation-prod --location=us-central1 --project=neuron-785695 \
--package=el-runtime-h --sort-by="~createTime" --limit=1 \
--format="value(name)" 2>/dev/null | awk -F/ '{print $NF}')
gcloud artifacts generic download \
--repository=foundation-prod --location=us-central1 --project=neuron-785695 \
--package=el-runtime-h --version="${RH_VER}" --destination=build-artifacts/
mv build-artifacts/el_runtime.h* build-artifacts/el_runtime.h 2>/dev/null || true
echo "Build artifacts ready:"
ls -lh build-artifacts/
- name: Clone engram source for Docker build context
run: |
# The Dockerfile builds engram from source (no published AR package).
# Clone the engram repo into ./engram/ so it's available in the build context.
git clone http://34.31.145.131/neuron-technologies/engram.git \
--depth=1 --branch=main \
engram
echo "Engram source ready at ./engram/src/server.el"
- name: Build and push Docker image
run: |
IMAGE="${{ steps.vars.outputs.image }}"
echo "Building ${IMAGE}..."
# No --secret needed: artifacts are pre-downloaded into build-artifacts/
# and the Dockerfile uses COPY to include them.
docker build \
--tag "${IMAGE}" \
--tag "us-central1-docker.pkg.dev/neuron-785695/neuron-api/neuron-soul:latest" \
.
echo "Pushing ${IMAGE}..."
docker push "${IMAGE}"
docker push "us-central1-docker.pkg.dev/neuron-785695/neuron-api/neuron-soul:latest"
- name: Blue-green deploy to GKE
run: |
chmod +x scripts/blue-green-deploy.sh
scripts/blue-green-deploy.sh \
--image "${{ steps.vars.outputs.image }}" \
--slot "${{ steps.vars.outputs.slot }}"
- name: Update infrastructure manifests
if: success()
env:
INFRA_GIT_TOKEN: ${{ secrets.INFRA_GIT_TOKEN }}
run: |
SLOT="${{ steps.vars.outputs.slot }}"
if [ "$SLOT" = "blue" ]; then IDLE="green"; else IDLE="blue"; fi
git clone "http://${INFRA_GIT_TOKEN}@34.31.145.131/neuron-technologies/infrastructure.git" \
--depth=1 --branch=main /tmp/infra-update
cd /tmp/infra-update
DEPLOY_DIR="platform/k8s/neuron-mcp"
sed -i "s/^ replicas: .*/ replicas: 1/" "${DEPLOY_DIR}/deployment-${SLOT}.yaml"
sed -i "s/^ replicas: .*/ replicas: 0/" "${DEPLOY_DIR}/deployment-${IDLE}.yaml"
echo " deployment-${SLOT}.yaml: replicas set to 1"
echo " deployment-${IDLE}.yaml: replicas set to 0"
git config user.email "ci@neurontechnologies.ai"
git config user.name "Neuron CI"
git add "${DEPLOY_DIR}/deployment-blue.yaml" "${DEPLOY_DIR}/deployment-green.yaml"
git diff --staged --quiet && { echo "No manifest changes needed"; exit 0; }
git commit -m "ci: neuron-mcp replica sync after blue-green swap to ${SLOT}"
git push origin main
echo "Infrastructure manifests updated: ${SLOT}=1, ${IDLE}=0"
- name: Verify deployment
run: |
SLOT="${{ steps.vars.outputs.slot }}"
echo "Verifying neuron-mcp-${SLOT} is healthy..."
kubectl rollout status deployment/"neuron-mcp-${SLOT}" \
--namespace=neuron-prod \
--timeout=3m
echo "Active service endpoints:"
kubectl get endpoints neuron-mcp -n neuron-prod
echo "Pod status:"
kubectl get pods -n neuron-prod -l app=neuron-mcp
- name: Cleanup
if: always()
run: rm -f /tmp/gcp-key.json