#!/usr/bin/env bash
# seed-engram-gke.sh — seed the GKE soul's PVC with the latest engram backup
#
# Downloads the latest engram backup from GCS, extracts snapshot.json,
# and copies it into the neuron-prod PVC via a temporary seed Job.
#
# Usage:
#   seed-engram-gke.sh [--hostname <hostname>] [--backup <gcs-path>] [--dry-run]
#
# Options:
#   --hostname <hostname>  Mac hostname used as the backup source prefix
#                          Default: auto-detected from GCS bucket listing
#   --backup <gcs-path>    Full GCS path to a specific backup tarball
#                          Overrides --hostname; use if you want a specific backup
#   --dry-run              Print what would happen without executing
#   -h, --help             Show this help and exit
#
# Examples:
#   seed-engram-gke.sh
#   seed-engram-gke.sh --hostname Wills-MacBook-Pro.local
#   seed-engram-gke.sh --backup gs://neuron-db-backup/local-mac/Wills-MacBook-Pro.local/engram-20260509T1200.tar.gz

set -euo pipefail

NAMESPACE="neuron-prod"
PVC_NAME="neuron-engram-data"
GCS_BUCKET="gs://neuron-db-backup/local-mac"
HOSTNAME_PREFIX=""
SPECIFIC_BACKUP=""
DRY_RUN=false
SEED_JOB_NAME="engram-seed-$(date +%s)"

# Print "ERROR: <first arg>" plus any follow-up lines to stderr, then exit 1.
die() {
  printf 'ERROR: %s\n' "$1" >&2
  shift
  if [[ $# -gt 0 ]]; then
    printf ' %s\n' "$@" >&2
  fi
  exit 1
}

# Abort with a clear message when a required CLI tool is not on PATH.
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || die "Required command not found on PATH: $1"
}

# Print the header comment block (everything after the shebang up to the first
# non-comment line) as help text. Comments further down the script are NOT
# included, so inline "# Step N" notes do not leak into --help output.
usage() {
  awk 'NR == 1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"
  exit 0
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --hostname)
      # Guard the value explicitly: under `set -u` a missing "$2" would
      # otherwise abort with a cryptic "unbound variable" message.
      [[ $# -ge 2 ]] || die "--hostname requires a value"
      HOSTNAME_PREFIX="${2%/}"
      shift 2
      ;;
    --backup)
      [[ $# -ge 2 ]] || die "--backup requires a value"
      SPECIFIC_BACKUP="$2"
      shift 2
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    -h|--help)
      usage
      ;;
    *)
      printf 'Unknown argument: %s\n' "$1" >&2
      exit 1
      ;;
  esac
done

# Execute the given command, or only print it when --dry-run is active.
run() {
  if $DRY_RUN; then
    echo "[dry-run] $*"
  else
    "$@"
  fi
}

echo "==> Neuron Engram GKE Seed"
echo " Namespace: ${NAMESPACE}"
echo " PVC: ${PVC_NAME}"
echo ""

# Step 1: Locate the backup
if [[ -n "$SPECIFIC_BACKUP" ]]; then
  BACKUP_PATH="$SPECIFIC_BACKUP"
  echo "[1/5] Using specified backup: ${BACKUP_PATH}"
else
  echo "[1/5] Finding latest backup in GCS..."
  require_cmd gsutil

  if [[ -z "$HOSTNAME_PREFIX" ]]; then
    # Auto-detect: list prefixes and pick the first (most likely the only one).
    # The listing is captured on its own (not piped) so that a gsutil failure
    # reaches the error message below instead of silently killing the script
    # through `set -e` + `pipefail`; gsutil's stderr is left visible on purpose.
    if ! prefix_listing=$(gsutil ls "${GCS_BUCKET}/"); then
      die "Could not list ${GCS_BUCKET}/ (gsutil failed; see its output above)" \
        "Run: gsutil ls ${GCS_BUCKET}/"
    fi

    prefix_count=0
    while IFS= read -r entry; do
      # Only "directory" entries (trailing slash) are hostname prefixes.
      [[ "$entry" == */ ]] || continue
      name="${entry#"${GCS_BUCKET}/"}"
      name="${name%/}"
      # Skip the placeholder entry for the bucket prefix itself.
      [[ -n "$name" ]] || continue
      prefix_count=$((prefix_count + 1))
      if [[ -z "$HOSTNAME_PREFIX" ]]; then
        HOSTNAME_PREFIX="$name"
      fi
    done <<<"$prefix_listing"

    if [[ -z "$HOSTNAME_PREFIX" ]]; then
      die "Could not find any backup prefixes in ${GCS_BUCKET}/" \
        "Run: gsutil ls ${GCS_BUCKET}/"
    fi
    if [[ $prefix_count -gt 1 ]]; then
      echo " WARNING: ${prefix_count} hostname prefixes found; using the first. Pass --hostname to choose." >&2
    fi
    echo " Auto-detected hostname prefix: ${HOSTNAME_PREFIX}"
  fi

  if ! backup_listing=$(gsutil ls "${GCS_BUCKET}/${HOSTNAME_PREFIX}/"); then
    die "Could not list ${GCS_BUCKET}/${HOSTNAME_PREFIX}/ (gsutil failed; see its output above)"
  fi

  # Pick the lexicographically greatest engram-*.tar.gz object (timestamped
  # names sort chronologically). Done in-shell rather than with
  # `grep | sort -r | head -1` because under pipefail a no-match grep or a
  # SIGPIPE'd sort would abort the script before the error below is printed.
  # The pattern is anchored at the end so sidecar files such as
  # "*.tar.gz.sha256" are never selected.
  BACKUP_PATH=""
  while IFS= read -r entry; do
    [[ "$entry" == */engram-*.tar.gz ]] || continue
    if [[ -z "$BACKUP_PATH" || "$entry" > "$BACKUP_PATH" ]]; then
      BACKUP_PATH="$entry"
    fi
  done <<<"$backup_listing"

  if [[ -z "$BACKUP_PATH" ]]; then
    die "No backup found at ${GCS_BUCKET}/${HOSTNAME_PREFIX}/"
  fi
  echo " Latest backup: ${BACKUP_PATH}"
fi

# Step 2: Download and extract snapshot.json
TMP_DIR=$(mktemp -d)
trap 'rm -rf "${TMP_DIR}"' EXIT
ARCHIVE_PATH="${TMP_DIR}/engram.tar.gz"

echo "[2/5] Downloading backup..."
# With --backup the listing branch above is skipped, so check for gsutil here.
$DRY_RUN || require_cmd gsutil
run gsutil cp "${BACKUP_PATH}" "${ARCHIVE_PATH}"

echo "[3/5] Extracting snapshot.json..."
if $DRY_RUN; then
  echo "[dry-run] tar -xzf ${ARCHIVE_PATH} -C ${TMP_DIR}"
  SNAPSHOT_PATH="${TMP_DIR}/snapshot.json"
else
  # Refuse a truncated/corrupt download up front: a partially extracted
  # snapshot.json must never be seeded into the production PVC.
  if ! tar -tzf "${ARCHIVE_PATH}" >/dev/null; then
    die "Backup archive is corrupt or unreadable: ${BACKUP_PATH}"
  fi

  # Extraction itself stays best-effort (the archive was validated above), but
  # problems are reported instead of being discarded.
  if ! tar -xzf "${ARCHIVE_PATH}" -C "${TMP_DIR}"; then
    echo " WARNING: tar reported errors during extraction; continuing if snapshot.json is present" >&2
  fi

  # Handle nested paths (backup may be engram/snapshot.json or just snapshot.json).
  # `read` takes the first match; process substitution keeps find's exit status
  # (e.g. SIGPIPE) out of pipefail. `|| true` covers the no-match case, which is
  # reported just below.
  SNAPSHOT_PATH=""
  IFS= read -r SNAPSHOT_PATH < <(find "${TMP_DIR}" -type f -name "snapshot.json") || true

  if [[ -z "$SNAPSHOT_PATH" ]]; then
    echo "ERROR: snapshot.json not found in backup archive" >&2
    echo " Archive contents:" >&2
    tar -tzf "${ARCHIVE_PATH}" | head -n 20 >&2 || true
    exit 1
  fi
  if [[ ! -s "$SNAPSHOT_PATH" ]]; then
    die "snapshot.json in backup archive is empty: ${SNAPSHOT_PATH}"
  fi

  SNAPSHOT_SIZE=$(du -h "$SNAPSHOT_PATH" | cut -f1)
  echo " snapshot.json: ${SNAPSHOT_SIZE} at ${SNAPSHOT_PATH}"
fi

# Step 4: Copy into PVC via a seed Job
# The Job mounts the PVC, then we kubectl cp the snapshot.json into the pod.
echo "[4/5] Launching seed Job to access PVC..."
# Manifest for a short-lived Job whose only purpose is to mount the PVC at
# /data and stay alive long enough for `kubectl cp` / `kubectl exec`.
#
# NOTE(review): the original heredoc body was lost from this file. The manifest
# below is a reconstruction from what the rest of the script requires (Job
# named ${SEED_JOB_NAME} in ${NAMESPACE}, PVC ${PVC_NAME} mounted at /data, a
# container providing `tar` for kubectl cp and `stat -c`). The image, sleep
# duration and TTL are assumptions — confirm against the previous revision
# (and any cluster security policy) before running against production.
SEED_JOB_MANIFEST=$(cat <<EOF
apiVersion: batch/v1
kind: Job
metadata:
  name: ${SEED_JOB_NAME}
  namespace: ${NAMESPACE}
spec:
  backoffLimit: 0
  ttlSecondsAfterFinished: 300
  template:
    spec:
      restartPolicy: Never
      containers:
        - name: seed
          image: busybox:1.36
          command: ["sh", "-c", "sleep 3600"]
          volumeMounts:
            - name: engram-data
              mountPath: /data
      volumes:
        - name: engram-data
          persistentVolumeClaim:
            claimName: ${PVC_NAME}
EOF
)

# True while a seed Job may exist in the cluster and still needs deleting.
SEED_JOB_ACTIVE=false

# EXIT handler: best-effort removal of the seed Job (so a failed wait/cp/exec
# does not leave a pod holding the PVC) and of the local temp directory.
# Replaces the earlier EXIT trap, hence the temp-dir removal is repeated here.
seed_on_exit() {
  if $SEED_JOB_ACTIVE; then
    SEED_JOB_ACTIVE=false
    echo " Cleaning up seed Job after failure..." >&2
    kubectl delete job/"${SEED_JOB_NAME}" --namespace="${NAMESPACE}" --ignore-not-found >&2 || true
  fi
  if [[ -n "${TMP_DIR:-}" ]]; then
    rm -rf -- "${TMP_DIR}"
  fi
}
trap seed_on_exit EXIT

if $DRY_RUN; then
  echo "[dry-run] kubectl apply -f <seed-job-manifest>"
  echo "[dry-run] kubectl wait --for=condition=ready pod -l job-name=${SEED_JOB_NAME} -n ${NAMESPACE}"
  echo "[dry-run] kubectl cp ${SNAPSHOT_PATH} ${NAMESPACE}/<seed-pod>:/data/snapshot.json"
  echo "[dry-run] kubectl delete job/${SEED_JOB_NAME} -n ${NAMESPACE}"
else
  if ! command -v kubectl >/dev/null 2>&1; then
    echo "ERROR: Required command not found on PATH: kubectl" >&2
    exit 1
  fi

  # Mark active before applying so a partially created Job is still cleaned up
  # (the delete uses --ignore-not-found, so this is safe if apply fails early).
  SEED_JOB_ACTIVE=true
  printf '%s\n' "$SEED_JOB_MANIFEST" | kubectl apply -f -

  echo " Waiting for seed pod to be ready (timeout: 2m)..."
  # The Job controller creates the pod asynchronously; `kubectl wait` with a
  # label selector can fail immediately with "no matching resources found" if
  # it runs before the pod exists. Poll for the pod first, then wait on it.
  SEED_POD=""
  for (( attempt = 0; attempt < 30; attempt++ )); do
    SEED_POD=$(kubectl get pods \
      -l "job-name=${SEED_JOB_NAME}" \
      --namespace="${NAMESPACE}" \
      --output=jsonpath='{.items[*].metadata.name}')
    SEED_POD="${SEED_POD%% *}" # first pod name if several are listed
    if [[ -n "$SEED_POD" ]]; then
      break
    fi
    sleep 2
  done
  if [[ -z "$SEED_POD" ]]; then
    echo "ERROR: seed Job ${SEED_JOB_NAME} did not create a pod" >&2
    exit 1
  fi

  kubectl wait --for=condition=ready "pod/${SEED_POD}" \
    --namespace="${NAMESPACE}" \
    --timeout=2m
  echo " Seed pod: ${SEED_POD}"

  echo "[5/5] Copying snapshot.json into PVC..."
  kubectl cp "${SNAPSHOT_PATH}" "${NAMESPACE}/${SEED_POD}:/data/snapshot.json"

  # Verify: compare the byte count inside the PVC with the local file. A
  # mismatch is fatal; an unreadable size (e.g. no `stat -c` in the image) is
  # only a warning, matching the original best-effort check.
  LOCAL_SIZE=$(wc -c <"${SNAPSHOT_PATH}")
  LOCAL_SIZE="${LOCAL_SIZE//[[:space:]]/}"
  PVC_SIZE=$(kubectl exec "${SEED_POD}" --namespace="${NAMESPACE}" -- \
    stat -c%s /data/snapshot.json 2>/dev/null || echo "unknown")
  PVC_SIZE="${PVC_SIZE//[[:space:]]/}"
  echo " PVC snapshot.json size: ${PVC_SIZE} bytes"
  if [[ "$PVC_SIZE" =~ ^[0-9]+$ ]]; then
    if [[ "$PVC_SIZE" != "$LOCAL_SIZE" ]]; then
      echo "ERROR: size mismatch after copy (local ${LOCAL_SIZE} bytes, PVC ${PVC_SIZE} bytes)" >&2
      exit 1
    fi
  else
    echo " WARNING: could not verify the copied file size inside the PVC" >&2
  fi

  echo " Cleaning up seed Job..."
  kubectl delete job/"${SEED_JOB_NAME}" --namespace="${NAMESPACE}" --ignore-not-found
  SEED_JOB_ACTIVE=false
fi

echo ""
echo "==> Engram seed complete."
echo " The soul pod will read /data/snapshot.json on next boot."
echo ""
echo " Scale the blue deployment to pick up the seeded engram:"
echo " kubectl scale deployment/neuron-mcp-blue --replicas=1 -n ${NAMESPACE}"