Files
neuron/docs/rd/soma-architecture.html
will.anderson a76aaf4831
Deploy Soul to GKE / deploy (push) Failing after 27s
Neuron Soul CI / build (push) Failing after 4m26s
docs: add architecture, R&D, and patent strategy docs
2026-06-10 17:31:07 -05:00

1543 lines
85 KiB
HTML

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>SOMA — AI-Native Cloud Infrastructure · Eyes Only · Neuron Technologies</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Playfair+Display:ital,wght@0,700;1,400;1,700&family=IBM+Plex+Sans:ital,wght@0,400;0,500;0,600;1,400&family=IBM+Plex+Mono:wght@400;500&display=swap" rel="stylesheet">
<style>
*,*::before,*::after{box-sizing:border-box;margin:0;padding:0}
/* Design tokens shared by every rule in this stylesheet. */
:root {
  /* Surfaces */
  --bg: #FAFAF8;
  --bg2: #F0F0EC;
  --card: #FFFFFF;

  /* Accent families. Each solid colour is followed by alpha variants of the
     same RGB; rules in this sheet use -d for tinted backgrounds and -b for
     borders. */
  --navy: #0052A0;
  --navy-d: rgba(0,82,160,.06);
  --navy-m: rgba(0,82,160,.12);
  --navy-b: rgba(0,82,160,.22);

  --green: #1A7F4B;
  --green-d: rgba(26,127,75,.06);
  --green-b: rgba(26,127,75,.22);

  --amber: #B45309;
  --amber-d: rgba(180,83,9,.06);
  --amber-b: rgba(180,83,9,.22);

  --gold: #8B6914;
  --gold-d: rgba(139,105,20,.07);
  --gold-b: rgba(139,105,20,.25);

  /* Text shades, darkest to lightest */
  --t1: #0D0D14;
  --t2: #3A3A4A;
  --t3: #6B6B7E;

  /* Hairline borders (faint / stronger) */
  --border: rgba(0,0,0,.07);
  --border2: rgba(0,0,0,.13);

  /* Font stacks: headings, body copy, monospace labels */
  --head: 'Playfair Display',Georgia,serif;
  --body: 'IBM Plex Sans',system-ui,sans-serif;
  --mono: 'IBM Plex Mono','SF Mono',monospace;
}
/* Animate in-page scroll jumps. */
html {
  scroll-behavior: smooth;
}

/* Page defaults: body font stack, off-white canvas, primary text colour.
   Horizontal overflow is clipped at the page level. */
body {
  font-family: var(--body);
  font-size: 16px;
  line-height: 1.7;
  color: var(--t1);
  background: var(--bg);
  overflow-x: hidden;
}

/* Subtle grid texture: a fixed, click-through overlay covering the viewport,
   drawn as two 1px line gradients repeated on a 48px square. */
body::before {
  content: '';
  position: fixed;
  inset: 0;
  z-index: 0;
  pointer-events: none;
  background-image:
    linear-gradient(rgba(0,0,0,.025) 1px, transparent 1px),
    linear-gradient(90deg, rgba(0,0,0,.025) 1px, transparent 1px);
  background-size: 48px 48px;
}
/* NAVIGATION */
/* Sticky, translucent tab bar pinned to the top of the viewport.
   flex-wrap lets the tab buttons flow onto extra rows on narrow screens, so
   the bar uses min-height rather than a fixed height: it stays 54px tall
   while everything fits on one row and grows when rows wrap, instead of
   letting the wrapped rows spill over the content below. */
nav{
position:sticky;top:0;z-index:100;
background:rgba(250,250,248,.96);
-webkit-backdrop-filter:blur(10px); /* prefixed form for Safari releases without unprefixed support */
backdrop-filter:blur(10px);
border-bottom:1px solid var(--border2);
display:flex;align-items:center;
padding:0 32px;min-height:54px;gap:6px;flex-wrap:wrap;
}
.nav-wordmark{
font-family:var(--mono);font-size:.68rem;font-weight:500;
letter-spacing:.18em;color:var(--t1);
text-transform:uppercase;margin-right:auto;
}
.nav-link{
font-family:var(--mono);font-size:.52rem;
letter-spacing:.12em;text-transform:uppercase;
color:var(--t3);padding:4px 10px;border-radius:4px;
cursor:pointer;transition:all .2s;border:1px solid transparent;
background:none;
}
.nav-link:hover,.nav-link.active{
color:var(--navy);background:var(--navy-d);border-color:var(--navy-b);
}
.nav-badge{
font-family:var(--mono);font-size:.54rem;letter-spacing:.14em;
text-transform:uppercase;background:rgba(139,105,20,.08);
border:1px solid var(--gold-b);color:var(--gold);
padding:3px 10px;border-radius:99px;margin-left:8px;
}
/* SECTIONS */
.section{display:none;min-height:calc(100vh - 54px);}
.section.active{display:block;}
/* MASTHEAD */
.masthead{
text-align:center;
border-top:3px solid var(--t1);
border-bottom:1px solid var(--border2);
padding:36px 0 32px;
margin-bottom:0;
position:relative;z-index:1;
}
.masthead .dateline{
font-family:var(--mono);font-size:.56rem;letter-spacing:.20em;
text-transform:uppercase;color:var(--t3);margin-bottom:22px;
}
.masthead .eyebrow{
font-family:var(--mono);font-size:.62rem;letter-spacing:.18em;
text-transform:uppercase;color:var(--navy);margin-bottom:14px;font-weight:500;
}
.masthead h1{
font-family:var(--head);font-size:3.2rem;font-weight:700;
line-height:1.08;margin-bottom:16px;
}
.masthead h1 em{font-style:italic;color:var(--navy);}
.masthead .subtitle{
font-size:.95rem;color:var(--t3);max-width:520px;
margin:0 auto 28px;line-height:1.7;font-style:italic;
}
.masthead-stats{
display:inline-grid;grid-template-columns:repeat(4,1fr);
gap:0;border:1px solid var(--border2);
max-width:680px;width:100%;
}
.masthead-stat{
padding:16px 24px;border-right:1px solid var(--border2);
}
.masthead-stat:last-child{border-right:none;}
.masthead-stat-val{
font-family:var(--head);font-size:1.9rem;font-weight:700;
color:var(--navy);line-height:1;margin-bottom:4px;
}
.masthead-stat-label{
font-family:var(--mono);font-size:.52rem;letter-spacing:.12em;
text-transform:uppercase;color:var(--t3);
}
/* DOC PAGE LAYOUT */
.doc-page{
max-width:860px;margin:0 auto;
padding:72px 48px 120px;position:relative;z-index:1;
}
.doc-page h2{
font-family:var(--mono);font-size:.56rem;font-weight:500;
letter-spacing:.20em;text-transform:uppercase;color:var(--navy);
margin:60px 0 20px;padding-bottom:10px;
border-bottom:1px solid var(--border2);
}
.doc-page h2:first-child{margin-top:0;}
p{margin-bottom:.9em;font-size:.95rem;color:var(--t2);line-height:1.8;}
p strong{color:var(--t1);font-weight:600;}
/* SEC HEADER (within sections, used as sub-label) */
.sec-num{
display:block;
font-family:var(--mono);font-size:.56rem;font-weight:500;
letter-spacing:.20em;text-transform:uppercase;color:var(--navy);
margin-bottom:6px;
}
.sec-title{
font-family:var(--head);font-size:2.1rem;font-weight:700;
color:var(--t1);margin-bottom:20px;line-height:1.15;
}
.sec-desc{
font-size:.95rem;color:var(--t2);line-height:1.8;margin-bottom:40px;
font-family:var(--body);
}
/* CALLOUT BLOCKS */
.callout{
border-left:3px solid var(--navy);
padding:16px 22px;margin:20px 0;
background:var(--navy-d);border-radius:0 12px 12px 0;
font-family:var(--head);font-style:italic;
font-size:1.02rem;line-height:1.65;color:var(--t1);
}
.callout.amber{border-left-color:var(--amber);background:var(--amber-d);}
.callout.green{border-left-color:var(--green);background:var(--green-d);}
.callout.dark{
background:#0D0D14;border-left-color:rgba(0,82,160,.5);
color:#EEE9DC;border-radius:12px;padding:28px 32px;
position:relative;overflow:hidden;
}
.callout.dark::before{
content:'\201C';font-family:var(--head);font-size:14rem;
color:rgba(139,105,20,.06);position:absolute;
top:-60px;left:-10px;line-height:1;pointer-events:none;
}
.callout.dark .label{
font-family:var(--mono);font-size:.54rem;letter-spacing:.18em;
text-transform:uppercase;color:#c4a84a;margin-bottom:14px;position:relative;
}
.callout.dark p{color:#B8B4A8;position:relative;}
.callout.dark strong{color:#EEE9DC;}
/* CARDS */
.card{
border-radius:14px;border:1px solid var(--border2);
background:var(--card);padding:24px;
transition:transform .3s,box-shadow .3s;
position:relative;overflow:hidden;
}
.card:hover{transform:translateY(-2px);box-shadow:0 8px 24px rgba(0,0,0,.08);}
.card.expandable{cursor:pointer;}
.card.expandable .card-body{display:none;margin-top:16px;padding-top:16px;border-top:1px solid var(--border2);}
.card.expandable.open .card-body{display:block;}
.card-head{display:flex;align-items:center;justify-content:space-between;gap:16px;}
.card-icon{font-size:18px;margin-right:4px;}
.expand-arrow{font-size:10px;color:var(--t3);transition:transform .2s;flex-shrink:0;}
.card.open .expand-arrow{transform:rotate(180deg);}
.card-title{
font-family:var(--head);font-size:1.05rem;font-weight:700;
color:var(--t1);margin-bottom:8px;
}
.card-sub{
font-family:var(--mono);font-size:.54rem;letter-spacing:.12em;
text-transform:uppercase;color:var(--t3);margin-bottom:12px;
}
.card p{font-size:.9rem;color:var(--t2);line-height:1.75;}
/* VOLATILITY BADGES */
.vol{
display:inline-flex;align-items:center;gap:6px;
font-family:var(--mono);font-size:.52rem;letter-spacing:.1em;text-transform:uppercase;
padding:3px 10px;border-radius:99px;font-weight:500;
}
.vol-stable{background:var(--green-d);color:var(--green);border:1px solid var(--green-b);}
.vol-variable{background:var(--navy-d);color:var(--navy);border:1px solid var(--navy-b);}
.vol-dynamic{background:var(--amber-d);color:var(--amber);border:1px solid var(--amber-b);}
.vol-dot{width:5px;height:5px;border-radius:50%;background:currentColor;}
/* GRID LAYOUTS */
.grid-2{display:grid;grid-template-columns:1fr 1fr;gap:24px;}
.grid-3{display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px;}
.grid-4{display:grid;grid-template-columns:repeat(4,1fr);gap:16px;}
/* TAGS / PILLS */
.tag{
display:inline-block;font-family:var(--mono);
font-size:.52rem;letter-spacing:.08em;text-transform:uppercase;
padding:3px 9px;border-radius:6px;
background:var(--bg2);color:var(--t3);
border:1px solid var(--border2);margin:2px;
}
.tag-navy{background:var(--navy-d);color:var(--navy);border-color:var(--navy-b);}
.tag-green{background:var(--green-d);color:var(--green);border-color:var(--green-b);}
.tag-amber{background:var(--amber-d);color:var(--amber);border-color:var(--amber-b);}
.tag-gold{background:var(--gold-d);color:var(--gold);border-color:var(--gold-b);}
/* legacy aliases for content using tag-cyan / tag-violet */
.tag-cyan{background:var(--navy-d);color:var(--navy);border-color:var(--navy-b);}
.tag-violet{background:var(--amber-d);color:var(--amber);border-color:var(--amber-b);}
.tag-red{background:rgba(180,83,9,.06);color:var(--amber);border-color:var(--amber-b);}
/* TABLES */
table{width:100%;border-collapse:collapse;font-size:.875rem;font-family:var(--body);}
th{
font-family:var(--mono);font-size:.52rem;letter-spacing:.12em;text-transform:uppercase;
color:var(--t3);padding:10px 14px;
text-align:left;border-bottom:1px solid var(--border2);
}
td{padding:10px 14px;border-bottom:1px solid var(--border);color:var(--t2);vertical-align:top;}
tr:hover td{background:var(--bg2);}
td:first-child{color:var(--t1);font-weight:600;}
/* MONO BLOCKS */
.mono-block{
background:var(--bg2);border:1px solid var(--border2);
border-left:3px solid var(--navy);
padding:16px 20px;font-family:var(--mono);font-size:.8rem;
color:var(--t2);line-height:1.9;border-radius:0 8px 8px 0;
}
.mono-block .key{color:var(--navy);font-weight:500;}
.mono-block .val{color:var(--green);}
.mono-block .comment{color:var(--t3);}
/* DIAGRAM */
.diagram-container{
background:var(--bg2);border:1px solid var(--border2);
border-radius:14px;padding:28px;overflow-x:auto;
}
.diagram-legend{
display:flex;gap:24px;flex-wrap:wrap;
margin-top:20px;padding-top:16px;
border-top:1px solid var(--border2);
}
.legend-item{display:flex;align-items:center;gap:8px;font-family:var(--mono);font-size:.6rem;color:var(--t3);letter-spacing:.06em;}
.legend-box{width:16px;height:12px;border-radius:3px;flex-shrink:0;}
/* FLOW LINES */
.flow-line{display:flex;align-items:center;gap:0;font-size:.85rem;margin-bottom:8px;}
.flow-node{
padding:5px 14px;border-radius:8px;
background:var(--bg2);border:1px solid var(--border2);
color:var(--t2);white-space:nowrap;font-family:var(--mono);font-size:.7rem;letter-spacing:.06em;
}
.flow-node.active-node{border-color:var(--navy-b);color:var(--navy);background:var(--navy-d);}
.flow-arrow{padding:0 8px;color:var(--t3);font-size:14px;}
/* IMPROVEMENT LOOPS */
.loop{
display:flex;gap:24px;align-items:flex-start;
padding:24px 0;border-bottom:1px solid var(--border2);
}
.loop-num{
font-family:var(--head);font-size:2.4rem;font-weight:700;
color:var(--navy);opacity:.18;flex-shrink:0;width:52px;line-height:1;
}
.loop-content h4{
font-family:var(--head);font-size:1.05rem;font-weight:700;
color:var(--t1);margin-bottom:8px;
}
.loop-content p{font-size:.9rem;color:var(--t2);line-height:1.75;}
.loop-delta{
margin-top:10px;font-family:var(--mono);font-size:.6rem;letter-spacing:.08em;
color:var(--green);padding:5px 12px;background:var(--green-d);
border:1px solid var(--green-b);border-radius:6px;display:inline-block;
}
/* TIMELINE */
.timeline{position:relative;padding-left:32px;}
.timeline::before{
content:'';position:absolute;left:8px;top:0;bottom:0;
width:1px;background:var(--border2);
}
.timeline-item{position:relative;margin-bottom:36px;}
.timeline-item::before{
content:'';position:absolute;left:-28px;top:6px;
width:9px;height:9px;border-radius:50%;
background:var(--navy);box-shadow:0 0 0 3px var(--navy-d);
}
.timeline-year{
font-family:var(--mono);font-size:.58rem;color:var(--navy);
letter-spacing:.15em;text-transform:uppercase;margin-bottom:8px;font-weight:500;
}
.timeline-content h4{
font-family:var(--head);font-size:1.05rem;font-weight:700;
color:var(--t1);margin-bottom:8px;
}
.timeline-content p{font-size:.9rem;color:var(--t2);line-height:1.75;}
/* OPERATOR SECTION */
.operator-grid{display:grid;grid-template-columns:1fr 2fr;gap:24px;}
.operator-profile{
background:var(--card);border:1px solid var(--border2);
border-radius:14px;padding:28px;text-align:center;
}
.operator-avatar{
width:80px;height:80px;border-radius:50%;
border:2px solid var(--navy-b);background:var(--navy-d);
margin:0 auto 16px;display:flex;align-items:center;justify-content:center;
font-size:32px;
}
.operator-name{
font-family:var(--head);font-size:1.2rem;font-weight:700;
color:var(--navy);margin-bottom:4px;
}
.operator-role{
font-family:var(--mono);font-size:.54rem;color:var(--t3);
letter-spacing:.14em;text-transform:uppercase;
}
/* ROUTING DECISION TREE */
.decision-tree{font-family:var(--mono);font-size:.78rem;line-height:2.1;color:var(--t2);}
.decision-tree .if{color:var(--navy);font-weight:500;}
.decision-tree .then{color:var(--green);font-weight:500;}
.decision-tree .indent{padding-left:24px;}
.decision-tree .indent2{padding-left:48px;}
/* STATUS INDICATORS */
.status{
display:inline-flex;align-items:center;gap:6px;
font-family:var(--mono);font-size:.54rem;letter-spacing:.12em;text-transform:uppercase;
}
.status-dot{width:6px;height:6px;border-radius:50%;animation:pulse 2s infinite;}
.status-green .status-dot{background:var(--green);box-shadow:0 0 6px rgba(26,127,75,.5);}
.status-amber .status-dot{background:var(--amber);box-shadow:0 0 6px rgba(180,83,9,.5);}
.status-red .status-dot{background:#B91C1C;box-shadow:0 0 6px rgba(185,28,28,.5);}
@keyframes pulse{0%,100%{opacity:1;}50%{opacity:.4;}}
/* DIVIDER */
.divider{height:1px;background:var(--border2);margin:28px 0;}
/* UTILITY */
.mb-8{margin-bottom:8px;}
.mb-16{margin-bottom:16px;}
.mb-24{margin-bottom:24px;}
.mb-32{margin-bottom:32px;}
.mt-16{margin-top:16px;}
.mt-24{margin-top:24px;}
.text-navy{color:var(--navy);}
.text-green{color:var(--green);}
.text-amber{color:var(--amber);}
.text-muted{color:var(--t3);}
.text-small{font-size:.82rem;}
/* REVEAL ANIMATIONS */
/* .reveal starts transparent and shifted 28px down; adding .visible fades and
   slides it into place. NOTE(review): .visible is presumably toggled by script
   elsewhere in the file; confirm. */
.reveal{opacity:0;transform:translateY(28px);transition:opacity .7s cubic-bezier(.16,1,.3,1),transform .7s cubic-bezier(.16,1,.3,1);}
.reveal.visible{opacity:1;transform:translateY(0);}
/* SVG FLOW ANIMATION */
/* Dash pattern is 6+4 = 10 units, so shifting the offset by -20 moves exactly
   two periods and the loop restarts without a visible jump. */
@keyframes flowDash{to{stroke-dashoffset:-20;}}
.flow-animated{stroke-dasharray:6 4;animation:flowDash 1.2s linear infinite;}
/* Slow breathing glow for the highlighted diagram node. */
@keyframes nodeGlow{
0%,100%{filter:drop-shadow(0 0 3px rgba(0,82,160,.3));}
50%{filter:drop-shadow(0 0 8px rgba(0,82,160,.6));}
}
.node-core{animation:nodeGlow 3s ease-in-out infinite;}
/* REDUCED MOTION */
/* Honour the user's OS-level "reduce motion" setting: stop the infinite
   animations, disable smooth scrolling, and show .reveal content immediately
   rather than sliding it in. Connector lines keep their static dash pattern. */
@media (prefers-reduced-motion:reduce){
html{scroll-behavior:auto;}
.reveal{opacity:1;transform:none;transition:none;}
.flow-animated,.node-core,.status-dot{animation:none;}
}
/* SCROLLBAR */
::-webkit-scrollbar{width:6px;height:6px;}
::-webkit-scrollbar-track{background:var(--bg2);}
::-webkit-scrollbar-thumb{background:var(--border2);border-radius:3px;}
::-webkit-scrollbar-thumb:hover{background:var(--navy-b);}
/* RESPONSIVE */
/* At 900px and below: collapse the multi-column grids to a single column,
   fold the masthead stats into two columns, and tighten page/nav padding. */
@media(max-width:900px){
.grid-2,.grid-3,.grid-4{grid-template-columns:1fr;}
.masthead-stats{grid-template-columns:1fr 1fr;}
/* The four stats now form a 2x2 grid. Drop the right border on the
   right-hand column (it would double up with the container border) and add
   a divider under the first row so the cells stay visually separated. */
.masthead-stat:nth-child(2n){border-right:none;}
.masthead-stat:nth-child(-n+2){border-bottom:1px solid var(--border2);}
.operator-grid{grid-template-columns:1fr;}
.doc-page{padding:48px 24px 80px;}
nav{padding:0 16px;gap:2px;}
.nav-link{padding:4px 8px;}
}
</style>
</head>
<body>
<!-- NAVIGATION -->
<!-- Tab bar. Each button calls showSection() with a section key; that function
     is defined outside this excerpt (the key presumably maps to the element
     with id "sec-" + key; confirm against the script). Buttons carry an
     explicit type so they can never act as submit buttons, and the landmark
     is labelled for assistive technology. -->
<nav aria-label="Sections">
  <div class="nav-wordmark">NEURON · SOMA</div>
  <button type="button" class="nav-link active" onclick="showSection('overview')">Overview</button>
  <button type="button" class="nav-link" onclick="showSection('diagram')">Diagram</button>
  <button type="button" class="nav-link" onclick="showSection('components')">Components</button>
  <button type="button" class="nav-link" onclick="showSection('routing')">Routing</button>
  <button type="button" class="nav-link" onclick="showSection('workloads')">Workloads</button>
  <button type="button" class="nav-link" onclick="showSection('loops')">Design Loops</button>
  <button type="button" class="nav-link" onclick="showSection('operator')">Operator</button>
  <button type="button" class="nav-link" onclick="showSection('strategy')">5-Year Play</button>
  <span class="nav-badge">EYES ONLY</span>
</nav>
<!-- MASTHEAD: sits outside the tabbed sections, so it stays on screen
     whichever tab is active. -->
<div class="masthead">
  <div class="dateline">NEURON TECHNOLOGIES · INTERNAL PLANNING · 2025-04</div>
  <div class="eyebrow">AI-NATIVE CLOUD INFRASTRUCTURE</div>
  <h1>The <em>Soma</em> Architecture</h1>
  <p class="subtitle">A compute abstraction layer that treats AI inference capacity as a managed resource pool — routed, provisioned, and optimized across the full provider landscape.</p>

  <!-- Project status pill -->
  <div style="display:flex;align-items:center;justify-content:center;gap:20px;margin-bottom:28px;">
    <span class="status status-green"><span class="status-dot"></span><span class="text-muted">DESIGN PHASE</span></span>
  </div>

  <!-- Headline figures: four value/label cells -->
  <div class="masthead-stats">
    <div class="masthead-stat">
      <div class="masthead-stat-val">10</div>
      <div class="masthead-stat-label">Core Components</div>
    </div>
    <div class="masthead-stat">
      <div class="masthead-stat-val">3</div>
      <div class="masthead-stat-label">Volatility Tiers</div>
    </div>
    <div class="masthead-stat">
      <div class="masthead-stat-val">4</div>
      <div class="masthead-stat-label">Workload Envs</div>
    </div>
    <div class="masthead-stat">
      <div class="masthead-stat-val">&#8734;</div>
      <div class="masthead-stat-label">Provider Agnostic</div>
    </div>
  </div>
</div>
<!-- SECTION: OVERVIEW -->
<!-- First tab, marked active in the markup. Contains the problem/answer card
     pair, the three design principles and the vision key/value block. -->
<section class="section active" id="sec-overview">
  <div class="doc-page">
    <span class="sec-num">01 // Strategic Overview</span>
    <div class="sec-title">The Central Insight</div>
    <p class="sec-desc">Soma is the compute abstraction layer for Neuron Technologies — a platform that treats AI inference capacity as a managed resource pool rather than a static deployment target. The central insight: AI workloads are heterogeneous, bursty, and cost-sensitive. No single provider wins on all dimensions. Soma routes, provisions, and optimizes across the full provider landscape, presenting a unified API surface to the application tier.</p>

    <!-- Problem vs. answer. The connector glyphs between flow nodes are
         decorative, so they are hidden from assistive technology. -->
    <div class="grid-2 mb-32 reveal">
      <div class="card">
        <div class="card-title">The Core Problem</div>
        <p>AI-native applications require GPU compute that is simultaneously: expensive at rest, scarce at peak, and fragmented across providers. Teams make architectural bets on specific clouds, then pay the price — vendor lock-in, idle capacity, or service gaps during demand spikes.</p>
        <div style="margin-top:16px;padding-top:16px;border-top:1px solid var(--border2);">
          <div class="flow-line">
            <div class="flow-node">Request arrives</div>
            <div class="flow-arrow" aria-hidden="true">→</div>
            <div class="flow-node" style="opacity:.4">Which provider?</div>
            <div class="flow-arrow" aria-hidden="true">→</div>
            <div class="flow-node" style="opacity:.2">???</div>
          </div>
        </div>
      </div>
      <div class="card">
        <div class="card-title">The Soma Answer</div>
        <p>A control plane that knows the real cost, latency, and availability of every attached compute node — and routes requests based on workload tier, cost oracle signals, and live health. Providers become fungible. The router becomes the intelligence.</p>
        <div style="margin-top:16px;padding-top:16px;border-top:1px solid var(--border2);">
          <div class="flow-line">
            <div class="flow-node active-node">Request arrives</div>
            <div class="flow-arrow" aria-hidden="true">→</div>
            <div class="flow-node active-node">SOMA Router</div>
            <div class="flow-arrow" aria-hidden="true">→</div>
            <div class="flow-node active-node">Optimal node</div>
          </div>
        </div>
      </div>
    </div>

    <h2>Design Principles</h2>
    <div class="grid-3 mb-32 reveal">
      <div class="card">
        <div class="card-sub">Design Principle 01</div>
        <div class="card-title">Provider Abstraction</div>
        <p>RunPod, Legion, AWS, Azure, GCP, and bare metal are all first-class node types. Soma treats them identically at the routing layer. Provider-specific adapters handle provisioning; the core stays clean.</p>
      </div>
      <div class="card">
        <div class="card-sub">Design Principle 02</div>
        <div class="card-title">Volatility Isolation</div>
        <p>Stable contracts (API specs, data schemas) are separated from variable behavior (routing logic) and dynamic state (live cost, availability, active jobs). Changes in one tier cannot break another. This is VBD in practice.</p>
      </div>
      <div class="card">
        <div class="card-sub">Design Principle 03</div>
        <div class="card-title">AI-First Operation</div>
        <p>Neuron is the operator. Soma exposes structured, machine-readable interfaces at every layer — cost signals, health events, provisioning APIs. Autonomous operation is the design target, not the bolt-on.</p>
      </div>
    </div>

    <h2>Vision Codex</h2>
    <div class="mono-block reveal">
      <div><span class="key">SOMA_VISION</span> = <span class="val">"Treat GPU compute like an intelligent power grid"</span></div>
      <div><span class="key">ROUTING_MODEL</span> = <span class="val">"tier-first, cost-second, latency-third"</span> <span class="comment"># deterministic priority stack</span></div>
      <div><span class="key">PROVIDER_STRATEGY</span> = <span class="val">"no single provider exceeds 60% of active capacity"</span> <span class="comment"># anti-concentration rule</span></div>
      <div><span class="key">WARM_POOL</span> = <span class="val">"always maintain ≥1 warm node per inference type"</span> <span class="comment"># cold-start mitigation</span></div>
      <div><span class="key">COST_TARGET</span> = <span class="val">"autoscale to zero on idle, pre-warm before predicted demand"</span></div>
    </div>
  </div>
</section>
<!-- SECTION: DIAGRAM -->
<section class="section" id="sec-diagram">
<div class="doc-page">
<span class="sec-num">02 // Architecture Diagram</span>
<div class="sec-title">Volatility-Based System Map</div>
<div class="diagram-container reveal">
<svg viewBox="0 0 1000 720" xmlns="http://www.w3.org/2000/svg" style="width:100%;max-width:1000px;display:block;margin:0 auto;font-family:'IBM Plex Mono',monospace;">
<!-- Background tier bands -->
<!-- STABLE TIER (bottom) -->
<rect x="0" y="570" width="1000" height="150" fill="rgba(26,127,75,.04)" rx="0"/>
<text x="12" y="590" fill="rgba(26,127,75,.5)" font-size="9" letter-spacing="2" text-anchor="start" font-family="'IBM Plex Mono',monospace">STABLE TIER</text>
<!-- VARIABLE TIER (middle) -->
<rect x="0" y="280" width="1000" height="290" fill="rgba(0,82,160,.03)" rx="0"/>
<text x="12" y="300" fill="rgba(0,82,160,.45)" font-size="9" letter-spacing="2" font-family="'IBM Plex Mono',monospace">VARIABLE TIER</text>
<!-- DYNAMIC TIER (top) -->
<rect x="0" y="10" width="1000" height="270" fill="rgba(180,83,9,.03)" rx="0"/>
<text x="12" y="30" fill="rgba(180,83,9,.45)" font-size="9" letter-spacing="2" font-family="'IBM Plex Mono',monospace">DYNAMIC TIER</text>
<!-- Tier boundary lines -->
<line x1="0" y1="280" x2="1000" y2="280" stroke="rgba(0,82,160,.18)" stroke-dasharray="4 6"/>
<line x1="0" y1="570" x2="1000" y2="570" stroke="rgba(26,127,75,.18)" stroke-dasharray="4 6"/>
<!-- =========================================== -->
<!-- DYNAMIC TIER COMPONENTS -->
<!-- =========================================== -->
<!-- Neuron Interface (top center) -->
<rect x="390" y="40" width="220" height="60" fill="#FFF8F0" stroke="rgba(180,83,9,.65)" stroke-width="1.5" rx="4" class="node-core"/>
<text x="500" y="64" fill="#B45309" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">NEURON INTERFACE</text>
<text x="500" y="80" fill="rgba(180,83,9,.6)" font-size="8" text-anchor="middle" font-family="'IBM Plex Mono',monospace">AI OPERATOR · AUTONOMOUS MGMT</text>
<!-- Observer (top right) -->
<rect x="740" y="40" width="200" height="60" fill="#FAFAF8" stroke="rgba(180,83,9,.4)" stroke-width="1" stroke-dasharray="4 2" rx="4"/>
<text x="840" y="64" fill="#B45309" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">OBSERVER</text>
<text x="840" y="80" fill="rgba(180,83,9,.5)" font-size="8" text-anchor="middle" font-family="'IBM Plex Mono',monospace">TELEMETRY · COST TRACKING · ANOMALY</text>
<!-- Cost Oracle (top left) -->
<rect x="60" y="40" width="200" height="60" fill="#FAFAF8" stroke="rgba(180,83,9,.4)" stroke-width="1" stroke-dasharray="4 2" rx="4"/>
<text x="160" y="64" fill="#B45309" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">COST ORACLE</text>
<text x="160" y="80" fill="rgba(180,83,9,.5)" font-size="8" text-anchor="middle" font-family="'IBM Plex Mono',monospace">REAL-TIME PRICING · SPOT SIGNALS</text>
<!-- Control Plane (middle of dynamic tier) -->
<rect x="250" y="145" width="260" height="70" fill="#FAFAF8" stroke="rgba(180,83,9,.55)" stroke-width="1.5" rx="4"/>
<text x="380" y="174" fill="#B45309" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">CONTROL PLANE</text>
<text x="380" y="191" fill="rgba(180,83,9,.5)" font-size="8" text-anchor="middle" font-family="'IBM Plex Mono',monospace">NODE REGISTRY · MODEL CATALOG · HEALTH MONITOR</text>
<!-- Workload Orchestrator -->
<rect x="590" y="145" width="230" height="70" fill="#FAFAF8" stroke="rgba(180,83,9,.4)" stroke-width="1" stroke-dasharray="5 3" rx="4"/>
<text x="705" y="174" fill="#B45309" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">WORKLOAD ORCH.</text>
<text x="705" y="191" fill="rgba(180,83,9,.5)" font-size="8" text-anchor="middle" font-family="'IBM Plex Mono',monospace">PROVISION · CONFIGURE · TERMINATE</text>
<!-- =========================================== -->
<!-- VARIABLE TIER COMPONENTS -->
<!-- =========================================== -->
<!-- Soma Router (center, variable) -->
<rect x="340" y="305" width="320" height="80" fill="#EEF4FF" stroke="rgba(0,82,160,.75)" stroke-width="2" rx="6"/>
<text x="500" y="336" fill="#0052A0" font-size="12" font-weight="700" text-anchor="middle" letter-spacing="2" font-family="'IBM Plex Mono',monospace">SOMA ROUTER</text>
<text x="500" y="355" fill="rgba(0,82,160,.65)" font-size="8" text-anchor="middle" font-family="'IBM Plex Mono',monospace">TIER CLASSIFY · COST OPTIMIZE · LOAD BALANCE</text>
<text x="500" y="372" fill="rgba(0,82,160,.38)" font-size="7" text-anchor="middle" font-family="'IBM Plex Mono',monospace">LOW / MEDIUM / HIGH</text>
<!-- Inference Services (variable tier, right) -->
<rect x="720" y="300" width="220" height="88" fill="#FAFAF8" stroke="rgba(0,82,160,.35)" stroke-width="1" rx="4"/>
<text x="830" y="322" fill="#0052A0" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">INFERENCE SERVICES</text>
<rect x="730" y="330" width="60" height="20" fill="rgba(0,82,160,.06)" stroke="rgba(0,82,160,.2)" rx="3"/>
<text x="760" y="344" fill="rgba(0,82,160,.7)" font-size="7" text-anchor="middle" font-family="'IBM Plex Mono',monospace">LLM</text>
<rect x="798" y="330" width="60" height="20" fill="rgba(0,82,160,.06)" stroke="rgba(0,82,160,.2)" rx="3"/>
<text x="828" y="344" fill="rgba(0,82,160,.7)" font-size="7" text-anchor="middle" font-family="'IBM Plex Mono',monospace">IMAGE GEN</text>
<rect x="730" y="355" width="130" height="20" fill="rgba(0,82,160,.06)" stroke="rgba(0,82,160,.2)" rx="3"/>
<text x="795" y="369" fill="rgba(0,82,160,.7)" font-size="7" text-anchor="middle" font-family="'IBM Plex Mono',monospace">VIDEO (SVD)</text>
<!-- Pipeline Engine (variable tier, left) -->
<rect x="60" y="300" width="220" height="88" fill="#FAFAF8" stroke="rgba(0,82,160,.35)" stroke-width="1" rx="4"/>
<text x="170" y="322" fill="#0052A0" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">PIPELINE ENGINE</text>
<text x="170" y="340" fill="rgba(0,82,160,.5)" font-size="8" text-anchor="middle" font-family="'IBM Plex Mono',monospace">PANTHEON CONDUCTOR</text>
<text x="170" y="356" fill="rgba(0,82,160,.35)" font-size="7" text-anchor="middle" font-family="'IBM Plex Mono',monospace">22-STEP INFERENCE PIPELINE</text>
<rect x="72" y="362" width="196" height="16" fill="rgba(0,82,160,.04)" stroke="rgba(0,82,160,.15)" rx="2"/>
<text x="170" y="374" fill="rgba(0,82,160,.45)" font-size="7" text-anchor="middle" font-family="'IBM Plex Mono',monospace">INHERITED · BATTLE-TESTED</text>
<!-- Secrets Layer (variable) -->
<rect x="60" y="420" width="200" height="60" fill="#FAFAF8" stroke="rgba(0,82,160,.3)" stroke-width="1" rx="4"/>
<text x="160" y="446" fill="#0052A0" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">SECRETS LAYER</text>
<text x="160" y="463" fill="rgba(0,82,160,.5)" font-size="8" text-anchor="middle" font-family="'IBM Plex Mono',monospace">VAULT · CUSTOMER ISOLATED</text>
<!-- Node Pool (variable, right of center) -->
<rect x="290" y="430" width="440" height="110" fill="#FAFAF8" stroke="rgba(0,82,160,.35)" stroke-width="1" rx="4"/>
<text x="510" y="452" fill="#0052A0" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">NODE POOL</text>
<!-- Individual nodes -->
<rect x="300" y="460" width="72" height="28" fill="rgba(0,82,160,.05)" stroke="rgba(0,82,160,.18)" rx="3"/>
<text x="336" y="478" fill="rgba(0,82,160,.75)" font-size="7" text-anchor="middle" font-family="'IBM Plex Mono',monospace">RUNPOD</text>
<rect x="380" y="460" width="72" height="28" fill="rgba(0,82,160,.05)" stroke="rgba(0,82,160,.18)" rx="3"/>
<text x="416" y="478" fill="rgba(0,82,160,.75)" font-size="7" text-anchor="middle" font-family="'IBM Plex Mono',monospace">LEGION</text>
<rect x="460" y="460" width="72" height="28" fill="rgba(0,82,160,.05)" stroke="rgba(0,82,160,.18)" rx="3"/>
<text x="496" y="478" fill="rgba(0,82,160,.75)" font-size="7" text-anchor="middle" font-family="'IBM Plex Mono',monospace">AWS</text>
<rect x="540" y="460" width="72" height="28" fill="rgba(0,82,160,.05)" stroke="rgba(0,82,160,.18)" rx="3"/>
<text x="576" y="478" fill="rgba(0,82,160,.75)" font-size="7" text-anchor="middle" font-family="'IBM Plex Mono',monospace">AZURE/GCP</text>
<rect x="620" y="460" width="96" height="28" fill="rgba(0,82,160,.05)" stroke="rgba(0,82,160,.18)" rx="3"/>
<text x="668" y="478" fill="rgba(0,82,160,.75)" font-size="7" text-anchor="middle" font-family="'IBM Plex Mono',monospace">BARE METAL</text>
<!-- Node status dots -->
<circle cx="324" cy="497" r="3" fill="#1A7F4B"/>
<circle cx="404" cy="497" r="3" fill="#1A7F4B"/>
<circle cx="484" cy="497" r="3" fill="#B45309"/>
<circle cx="564" cy="497" r="3" fill="rgba(0,82,160,.7)"/>
<circle cx="656" cy="497" r="3" fill="#1A7F4B"/>
<text x="330" y="500" fill="rgba(0,0,0,.25)" font-size="6" font-family="'IBM Plex Mono',monospace">WARM</text>
<text x="410" y="500" fill="rgba(0,0,0,.25)" font-size="6" font-family="'IBM Plex Mono',monospace">WARM</text>
<text x="490" y="500" fill="rgba(0,0,0,.25)" font-size="6" font-family="'IBM Plex Mono',monospace">COLD</text>
<text x="570" y="500" fill="rgba(0,0,0,.25)" font-size="6" font-family="'IBM Plex Mono',monospace">PROV.</text>
<text x="662" y="500" fill="rgba(0,0,0,.25)" font-size="6" font-family="'IBM Plex Mono',monospace">WARM</text>
<!-- =========================================== -->
<!-- STABLE TIER COMPONENTS -->
<!-- =========================================== -->
<!-- Storage Layer -->
<rect x="100" y="590" width="280" height="60" fill="#F0FAF4" stroke="rgba(26,127,75,.45)" stroke-width="1.5" rx="4"/>
<text x="240" y="615" fill="#1A7F4B" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">STORAGE LAYER</text>
<text x="240" y="632" fill="rgba(26,127,75,.6)" font-size="8" text-anchor="middle" font-family="'IBM Plex Mono',monospace">R2/S3 BLOB · MODEL REGISTRY · ARTIFACT STORE</text>
<!-- Model Catalog (stable contracts) -->
<rect x="440" y="590" width="200" height="60" fill="#F0FAF4" stroke="rgba(26,127,75,.45)" stroke-width="1.5" rx="4"/>
<text x="540" y="615" fill="#1A7F4B" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">MODEL CATALOG</text>
<text x="540" y="632" fill="rgba(26,127,75,.6)" font-size="8" text-anchor="middle" font-family="'IBM Plex Mono',monospace">VERSIONED · CAPABILITY INDEXED</text>
<!-- API Contracts -->
<rect x="700" y="590" width="200" height="60" fill="#F0FAF4" stroke="rgba(26,127,75,.45)" stroke-width="1.5" rx="4"/>
<text x="800" y="615" fill="#1A7F4B" font-size="10" font-weight="700" text-anchor="middle" letter-spacing="1" font-family="'IBM Plex Mono',monospace">API CONTRACTS</text>
<text x="800" y="632" fill="rgba(26,127,75,.6)" font-size="8" text-anchor="middle" font-family="'IBM Plex Mono',monospace">STABLE INTERFACES · VERSIONED SPECS</text>
<!-- =========================================== -->
<!-- CONNECTIONS / FLOW ARROWS -->
<!-- =========================================== -->
<!-- Neuron → Control Plane -->
<line x1="500" y1="100" x2="380" y2="145" stroke="rgba(180,83,9,.55)" stroke-width="1.5" class="flow-animated"/>
<!-- Neuron → Observer -->
<line x1="610" y1="70" x2="740" y2="70" stroke="rgba(180,83,9,.35)" stroke-width="1" class="flow-animated"/>
<!-- Neuron → Cost Oracle -->
<line x1="390" y1="70" x2="260" y2="70" stroke="rgba(180,83,9,.35)" stroke-width="1" class="flow-animated"/>
<!-- Control Plane → Soma Router -->
<line x1="380" y1="215" x2="420" y2="305" stroke="rgba(0,82,160,.5)" stroke-width="1.5"/>
<polygon points="416,298 424,298 420,308" fill="rgba(0,82,160,.5)"/>
<!-- Workload Orch → Node Pool -->
<line x1="705" y1="215" x2="650" y2="430" stroke="rgba(180,83,9,.35)" stroke-width="1" stroke-dasharray="4 3"/>
<!-- Cost Oracle → Soma Router -->
<line x1="260" y1="70" x2="350" y2="310" stroke="rgba(180,83,9,.25)" stroke-width="1" stroke-dasharray="3 4"/>
<!-- Soma Router → Node Pool -->
<line x1="500" y1="385" x2="510" y2="430" stroke="rgba(0,82,160,.65)" stroke-width="2"/>
<polygon points="505,423 515,423 510,433" fill="rgba(0,82,160,.65)"/>
<!-- Soma Router → Inference Services -->
<line x1="660" y1="345" x2="720" y2="345" stroke="rgba(0,82,160,.55)" stroke-width="1.5"/>
<polygon points="714,340 724,345 714,350" fill="rgba(0,82,160,.55)"/>
<!-- Soma Router → Pipeline Engine -->
<line x1="340" y1="345" x2="280" y2="345" stroke="rgba(0,82,160,.45)" stroke-width="1.5"/>
<polygon points="286,340 276,345 286,350" fill="rgba(0,82,160,.45)"/>
<!-- Storage Layer → Model Catalog -->
<line x1="380" y1="620" x2="440" y2="620" stroke="rgba(26,127,75,.3)" stroke-width="1" stroke-dasharray="3 3"/>
<!-- Model Catalog → Soma Router (upward, model selection) -->
<line x1="540" y1="590" x2="500" y2="390" stroke="rgba(26,127,75,.38)" stroke-width="1" stroke-dasharray="3 4"/>
<!-- Observer → Control Plane -->
<line x1="800" y1="100" x2="510" y2="145" stroke="rgba(180,83,9,.28)" stroke-width="1" stroke-dasharray="2 4"/>
<!-- Secrets → Node Pool -->
<line x1="260" y1="450" x2="290" y2="480" stroke="rgba(0,82,160,.25)" stroke-width="1" stroke-dasharray="2 3"/>
</svg>
<!-- Legend for the architecture diagram. Colour swatches are decorative; the adjacent text carries the meaning. -->
<div class="diagram-legend">
<div class="legend-item">
<div class="legend-box" style="background:var(--green-d);border:1.5px solid var(--green-b);"></div>
<span>Stable — solid border, versioned contracts</span>
</div>
<div class="legend-item">
<div class="legend-box" style="background:var(--navy-d);border:1px solid var(--navy-b);"></div>
<span>Variable — routing logic, service adapters</span>
</div>
<div class="legend-item">
<div class="legend-box" style="background:var(--amber-d);border:1px dashed var(--amber-b);"></div>
<span>Dynamic — live state, cost signals, health</span>
</div>
<div class="legend-item">
<!-- Decorative line sample: hidden from assistive tech so it is not announced as an unnamed graphic. -->
<svg width="32" height="12" aria-hidden="true"><line x1="0" y1="6" x2="32" y2="6" stroke="rgba(0,82,160,.6)" stroke-width="2" stroke-dasharray="4 3"/></svg>
<span>Animated flow — active data paths</span>
</div>
<div class="legend-item">
<div style="display:flex;gap:8px;">
<span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:#1A7F4B;"></span>Warm</span>
<span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:#B45309;"></span>Cold</span>
<span style="display:flex;align-items:center;gap:4px;"><span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:#0052A0;"></span>Provisioning</span>
</div>
</div>
</div>
</div>
</div>
</section>
<!-- SECTION: COMPONENTS -->
<section class="section" id="sec-components">
<div class="doc-page">
<span class="sec-num">03 // Component Reference</span>
<div class="sec-title">The Ten Components</div>
<p class="sec-desc">Each component is classified by volatility tier — how frequently its behavior changes under normal operation. Stable components provide durable contracts. Variable components implement logic that evolves with business needs. Dynamic components reflect live system state.</p>
<!-- Component cards, expandable -->
<div class="grid-2" id="component-grid">
<div class="card expandable reveal" onclick="toggleCard(this)">
<!-- Header doubles as the keyboard toggle: Enter/Space call the same toggleCard(card) that the card's click handler uses. -->
<div class="card-head" role="button" tabindex="0" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();toggleCard(this.parentElement)}">
<div>
<div class="card-sub mb-8">Component 01</div>
<div class="card-title">Control Plane</div>
<span class="vol vol-dynamic"><span class="vol-dot"></span>Dynamic</span>
</div>
<span class="expand-arrow"></span>
</div>
<div class="card-body">
<p class="mb-16">The authoritative registry for everything Soma knows about its infrastructure. Maintains the live node registry (which providers are attached, their current state), the model catalog pointer (which models are where), and runs the health monitor loop.</p>
<div class="mb-16">
<div class="card-sub mb-8">KEY RESPONSIBILITIES</div>
<div class="tag tag-navy">Node Registry</div>
<div class="tag tag-navy">Model Catalog Ref</div>
<div class="tag tag-navy">Health Monitor</div>
<div class="tag tag-navy">Capacity Planner</div>
</div>
<div class="mono-block">
<div><span class="key">interfaces</span>: [<span class="val">"router/node-list"</span>, <span class="val">"orchestrator/provision-targets"</span>, <span class="val">"observer/health-events"</span>]</div>
<div><span class="key">change_frequency</span>: <span class="val">"continuous"</span> <span class="comment"># live state updates</span></div>
<div><span class="key">stability</span>: <span class="val">"contract stable, state dynamic"</span></div>
</div>
</div>
</div>
<div class="card expandable reveal" onclick="toggleCard(this)">
<!-- Header doubles as the keyboard toggle: Enter/Space call the same toggleCard(card) that the card's click handler uses. -->
<div class="card-head" role="button" tabindex="0" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();toggleCard(this.parentElement)}">
<div>
<div class="card-sub mb-8">Component 02</div>
<div class="card-title">Soma Router</div>
<span class="vol vol-variable"><span class="vol-dot"></span>Variable</span>
</div>
<span class="expand-arrow"></span>
</div>
<div class="card-body">
<p class="mb-16">The intelligence core. Classifies incoming requests by tier (Low/Medium/High), consults the cost oracle for current pricing, and routes to the optimal node in the pool. Implements load balancing, failover, and warm-pool preference logic.</p>
<div class="mb-16">
<div class="card-sub mb-8">KEY RESPONSIBILITIES</div>
<div class="tag tag-navy">Tier Classification</div>
<div class="tag tag-navy">Cost Optimization</div>
<div class="tag tag-navy">Load Balancing</div>
<div class="tag tag-navy">Failover Logic</div>
<div class="tag tag-navy">Model Selection</div>
</div>
<div class="mono-block">
<div><span class="key">routing_priority</span>: [<span class="val">"tier"</span>, <span class="val">"cost"</span>, <span class="val">"latency"</span>, <span class="val">"health"</span>]</div>
<div><span class="key">change_frequency</span>: <span class="val">"weekly/sprint"</span> <span class="comment"># routing policy changes</span></div>
<div><span class="key">owns</span>: <span class="val">"decision logic only — no state"</span></div>
</div>
</div>
</div>
<div class="card expandable reveal" onclick="toggleCard(this)">
<!-- Header doubles as the keyboard toggle: Enter/Space call the same toggleCard(card) that the card's click handler uses. -->
<div class="card-head" role="button" tabindex="0" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();toggleCard(this.parentElement)}">
<div>
<div class="card-sub mb-8">Component 03</div>
<div class="card-title">Node Pool</div>
<span class="vol vol-variable"><span class="vol-dot"></span>Variable</span>
</div>
<span class="expand-arrow"></span>
</div>
<div class="card-body">
<p class="mb-16">The heterogeneous fleet. Each node has a type (GPU class), provider, warm/cold state, current load, and cost-per-hour. The pool is managed by the Workload Orchestrator, observed by the Control Plane, and served to the Router as a live capability map.</p>
<div class="mb-16">
<div class="card-sub mb-8">NODE TYPES</div>
<div class="tag">RunPod (H100/A100)</div>
<div class="tag">Legion (RTX 4090)</div>
<div class="tag">AWS (p3/p4)</div>
<div class="tag">Azure (ND-series)</div>
<div class="tag">GCP (A100)</div>
<div class="tag">Bare Metal</div>
</div>
<div class="mono-block">
<div><span class="key">node_schema</span>: {<span class="val">type, provider, gpu_class, vram_gb, state, cost_hr, load_pct</span>}</div>
<div><span class="key">min_warm</span>: <span class="val">1</span> <span class="comment"># per inference type — cold-start prevention</span></div>
<div><span class="key">anti_concentration</span>: <span class="val">"no provider &gt; 60% capacity"</span></div>
</div>
</div>
</div>
<div class="card expandable reveal" onclick="toggleCard(this)">
<!-- Header doubles as the keyboard toggle: Enter/Space call the same toggleCard(card) that the card's click handler uses. -->
<div class="card-head" role="button" tabindex="0" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();toggleCard(this.parentElement)}">
<div>
<div class="card-sub mb-8">Component 04</div>
<div class="card-title">Inference Services</div>
<span class="vol vol-variable"><span class="vol-dot"></span>Variable</span>
</div>
<span class="expand-arrow"></span>
</div>
<div class="card-body">
<p class="mb-16">The runtime layer that executes inference. Three service types: LLM (Ollama/vLLM for text generation), Image Gen (Stable Diffusion Forge), Video (SVD/AnimateDiff). Each type has its own warm-pool strategy, resource profile, and SLA contract.</p>
<div class="mb-16">
<div class="tag tag-navy">LLM · Ollama/vLLM</div>
<div class="tag tag-navy">Image Gen · SD Forge</div>
<div class="tag tag-navy">Video · SVD</div>
</div>
<div class="mono-block">
<div><span class="key">llm_cold_start</span>: <span class="val">"~15s model load"</span> <span class="comment"># mitigated by warm pool</span></div>
<div><span class="key">image_cold_start</span>: <span class="val">"~30s forge init"</span></div>
<div><span class="key">video_cold_start</span>: <span class="val">"~60s SVD pipeline"</span></div>
<div><span class="key">sla_target</span>: <span class="val">"P95 &lt; 2s queue time"</span></div>
</div>
</div>
</div>
<div class="card expandable reveal" onclick="toggleCard(this)">
<!-- Header doubles as the keyboard toggle: Enter/Space call the same toggleCard(card) that the card's click handler uses. -->
<div class="card-head" role="button" tabindex="0" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();toggleCard(this.parentElement)}">
<div>
<div class="card-sub mb-8">Component 05</div>
<div class="card-title">Pipeline Engine</div>
<span class="vol vol-variable"><span class="vol-dot"></span>Variable</span>
</div>
<span class="expand-arrow"></span>
</div>
<div class="card-body">
<p class="mb-16">Inherited from Pantheon. The conductor orchestrates multi-step inference pipelines: prompt conditioning → model routing → generation → post-processing → artifact storage. The 22-step pipeline is battle-tested and carried forward as a stable inner module within a variable envelope.</p>
<div class="mb-16">
<div class="tag tag-navy">Pantheon Conductor</div>
<div class="tag tag-navy">22-Step Pipeline</div>
<div class="tag tag-navy">DAG Execution</div>
<div class="tag tag-navy">Retry/Backoff</div>
</div>
<div class="mono-block">
<div><span class="key">execution_model</span>: <span class="val">"DAG — directed acyclic graph"</span></div>
<div><span class="key">step_count</span>: <span class="val">22</span> <span class="comment"># inherited from Pantheon</span></div>
<div><span class="key">failure_mode</span>: <span class="val">"step-level retry with exponential backoff"</span></div>
</div>
</div>
</div>
<div class="card expandable reveal" onclick="toggleCard(this)">
<!-- Header doubles as the keyboard toggle: Enter/Space call the same toggleCard(card) that the card's click handler uses. -->
<div class="card-head" role="button" tabindex="0" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();toggleCard(this.parentElement)}">
<div>
<div class="card-sub mb-8">Component 06</div>
<div class="card-title">Storage Layer</div>
<span class="vol vol-stable"><span class="vol-dot"></span>Stable</span>
</div>
<span class="expand-arrow"></span>
</div>
<div class="card-body">
<p class="mb-16">Durable artifact and model storage. Three sub-concerns: blob storage (R2/S3 for generated images, video, documents), model registry (versioned model weights with capability metadata), and the artifact store (pipeline outputs indexed by job ID).</p>
<div class="mb-16">
<div class="tag tag-green">R2/S3 Blob</div>
<div class="tag tag-green">Model Registry</div>
<div class="tag tag-green">Artifact Store</div>
<div class="tag tag-green">Versioned</div>
</div>
<div class="mono-block">
<div><span class="key">durability</span>: <span class="val">"11 nines (S3/R2 native)"</span></div>
<div><span class="key">model_index</span>: <span class="val">"capability → model → location"</span></div>
<div><span class="key">change_frequency</span>: <span class="val">"schema stable; data continuous"</span></div>
</div>
</div>
</div>
<div class="card expandable reveal" onclick="toggleCard(this)">
<!-- Header doubles as the keyboard toggle: Enter/Space call the same toggleCard(card) that the card's click handler uses. -->
<div class="card-head" role="button" tabindex="0" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();toggleCard(this.parentElement)}">
<div>
<div class="card-sub mb-8">Component 07</div>
<div class="card-title">Secrets Layer</div>
<span class="vol vol-variable"><span class="vol-dot"></span>Variable</span>
</div>
<span class="expand-arrow"></span>
</div>
<div class="card-body">
<p class="mb-16">Vault-backed secrets management with customer isolation. Each customer context gets a dedicated Vault namespace. Provider API keys, model access tokens, and customer credentials are injected at runtime — never baked into images or config files.</p>
<div class="mb-16">
<div class="tag tag-navy">HashiCorp Vault</div>
<div class="tag tag-navy">Customer Namespaces</div>
<div class="tag tag-navy">ESO Integration</div>
<div class="tag tag-navy">Rotation Policy</div>
</div>
<div class="mono-block">
<div><span class="key">isolation</span>: <span class="val">"per-customer Vault namespace"</span></div>
<div><span class="key">injection</span>: <span class="val">"runtime only — no static secrets"</span></div>
<div><span class="key">rotation</span>: <span class="val">"provider keys rotated &lt;90 days"</span></div>
</div>
</div>
</div>
<div class="card expandable reveal" onclick="toggleCard(this)">
<!-- Header doubles as the keyboard toggle: Enter/Space call the same toggleCard(card) that the card's click handler uses. -->
<div class="card-head" role="button" tabindex="0" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();toggleCard(this.parentElement)}">
<div>
<div class="card-sub mb-8">Component 08</div>
<div class="card-title">Workload Orchestrator</div>
<span class="vol vol-dynamic"><span class="vol-dot"></span>Dynamic</span>
</div>
<span class="expand-arrow"></span>
</div>
<div class="card-body">
<p class="mb-16">Provisions, configures, monitors, and terminates compute environments on demand. Responds to router signals (capacity needed) and observer signals (idle nodes). Implements the cost-to-zero autoscaling strategy — nodes are terminated when idle beyond threshold.</p>
<div class="mb-16">
<div class="tag tag-amber">Provision</div>
<div class="tag tag-amber">Configure</div>
<div class="tag tag-amber">Monitor</div>
<div class="tag tag-amber">Terminate</div>
<div class="tag tag-amber">Scale-to-Zero</div>
</div>
<div class="mono-block">
<div><span class="key">idle_threshold</span>: <span class="val">"15min"</span> <span class="comment"># before termination signal</span></div>
<div><span class="key">pre_warm_signal</span>: <span class="val">"from Observer predicted load"</span></div>
<div><span class="key">provision_time</span>: <span class="val">"RunPod ~90s, Legion ~10s, Cloud ~3-8m"</span></div>
</div>
</div>
</div>
<div class="card expandable reveal" onclick="toggleCard(this)">
<!-- Header doubles as the keyboard toggle: Enter/Space call the same toggleCard(card) that the card's click handler uses. -->
<div class="card-head" role="button" tabindex="0" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();toggleCard(this.parentElement)}">
<div>
<div class="card-sub mb-8">Component 09</div>
<div class="card-title">Observer</div>
<span class="vol vol-dynamic"><span class="vol-dot"></span>Dynamic</span>
</div>
<span class="expand-arrow"></span>
</div>
<div class="card-body">
<p class="mb-16">The telemetry backbone. Collects metrics from every node and service, aggregates cost in real-time (per-job, per-customer, per-provider), and runs anomaly detection. Surfaces structured events to Neuron for autonomous decision-making.</p>
<div class="mb-16">
<div class="tag tag-amber">OTLP Telemetry</div>
<div class="tag tag-amber">Cost Aggregation</div>
<div class="tag tag-amber">Anomaly Detection</div>
<div class="tag tag-amber">Neuron Events</div>
</div>
<div class="mono-block">
<div><span class="key">backend</span>: <span class="val">"Grafana/Loki/Tempo stack"</span></div>
<div><span class="key">cost_granularity</span>: <span class="val">"per-job precision"</span></div>
<div><span class="key">anomaly_action</span>: <span class="val">"emit event → Neuron decides"</span></div>
</div>
</div>
</div>
<div class="card expandable reveal" onclick="toggleCard(this)">
<!-- Header doubles as the keyboard toggle: Enter/Space call the same toggleCard(card) that the card's click handler uses. -->
<div class="card-head" role="button" tabindex="0" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();toggleCard(this.parentElement)}">
<div>
<div class="card-sub mb-8">Component 10</div>
<div class="card-title">Neuron Interface</div>
<span class="vol vol-stable"><span class="vol-dot"></span>Stable</span>
</div>
<span class="expand-arrow"></span>
</div>
<div class="card-body">
<p class="mb-16">The contract surface through which the AI operator (Neuron) manages Soma. Exposes structured, machine-readable APIs for every management action. The interface spec is stable — Neuron's reasoning adapts; the API does not change arbitrarily. This is the boundary between AI cognition and system action.</p>
<div class="mb-16">
<div class="tag tag-green">Mgmt API</div>
<div class="tag tag-green">Event Stream</div>
<div class="tag tag-green">Cost Dashboard</div>
<div class="tag tag-green">Policy Controls</div>
</div>
<div class="mono-block">
<div><span class="key">design</span>: <span class="val">"machine-first, human-readable second"</span></div>
<div><span class="key">event_format</span>: <span class="val">"structured JSON with severity + context"</span></div>
<div><span class="key">auth</span>: <span class="val">"Neuron service identity (Vault-issued)"</span></div>
</div>
</div>
</div>
</div>
</div>
</section>
<!-- SECTION: ROUTING -->
<section class="section" id="sec-routing">
<div class="doc-page">
<span class="sec-num">04 // Routing Intelligence</span>
<div class="sec-title">The Decision Engine</div>
<p class="sec-desc">The Soma Router is a deterministic decision engine, not an ML model. Predictability and auditability matter more than marginal optimization gains. Every routing decision is logged with its full decision chain.</p>
<div class="grid-2 mb-32">
<div>
<div class="card mb-16 reveal">
<div class="card-title mb-16">Tier Classification</div>
<!-- Tier reference table: the header row sits in <thead> with column scopes so assistive tech can associate each cell with its column. -->
<table>
<thead>
<tr><th scope="col">Tier</th><th scope="col">Criteria</th><th scope="col">Example</th><th scope="col">Priority</th></tr>
</thead>
<tbody>
<tr>
<td><span class="tag tag-green">LOW</span></td>
<td>Batch, async, non-time-sensitive</td>
<td>Overnight fine-tune eval, bulk captioning</td>
<td>Cost-first</td>
</tr>
<tr>
<td><span class="tag tag-navy">MEDIUM</span></td>
<td>Interactive, &lt;30s SLA</td>
<td>Chat completion, image generation</td>
<td>Balance cost/latency</td>
</tr>
<tr>
<td><span class="tag tag-amber">HIGH</span></td>
<td>Real-time, &lt;2s SLA, user-facing</td>
<td>Live assistant, streaming response</td>
<td>Latency-first</td>
</tr>
</tbody>
</table>
</div>
<div class="card reveal">
<div class="card-title mb-16">Cost Oracle Signals</div>
<p class="mb-16">The cost oracle is queried on every routing decision. It aggregates:</p>
<!-- Cost oracle inputs, written as "key: value" like the other mono-blocks so adjacent spans do not run together. -->
<div class="mono-block">
<div><span class="comment"># Inputs to cost oracle</span></div>
<div><span class="key">spot_price</span>: <span class="val">RunPod/AWS real-time bid</span></div>
<div><span class="key">committed_idle</span>: <span class="val">Legion always-on cost</span></div>
<div><span class="key">marginal_cost</span>: <span class="val">per-token / per-image</span></div>
<div><span class="key">queue_depth</span>: <span class="val">wait cost vs. provision cost</span></div>
<div><span class="key">warm_bonus</span>: <span class="val">discount for already-warm nodes</span></div>
</div>
</div>
</div>
<div class="card reveal">
<div class="card-title mb-16">Routing Decision Tree</div>
<div class="decision-tree">
<div><span class="if">RECEIVE</span> request(model, tier, budget)</div>
<div class="indent"></div>
<div class="indent"><span class="if">CLASSIFY</span> tier → LOW | MEDIUM | HIGH</div>
<div class="indent2"></div>
<div class="indent2"><span class="if">IF</span> tier == HIGH:</div>
<div style="padding-left:72px;"><span class="then">SELECT</span> lowest-latency warm node</div>
<div style="padding-left:72px;"><span class="then">BYPASS</span> cost oracle (latency wins)</div>
<div class="indent2"><span class="if">ELIF</span> tier == MEDIUM:</div>
<div style="padding-left:72px;"><span class="then">QUERY</span> cost oracle</div>
<div style="padding-left:72px;"><span class="then">SELECT</span> warm node within budget</div>
<div style="padding-left:72px;"><span class="if">IF</span> no warm node: provision cheapest</div>
<div class="indent2"><span class="if">ELIF</span> tier == LOW:</div>
<div style="padding-left:72px;"><span class="then">QUERY</span> cost oracle</div>
<div style="padding-left:72px;"><span class="then">SELECT</span> cheapest (warm or cold)</div>
<div style="padding-left:72px;"><span class="then">ACCEPT</span> cold-start latency</div>
<div class="indent"></div>
<div class="indent"><span class="if">CHECK</span> selected node health</div>
<div class="indent2"><span class="if">IF</span> unhealthy: reroute to next candidate</div>
<div class="indent2"><span class="if">IF</span> no candidates: emit capacity alert → Neuron</div>
<div class="indent"></div>
<div class="indent"><span class="then">DISPATCH</span> + <span class="then">LOG</span> decision chain</div>
</div>
</div>
</div>
<div class="card mb-24 reveal">
<div class="card-title mb-16">Model Selection Logic</div>
<div class="grid-3">
<div>
<div class="card-sub mb-8">Capability Matching</div>
<p>Request declares required capabilities (context_length, multimodal, function_calling, language). Router queries Model Catalog for candidates. Capability match is a hard filter — no degraded fallback without explicit permission.</p>
</div>
<div>
<div class="card-sub mb-8">Version Policy</div>
<p>Model pinning is supported per-customer. Default policy: latest stable version. Canary deployments route 5% of traffic to new model version before promotion. Rollback is instantaneous (router policy change, no redeployment).</p>
</div>
<div>
<div class="card-sub mb-8">Fallback Chain</div>
<p>If the preferred model is unavailable: try capability-equivalent model on same provider → try same model on different provider → try next-tier model with customer notification → queue with ETA. Fallbacks are audited and surfaced to the Observer.</p>
</div>
</div>
</div>
<div class="card reveal">
<div class="card-title mb-16">Anti-Patterns Explicitly Avoided</div>
<!-- Anti-pattern comparison table: the header row sits in <thead> with column scopes so assistive tech can associate each cell with its column. -->
<table>
<thead>
<tr><th scope="col">Anti-Pattern</th><th scope="col">Why Avoided</th><th scope="col">Soma Approach</th></tr>
</thead>
<tbody>
<tr>
<td>Random load balancing</td>
<td>Ignores cost, warm state, GPU class mismatch</td>
<td>Cost-oracle weighted selection</td>
</tr>
<tr>
<td>ML-based router</td>
<td>Non-auditable, training drift, cold-start irony</td>
<td>Deterministic rule tree, logged decisions</td>
</tr>
<tr>
<td>Single-provider lock</td>
<td>Outage = full outage; pricing leverage lost</td>
<td>Anti-concentration rule (60% cap per provider)</td>
</tr>
<tr>
<td>Always-warm everything</td>
<td>Cost explodes; GPU idle waste</td>
<td>Tier-based warm pool: only HIGH tier always warm</td>
</tr>
</tbody>
</table>
</div>
</div>
</section>
<!-- SECTION: WORKLOADS -->
<section class="section" id="sec-workloads">
<div class="doc-page">
<span class="sec-num">05 // Workload Environments</span>
<div class="sec-title">Four Environment Types</div>
<p class="sec-desc">Soma provisions four environment types. Each has a defined resource profile, warm-pool policy, and billing model. Environments are ephemeral by default — they exist to run a workload, then terminate.</p>
<div class="grid-2">
<div class="card reveal" style="border-left:3px solid var(--navy);">
<div class="card-sub mb-8">ENV-01 · INTERACTIVE</div>
<div class="card-title" style="color:var(--navy);">Studio</div>
<div style="margin-bottom:16px;">
<span class="vol vol-dynamic"><span class="vol-dot"></span>Always warm</span>
<span class="tag tag-navy" style="margin-left:8px;">HIGH tier</span>
</div>
<p class="mb-16">User-facing creative workspace. Chat, image generation, real-time feedback loops. Latency-critical — cold starts are unacceptable. Legion is the preferred provider (zero egress, instant start). RunPod H100 as hot failover.</p>
<div class="mono-block">
<div><span class="key">gpu</span>: <span class="val">RTX 4090 or A100</span></div>
<div><span class="key">warm_policy</span>: <span class="val">"always 1 warm per active user session"</span></div>
<div><span class="key">billing</span>: <span class="val">"per-session, pro-rated to minute"</span></div>
<div><span class="key">sla</span>: <span class="val">"P99 &lt; 1s TTFT (time to first token)"</span></div>
</div>
</div>
<div class="card reveal" style="border-left:3px solid var(--amber);">
<div class="card-sub mb-8">ENV-02 · LIGHTWEIGHT</div>
<div class="card-title" style="color:var(--amber);">Mini</div>
<div style="margin-bottom:16px;">
<span class="vol vol-variable"><span class="vol-dot"></span>On-demand</span>
<span class="tag tag-amber" style="margin-left:8px;">MEDIUM tier</span>
</div>
<p class="mb-16">Small tasks, quantized models, cost-optimized throughput. API integrations, automated pipelines, batch API consumers. Accepts up to 15s cold-start penalty. Prefers spot pricing.</p>
<div class="mono-block">
<div><span class="key">gpu</span>: <span class="val">T4, A10, 3090 class</span></div>
<div><span class="key">warm_policy</span>: <span class="val">"1 shared warm node per region"</span></div>
<div><span class="key">billing</span>: <span class="val">"per-request, token-metered"</span></div>
<div><span class="key">sla</span>: <span class="val">"P95 &lt; 30s total response"</span></div>
</div>
</div>
<div class="card reveal" style="border-left:3px solid var(--gold);">
<div class="card-sub mb-8">ENV-03 · EXPERIMENTAL</div>
<div class="card-title" style="color:var(--gold);">Crucible</div>
<div style="margin-bottom:16px;">
<span class="vol vol-dynamic"><span class="vol-dot"></span>Ephemeral</span>
<span class="tag tag-gold" style="margin-left:8px;">LOW tier</span>
</div>
<p class="mb-16">Research, fine-tuning, LoRA training, model evaluation. Long-running jobs, max GPU VRAM, cost-tolerant on runtime but optimized on launch. Uses reserved RunPod pods or Legion when idle. The Crucible runs Lorablation and evaluation harnesses.</p>
<div class="mono-block">
<div><span class="key">gpu</span>: <span class="val">H100, H200 (80GB+ VRAM req.)</span></div>
<div><span class="key">warm_policy</span>: <span class="val">"cold — provision on demand"</span></div>
<div><span class="key">billing</span>: <span class="val">"per-hour, reserved where beneficial"</span></div>
<div><span class="key">sla</span>: <span class="val">"best-effort, hours acceptable"</span></div>
</div>
</div>
<div class="card reveal" style="border-left:3px solid var(--green);">
<div class="card-sub mb-8">ENV-04 · ENTERPRISE</div>
<div class="card-title" style="color:var(--green);">Production</div>
<div style="margin-bottom:16px;">
<span class="vol vol-stable"><span class="vol-dot"></span>Dedicated</span>
<span class="tag tag-green" style="margin-left:8px;">SLA-bound</span>
</div>
<p class="mb-16">Customer-dedicated compute with contractual SLAs. Isolated namespaces (compute and secrets). Deployed as a separate node pool partition — no resource sharing with other environments. Uptime guarantees, dedicated on-call path.</p>
<div class="mono-block">
<div><span class="key">gpu</span>: <span class="val">"customer-specified"</span></div>
<div><span class="key">warm_policy</span>: <span class="val">"dedicated — always warm"</span></div>
<div><span class="key">billing</span>: <span class="val">"monthly reserved + burst overage"</span></div>
<div><span class="key">sla</span>: <span class="val">"99.9% uptime, contractual"</span></div>
</div>
</div>
</div>
<h2>Environment Lifecycle</h2>
<div class="card mt-16 reveal">
<div style="overflow-x:auto;">
<div style="display:flex;align-items:center;gap:0;min-width:700px;flex-wrap:nowrap;">
<div class="flow-node active-node" style="flex:1;text-align:center;">Request Received</div>
<div class="flow-arrow"></div>
<div class="flow-node" style="flex:1;text-align:center;">Tier Classified</div>
<div class="flow-arrow"></div>
<div class="flow-node" style="flex:1;text-align:center;">Node Selected / Provisioned</div>
<div class="flow-arrow"></div>
<div class="flow-node active-node" style="flex:1;text-align:center;">Job Executing</div>
<div class="flow-arrow"></div>
<div class="flow-node" style="flex:1;text-align:center;">Artifact Stored</div>
<div class="flow-arrow"></div>
<div class="flow-node" style="flex:1;text-align:center;">Result Delivered</div>
<div class="flow-arrow"></div>
<div class="flow-node" style="flex:1;text-align:center;">Node Released / Terminated</div>
</div>
</div>
</div>
</div>
</section>
<!-- SECTION: IMPROVEMENT LOOPS -->
<section class="section" id="sec-loops">
<div class="doc-page">
<span class="sec-num">06 // Design Improvement Loops</span>
<div class="sec-title">Five Refinement Passes</div>
<p class="sec-desc">Five passes through the architecture before final form. Each loop targeted a specific quality dimension. Recorded here for architectural traceability.</p>
<div class="loop reveal">
<div class="loop-num">01</div>
<div class="loop-content">
<h4>Component Completeness</h4>
<p>Established the ten core components. Initial sketch had the router as a thin proxy and the control plane doing too much. Split the cost oracle into its own dynamic component (it changes continuously — spot prices, real-time availability — and must not be coupled to the more stable control plane contract). Added the Neuron Interface as a first-class component, not an afterthought. Recognized that API Contracts belong in the stable tier as a distinct concern from the Model Catalog.</p>
<div class="loop-delta">+ Cost Oracle separated from Control Plane · + Neuron Interface promoted to Component 10</div>
</div>
</div>
<div class="loop reveal">
<div class="loop-num">02</div>
<div class="loop-content">
<h4>VBD Volatility Boundaries</h4>
<p>Applied Volatility-Based Decomposition rigorously. The routing logic (how decisions are made) changes weekly with policy updates — Variable. The node state (which nodes are alive, their current cost) changes continuously — Dynamic. The storage schema and API contracts almost never change — Stable. Identified a violation: the original design coupled the Node Pool (variable — fleet composition) with node state (dynamic). Split these cleanly: the Pool is the fleet definition (variable), the state lives in the Control Plane's live registry (dynamic).</p>
<div class="loop-delta">+ Node Pool (variable) separated from live node state in Control Plane (dynamic)</div>
</div>
</div>
<div class="loop reveal">
<div class="loop-num">03</div>
<div class="loop-content">
<h4>Harmonic Design — Friction Analysis</h4>
<p>Walked the happy path: request arrives → tier classified → node selected → job runs → artifact stored → result returned. Found two friction points. (1) Cold-start latency is a seam between the Dynamic tier (live node state) and the Variable tier (router wants a warm node that doesn't exist). Resolution: warm-pool policy pushed into the Workload Orchestrator as a proactive pre-warm signal, driven by Observer's predicted load. (2) Model selection had an implicit dependency on Storage Layer for model weights — this creates a tight coupling during routing. Resolution: Model Catalog becomes the stable index, router only touches the catalog, never the storage layer directly.</p>
<div class="loop-delta">+ Pre-warm signal from Observer → Orchestrator · + Model Catalog as stable indirection layer</div>
</div>
</div>
<div class="loop reveal">
<div class="loop-num">04</div>
<div class="loop-content">
<h4>Operational Realism — Failure Modes</h4>
<p>Stress-tested failure scenarios. Provider outage: router must detect via health check + reroute within SLA window. Cold-start spikes: accepted as a feature of LOW tier, SLA explicitly excludes start time. Model unavailable: fallback chain defined (capability-equivalent model on same provider → same model on different provider → next-tier model → queue). Cost oracle unavailable: router falls back to cached pricing with staleness flag — HIGH tier proceeds, LOW tier queues. Secrets rotation: zero-downtime rotation via ESO — new secret version injected without pod restart. Added explicit idle-terminate threshold (15min) to prevent runaway costs on abandoned sessions.</p>
<div class="loop-delta">+ Fallback chain defined · + Cost oracle degraded mode · + 15min idle-terminate policy</div>
</div>
</div>
<div class="loop reveal">
<div class="loop-num">05</div>
<div class="loop-content">
<h4>AI Operator Interface — Autonomous Management Model</h4>
<p>Reexamined what Neuron actually needs to run Soma autonomously. Three action categories emerged: Observe (cost events, health events, anomaly alerts — all structured JSON), Decide (routing policy updates, warm-pool size, provider allocation — via Neuron Interface API), and Act (provision/terminate nodes, update model catalog, rotate secrets — through Workload Orchestrator). The key insight: Neuron should not have direct kubectl/API access to provider infrastructure. All actions go through Soma's own APIs — this creates an auditable, reversible action log and prevents runaway automation. Added the constraint: every Neuron-initiated action emits an event back to Observer, closing the loop.</p>
<div class="loop-delta">+ Neuron actions bounded to Soma API · + Action→event loop closes Observer feedback · + Runaway automation prevention</div>
</div>
</div>
</div>
</section>
<!-- SECTION: OPERATOR -->
<section class="section" id="sec-operator">
<div class="doc-page">
<span class="sec-num">07 // Neuron as Operator</span>
<div class="sec-title">Autonomous Management</div>
<div class="operator-grid reveal">
<div class="operator-profile">
<div class="operator-avatar">&#x2B21;</div>
<div class="operator-name">NEURON</div>
<div class="operator-role" style="margin-bottom:16px;">AI Operator · Soma v1</div>
<div style="text-align:left;margin-top:24px;">
<div class="mono-block" style="font-size:.72rem;">
<div><span class="key">identity</span>: <span class="val">"Vault service token"</span></div>
<div><span class="key">auth_scope</span>: <span class="val">"soma-operator"</span></div>
<div><span class="key">action_log</span>: <span class="val">"append-only, audited"</span></div>
<div><span class="key">human_override</span>: <span class="val">"always possible"</span></div>
<div><span class="key">runaway_guard</span>: <span class="val">"rate limits + event loop"</span></div>
</div>
</div>
<div class="divider"></div>
<div class="status status-green" style="justify-content:center;">
<span class="status-dot"></span>
<span>OPERATOR ACTIVE</span>
</div>
</div>
<div>
<div class="card mb-16">
<div class="card-title mb-16">The Autonomous Management Model</div>
<p class="mb-16">Neuron operates Soma through a structured observe-decide-act loop. It is not given raw infrastructure access — all actions are mediated through Soma's own APIs. This is deliberate: it creates an auditable action log, enforces business rules, and allows human override at any point without needing to understand the underlying infrastructure.</p>
<div class="grid-3">
<div style="text-align:center;padding:16px;background:var(--navy-d);border-radius:8px;border:1px solid var(--navy-b);">
<div style="font-family:var(--head);font-size:1.1rem;font-weight:700;color:var(--navy);margin-bottom:8px;">OBSERVE</div>
<div class="text-small text-muted">Read cost events, health alerts, anomalies from Observer structured stream</div>
</div>
<div style="text-align:center;padding:16px;background:var(--amber-d);border-radius:8px;border:1px solid var(--amber-b);">
<div style="font-family:var(--head);font-size:1.1rem;font-weight:700;color:var(--amber);margin-bottom:8px;">DECIDE</div>
<div class="text-small text-muted">Apply policy, backlog context, and historical patterns to form an action plan</div>
</div>
<div style="text-align:center;padding:16px;background:var(--green-d);border-radius:8px;border:1px solid var(--green-b);">
<div style="font-family:var(--head);font-size:1.1rem;font-weight:700;color:var(--green);margin-bottom:8px;">ACT</div>
<div class="text-small text-muted">Invoke Soma APIs: provision, terminate, update policy, rotate secrets</div>
</div>
</div>
</div>
<div class="card mb-16">
<div class="card-title mb-16">Neuron's Permitted Actions</div>
<table>
<tr><th>Action</th><th>Via</th><th>Guard Rails</th></tr>
<tr><td>Scale node pool</td><td>Workload Orchestrator API</td><td>Provider concentration limit; cost budget</td></tr>
<tr><td>Update routing policy</td><td>Router Policy API</td><td>Dry-run first; audit trail</td></tr>
<tr><td>Promote model version</td><td>Model Catalog API</td><td>Canary 5% first; health check gate</td></tr>
<tr><td>Adjust warm pool size</td><td>Orchestrator Policy API</td><td>Minimum warm floor enforced</td></tr>
<tr><td>Terminate idle nodes</td><td>Workload Orchestrator API</td><td>SLA check before termination</td></tr>
<tr><td>Alert Will</td><td>Email/Axon event</td><td>Threshold-gated; no alert spam</td></tr>
</table>
</div>
<div class="card">
<div class="card-title mb-16">What Neuron Cannot Do (By Design)</div>
<div style="display:flex;flex-wrap:wrap;gap:8px;">
<div class="tag" style="color:var(--amber);border-color:var(--amber-b);background:var(--amber-d);">Direct kubectl commands</div>
<div class="tag" style="color:var(--amber);border-color:var(--amber-b);background:var(--amber-d);">Raw provider API calls</div>
<div class="tag" style="color:var(--amber);border-color:var(--amber-b);background:var(--amber-d);">Modify Vault root tokens</div>
<div class="tag" style="color:var(--amber);border-color:var(--amber-b);background:var(--amber-d);">Delete customer data</div>
<div class="tag" style="color:var(--amber);border-color:var(--amber-b);background:var(--amber-d);">Override SLA contracts</div>
<div class="tag" style="color:var(--amber);border-color:var(--amber-b);background:var(--amber-d);">Spend beyond cost ceiling</div>
<div class="tag" style="color:var(--amber);border-color:var(--amber-b);background:var(--amber-d);">Bypass action audit log</div>
</div>
<p class="mt-16 text-small text-muted">Constraints are architectural, not policy. Neuron's service token has no permissions for these actions, regardless of reasoning.</p>
</div>
</div>
</div>
</div>
</section>
<!-- SECTION: STRATEGY -->
<section class="section" id="sec-strategy">
<div class="doc-page">
<span class="sec-num">08 // The 5-Year Play</span>
<div class="sec-title">Strategic Arc</div>
<p class="sec-desc">Soma's strategic arc is provider consolidation through intelligence. The more workloads flow through Soma, the more cost and routing data accumulates. That data makes the router smarter, the cost oracle more accurate, and the pre-warm predictions more precise. It's a compounding moat built on operational intelligence — not on proprietary models or locked hardware.</p>
<div class="grid-2 mb-32">
<div class="card reveal">
<div class="card-title mb-8">Why Now</div>
<p>The AI compute market is fractured. Teams are individually solving the multi-provider routing problem — badly, in isolation, with no pooled learning. Soma captures that problem at the platform layer. The timing window is 18–24 months before hyperscalers close the gap with purpose-built AI cloud products.</p>
</div>
<div class="card reveal">
<div class="card-title mb-8">The Moat</div>
<p>Routing intelligence compounds. Every job through Soma adds to the cost oracle's pricing model and the pre-warm predictor's demand signal. A competitor starting today has zero historical routing data. Soma at 12 months has a dataset no one can replicate without running the same workloads.</p>
</div>
</div>
<h2>Five-Year Roadmap</h2>
<div class="timeline">
<div class="timeline-item reveal">
<div class="timeline-year">2025 — YEAR 1</div>
<div class="timeline-content">
<h4>Internal Proof of Concept</h4>
<p>Soma manages Neuron Technologies' own compute. Legion + RunPod as initial node pool. Control plane, router, and observer built and validated. Cost savings measured. Neuron operator loop closed. The platform is its own first customer — every failure is free signal.</p>
<div style="margin-top:12px;">
<span class="tag tag-green">Legion + RunPod</span>
<span class="tag tag-green">Internal only</span>
<span class="tag tag-green">Neuron as operator</span>
</div>
</div>
</div>
<div class="timeline-item reveal">
<div class="timeline-year">2026 — YEAR 2</div>
<div class="timeline-content">
<h4>First External Customers</h4>
<p>Trusted beta partners onboarded. Production environment (dedicated node pools) offered. Customer-isolated secrets and billing. The pipeline engine productized — customers bring workloads, Soma routes them. Revenue validates the routing model's cost-optimization claims.</p>
<div style="margin-top:12px;">
<span class="tag tag-navy">Beta partners</span>
<span class="tag tag-navy">Production env</span>
<span class="tag tag-navy">Revenue signal</span>
</div>
</div>
</div>
<div class="timeline-item reveal">
<div class="timeline-year">2027 — YEAR 3</div>
<div class="timeline-content">
<h4>Platform Expansion</h4>
<p>AWS and Azure added to node pool. Multi-region routing. Spot-market optimization producing measurable savings vs. direct cloud spend. Cost oracle's historical dataset begins generating genuine alpha — routing decisions better than any human-tuned policy.</p>
<div style="margin-top:12px;">
<span class="tag tag-amber">Multi-cloud</span>
<span class="tag tag-amber">Multi-region</span>
<span class="tag tag-amber">Oracle alpha</span>
</div>
</div>
</div>
<div class="timeline-item reveal">
<div class="timeline-year">2028 — YEAR 4</div>
<div class="timeline-content">
<h4>Marketplace Integration</h4>
<p>Soma becomes the runtime for the Neuron marketplace. Customers publish AI products; Soma executes them. The workload orchestrator handles multi-tenant isolation at scale. The routing intelligence is now a competitive differentiator that marketplace customers cite when choosing Neuron over raw cloud.</p>
<div style="margin-top:12px;">
<span class="tag">Marketplace runtime</span>
<span class="tag">Multi-tenant scale</span>
<span class="tag">Competitive moat</span>
</div>
</div>
</div>
<div class="timeline-item reveal">
<div class="timeline-year">2029 — YEAR 5</div>
<div class="timeline-content">
<h4>Infrastructure as a Platform</h4>
<p>Soma offered as a standalone product — the "AI-native cloud router" for enterprise AI teams. The cost oracle data asset is the product. Competing directly with hyperscaler AI products — not on compute price (they win there), but on cross-cloud intelligence. The moat is the 4 years of routing data and the operator model.</p>
<div style="margin-top:12px;">
<span class="tag tag-navy">Standalone product</span>
<span class="tag tag-navy">Enterprise AI</span>
<span class="tag tag-navy">Data asset moat</span>
</div>
</div>
</div>
</div>
<h2>Competitive Positioning</h2>
<div class="card mt-16 reveal">
<table>
<tr><th>Competitor</th><th>Approach</th><th>Soma Advantage</th></tr>
<tr>
<td>AWS Bedrock / Azure AI</td>
<td>Single-cloud, lock-in model</td>
<td>Multi-cloud, best-of-breed per workload</td>
</tr>
<tr>
<td>Replicate / Modal</td>
<td>Serverless inference, no routing intelligence</td>
<td>Tier-aware routing + cost oracle + warm pools</td>
</tr>
<tr>
<td>Vast.ai / RunPod</td>
<td>Compute marketplace, no orchestration</td>
<td>Orchestration + pipeline + operator loop</td>
</tr>
<tr>
<td>Custom infra teams</td>
<td>Hand-built per company, no pooled learning</td>
<td>Platform-level intelligence; compounding data moat</td>
</tr>
</table>
</div>
<div class="callout dark mt-24 reveal">
<div class="label">The Irreducible Bet</div>
<p>Soma is a bet that compute routing intelligence is a durable differentiator — not a feature that hyperscalers will trivially replicate. The bet holds if: (1) AI workload heterogeneity persists (multi-model, multi-modality, variable SLA), (2) no single provider achieves dominant price/performance across all workload types, and (3) the operational data asset compounds faster than competitors can replicate it. <strong>All three conditions appear structurally durable for the next 5 years.</strong></p>
</div>
</div>
</section>
<footer style="border-top:1px solid var(--border2);padding:32px;text-align:center;color:var(--t3);font-family:var(--mono);font-size:.56rem;letter-spacing:.14em;text-transform:uppercase;position:relative;z-index:1;">
<span class="nav-badge" style="margin-right:16px;">EYES ONLY</span>
NEURON TECHNOLOGIES · SOMA ARCHITECTURE · INTERNAL PLANNING DOCUMENT · 2025-04
<span style="margin-left:16px;opacity:.4;">NOT FOR DISTRIBUTION</span>
</footer>
<script>
/**
 * Activates one document section and its matching navigation tab.
 *
 * Removes the `active` class from every `.section` and `.nav-link`, then adds
 * it to the element whose id is `sec-<id>` and to the nav link whose position
 * matches `id` in the tab order below.
 *
 * @param {string} id - Section key without the `sec-` prefix (e.g. 'overview').
 */
function showSection(id) {
  const section = document.getElementById('sec-' + id);
  // Unknown id: keep the current selection rather than clearing every
  // `active` class and activating nothing.
  if (!section) return;

  const tabs = document.querySelectorAll('.nav-link');
  document.querySelectorAll('.section').forEach(s => s.classList.remove('active'));
  tabs.forEach(t => t.classList.remove('active'));
  section.classList.add('active');

  // Position in this list is the index of the corresponding `.nav-link`.
  const tabOrder = [
    'overview', 'diagram', 'components',
    'routing', 'workloads', 'loops',
    'operator', 'strategy'
  ];
  // `indexOf` yields -1 for ids without a tab, and the page may render fewer
  // links than listed; in both cases `tab` is undefined and is skipped
  // instead of throwing.
  const tab = tabs[tabOrder.indexOf(id)];
  if (tab) tab.classList.add('active');
}
/**
 * Flips the `open` class on a card element.
 *
 * @param {Element} card - Element whose `open` class is toggled.
 */
function toggleCard(card) {
  const { classList } = card;
  classList.toggle('open');
}
// Keyboard navigation: plain Left/Right arrow keys step to the previous/next
// section in the order listed below.
document.addEventListener('keydown', function(e) {
  if (e.key !== 'ArrowRight' && e.key !== 'ArrowLeft') return;
  // Leave modified arrows alone (e.g. Alt+Left is browser "back" on many
  // platforms, Shift+arrow extends a text selection).
  if (e.altKey || e.ctrlKey || e.metaKey || e.shiftKey) return;
  // Inside editable controls the arrow keys move the caret or change the
  // value, so they must not also switch sections.
  const el = e.target;
  const tag = el && el.tagName;
  if (tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT' || (el && el.isContentEditable)) return;

  const sections = ['overview','diagram','components','routing','workloads','loops','operator','strategy'];
  const active = document.querySelector('.section.active');
  if (!active) return;
  const currentId = active.id.replace('sec-','');
  const idx = sections.indexOf(currentId);
  if (e.key === 'ArrowRight' && idx < sections.length - 1) {
    showSection(sections[idx + 1]);
  } else if (e.key === 'ArrowLeft' && idx > 0) {
    showSection(sections[idx - 1]);
  }
});
// Reveal on scroll: add `visible` to each `.reveal` element once at least 10%
// of it intersects the viewport (no `root` option is given, so the viewport
// is the observer's root).
const obs = typeof IntersectionObserver === 'function'
  ? new IntersectionObserver((entries, observer) => entries.forEach(entry => {
      if (!entry.isIntersecting) return;
      entry.target.classList.add('visible');
      // Nothing in this script removes `visible`, so the element needs no
      // further watching once it has been revealed.
      observer.unobserve(entry.target);
    }), {threshold: .1})
  : null;
document.querySelectorAll('.reveal').forEach(el => {
  if (obs) {
    obs.observe(el);
  } else {
    // No IntersectionObserver available: mark everything visible up front
    // rather than throwing and leaving the class unset.
    el.classList.add('visible');
  }
});
</script>
</body>
</html>