feat: scale fixes — max-instances, asset caching, shared rate limits, global cap #65

Merged
will.anderson merged 1 commits from fix/checkout-auth-reveal into dev 2026-05-11 03:12:32 +00:00
2 changed files with 101 additions and 29 deletions
+2 -1
View File
@@ -262,7 +262,7 @@ jobs:
--update-env-vars "NEURON_LLM_0_FORMAT=anthropic,NEURON_LLM_0_MODEL=claude-sonnet-4-5,NEURON_LLM_0_URL=https://api.anthropic.com/v1/messages" \
--update-secrets "NEURON_LLM_0_KEY=anthropic-api-key:latest,ANTHROPIC_API_KEY=anthropic-api-key:latest" \
--min-instances 1 \
--max-instances 10 \
--max-instances 50 \
--concurrency 20 \
--port 8080 \
--allow-unauthenticated \
@@ -346,6 +346,7 @@ jobs:
--region us-central1 \
--project neuron-785695 \
--service-account neuron-marketing-sa@neuron-785695.iam.gserviceaccount.com \
--max-instances 200 \
--update-env-vars "NODE_ENV=production,STRIPE_PUBLISHABLE_KEY=pk_test_51TPoHnJg9Fv1D3AUp1FEMcy4MGlKRZqs4scW66kjQFQjWofmNc2rottzXzDaXekHvuw1OQpyp2WCIsc7O5fXIG0G00HQQrkdGX,GCS_SHARE_BUCKET=neuron-shares-prod,SUPABASE_ANON_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im9jb2pzZ2hhb25sdHVuaWRrenB3Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3Nzc2NDIxNjgsImV4cCI6MjA5MzIxODE2OH0.e0FVFw1aahnrBVvnkR5R8a-RxCx095U8o_gsk7Quq3E,NEURON_LLM_0_FORMAT=anthropic,NEURON_LLM_0_MODEL=claude-sonnet-4-5,NEURON_LLM_0_URL=https://api.anthropic.com/v1/messages,SOUL_URL=${{ steps.soul-url.outputs.soul_url }}" \
--update-secrets "SUPABASE_SERVICE_KEY=supabase-service-key:latest,NEURON_LLM_0_KEY=anthropic-api-key:latest,ANTHROPIC_API_KEY=anthropic-api-key:latest,STRIPE_SECRET_KEY=stripe-secret-key-stage:latest,STRIPE_WEBHOOK_SECRET=stripe-webhook-secret-stage:latest,STRIPE_PRICE_PROFESSIONAL=stripe-price-professional-stage:latest,STRIPE_PRICE_FOUNDING=stripe-price-founding-stage:latest,STRIPE_PRICE_FAMILY_CHILD=stripe-price-family-child:latest,RESEND_API_KEY=resend-api-key:latest,DOCUSEAL_WEBHOOK_TOKEN=docuseal-webhook-token:latest" \
--allow-unauthenticated \
+99 -28
View File
@@ -894,6 +894,8 @@ fn handle_request_inner(method: String, path: String, body: String) -> String {
}
// Static assets: /assets/*
// Returns Cache-Control: public, max-age=31536000, immutable so Cloudflare
// caches these at the edge and never forwards subsequent requests to Cloud Run.
if str_starts_with(path, "/assets/") {
let rel: String = str_slice(path, 8, str_len(path))
let abs: String = src_dir + "/assets/" + rel
@@ -901,7 +903,7 @@ fn handle_request_inner(method: String, path: String, body: String) -> String {
if str_eq(content, "") {
return "{\"__status__\":404,\"error\":\"not found\"}"
}
return content
return http_response(200, static_asset_headers_json(), content)
}
// Compiled client-side JS: /js/*
@@ -922,6 +924,7 @@ fn handle_request_inner(method: String, path: String, body: String) -> String {
}
// Brand assets: /brand/*
// Same long-lived cache policy as /assets/*: served from the edge, not Cloud Run.
if str_starts_with(path, "/brand/") {
let rel: String = str_slice(path, 7, str_len(path))
let abs: String = src_dir + "/brand/" + rel
@@ -929,7 +932,7 @@ fn handle_request_inner(method: String, path: String, body: String) -> String {
if str_eq(content, "") {
return "{\"__status__\":404,\"error\":\"not found\"}"
}
return content
return http_response(200, static_asset_headers_json(), content)
}
// Stripe checkout
@@ -1161,36 +1164,92 @@ fn handle_request_inner(method: String, path: String, body: String) -> String {
if str_len(msg) > 8000 {
return "{\"error\":\"Message too long. Please keep your message under 8000 characters.\"}"
}
// Rate limit: 10 chats per uid per day (UTC day, keyed by uid).
// State key: "__rl_<uid>" -> "<count>|<day_number>"
// day_number = unix_timestamp / 86400 (integer UTC day)
// Global circuit breaker
// Caps total demo requests per Cloud Run instance per UTC day to 2000.
// This bounds per-instance API spend regardless of uid diversity.
// Stored in process state (in-memory); this is intentionally per-instance,
// so no cross-instance coordination is needed for this coarse cap.
let now_ts_cb: Int = unix_timestamp()
let today_day_cb: Int = now_ts_cb / 86400
let global_day_s: String = state_get("__global_demo_day__")
let global_cnt_s: String = state_get("__global_demo_count__")
let global_day: Int = if str_eq(global_day_s, "") { 0 } else { str_to_int(global_day_s) }
let global_cnt: Int = if str_eq(global_cnt_s, "") { 0 } else { str_to_int(global_cnt_s) }
// Reset on new UTC day
if global_day != today_day_cb {
state_set("__global_demo_day__", int_to_str(today_day_cb))
state_set("__global_demo_count__", "0")
let global_cnt = 0
}
if global_cnt >= 2000 {
return "{\"error\":\"Demo is temporarily busy. Try again in a few minutes.\",\"busy\":true}"
}
state_set("__global_demo_count__", int_to_str(global_cnt + 1))
// Per-uid rate limit (Supabase-backed, shared across all instances)
// Uses demo_rate_limits table: uid (PK), count, day_number, updated_at.
// Falls back to in-process state_get/state_set when the service key is
// absent (local dev without SUPABASE_SERVICE_KEY set).
// Returns rate_limited JSON with reset_at (next midnight UTC) so
// the frontend can show a real countdown.
let rate_uid: String = json_get(body, "uid")
let rate_uid: String = json_get(body, "uid")
let now_ts: Int = unix_timestamp()
let today_day: Int = now_ts / 86400
let next_reset: Int = (today_day + 1) * 86400
if !str_eq(rate_uid, "") {
let now_ts: Int = unix_timestamp()
let today_day: Int = now_ts / 86400
let next_reset: Int = (today_day + 1) * 86400
let rl_key: String = "__rl_" + rate_uid
let rl_val: String = state_get(rl_key)
let rl_count: Int = 0
let rl_day: Int = 0
if !str_eq(rl_val, "") {
// format: "count|day"
let parts: [String] = str_split(rl_val, "|")
if native_list_len(parts) >= 2 {
let rl_count = str_to_int(native_list_get(parts, 0))
let rl_day = str_to_int(native_list_get(parts, 1))
let rl_sb_url: String = state_get("__supabase_project_url__")
let rl_sb_key: String = state_get("__supabase_service_key__")
if str_eq(rl_sb_key, "") {
// Local dev fallback: in-process rate limiting
let rl_key: String = "__rl_" + rate_uid
let rl_val: String = state_get(rl_key)
let rl_count: Int = 0
let rl_day: Int = 0
if !str_eq(rl_val, "") {
let parts: [String] = str_split(rl_val, "|")
if native_list_len(parts) >= 2 {
let rl_count = str_to_int(native_list_get(parts, 0))
let rl_day = str_to_int(native_list_get(parts, 1))
}
}
if rl_day != today_day {
let rl_count = 0
}
if rl_count >= 10 {
return "{\"rate_limited\":true,\"reset_at\":" + int_to_str(next_reset) + "}"
}
state_set(rl_key, int_to_str(rl_count + 1) + "|" + int_to_str(today_day))
} else {
// Production: read current count from Supabase
let rl_resp: String = supabase_get(rl_sb_url, rl_sb_key,
"demo_rate_limits?uid=eq." + rate_uid + "&select=count,day_number&limit=1")
let rl_row: String = json_array_get(rl_resp, 0)
let rl_count: Int = 0
let rl_day: Int = 0
if !str_eq(rl_row, "") {
let rl_count_s: String = json_get(rl_row, "count")
let rl_day_s: String = json_get(rl_row, "day_number")
if !str_eq(rl_count_s, "") {
let rl_count = str_to_int(rl_count_s)
}
if !str_eq(rl_day_s, "") {
let rl_day = str_to_int(rl_day_s)
}
}
// Reset count on new UTC day
if rl_day != today_day {
let rl_count = 0
}
if rl_count >= 10 {
return "{\"rate_limited\":true,\"reset_at\":" + int_to_str(next_reset) + "}"
}
// Upsert new count; supabase_insert uses Prefer: resolution=merge-duplicates
let new_count: Int = rl_count + 1
let rl_row_json: String = "{\"uid\":\"" + rate_uid
+ "\",\"count\":" + int_to_str(new_count)
+ ",\"day_number\":" + int_to_str(today_day) + "}"
let _rl_upsert: String = supabase_insert(rl_sb_url, rl_sb_key, "demo_rate_limits", rl_row_json)
}
// Reset count if it's a new day
if rl_day != today_day {
let rl_count = 0
}
if rl_count >= 10 {
return "{\"rate_limited\":true,\"reset_at\":" + int_to_str(next_reset) + "}"
}
state_set(rl_key, int_to_str(rl_count + 1) + "|" + int_to_str(today_day))
}
// Turnstile: server-side verification is mandatory on every first
// message (tokens are single-use; per-message verification would
@@ -1943,9 +2002,11 @@ fn sec_headers_json() -> String {
// Headers for compiled JS assets. Explicitly sets Content-Type so the browser
// treats them as JavaScript regardless of what http_detect_content_type()
// infers from the content (minified/obfuscated JS can trip the JSON heuristic).
// Cache-Control bumped to 1 year + immutable: JS bundles are content-addressed
// (hash in filename) so safe for Cloudflare to cache indefinitely at the edge.
fn js_headers_json() -> String {
"{\"Content-Type\":\"application/javascript; charset=utf-8\","
+ "\"Cache-Control\":\"public, max-age=3600\","
+ "\"Cache-Control\":\"public, max-age=31536000, immutable\","
+ "\"Strict-Transport-Security\":\"max-age=63072000; includeSubDomains; preload\","
+ "\"X-Content-Type-Options\":\"nosniff\","
+ "\"X-Frame-Options\":\"SAMEORIGIN\","
@@ -1953,6 +2014,16 @@ fn js_headers_json() -> String {
+ "\"Permissions-Policy\":\"geolocation=(), microphone=(), camera=()\"}"
}
// Headers for static assets under /assets/ and /brand/.
// Returns a JSON object string carrying three response headers:
//   Cache-Control: public, max-age=31536000 (1 year), immutable
//   Strict-Transport-Security: max-age=63072000; includeSubDomains; preload
//   X-Content-Type-Options: nosniff
// The long-lived, immutable cache policy is meant to let Cloudflare cache
// these files at the edge and never revalidate, which eliminates Cloud Run
// hits for every image/font/svg.
// NOTE(review): this is only safe if assets are versioned by filename or
// content, so that stale delivery is not a risk. Confirm that holds for
// everything under /brand/ as well as /assets/.
// NOTE(review): no Content-Type is set here (unlike js_headers_json), so the
// type is presumably inferred elsewhere; with nosniff the browser will not
// correct a wrong inference. Confirm.
fn static_asset_headers_json() -> String {
"{\"Cache-Control\":\"public, max-age=31536000, immutable\","
+ "\"Strict-Transport-Security\":\"max-age=63072000; includeSubDomains; preload\","
+ "\"X-Content-Type-Options\":\"nosniff\"}"
}
fn handle_request(method: String, path: String, body: String) -> String {
let inner_resp: String = handle_request_inner(method, path, body)
// Detect envelope already set by inner handler (starts with