fix: three foundation/el root-cause bugs (no more bandaids)

1. Parser+codegen: bare reassignment `x = expr` inside an if-body
   was compiling to three orphan expressions with no store. Now
   emits a real assignment.

2. Runtime json_get: dot-path segments that are all digits now
   correctly traverse array indices. `json_get(s, "0.field")` works.

3. Runtime HTTP writer: response bodies starting with
   `{"__status__":<int>,...}` now set the HTTP status header to
   that value and strip the marker from the served body. Existing
   404/401/503 paths in product code now produce real status codes
   instead of HTTP 200 with the status hidden in the body.

Self-host fixed point holds: gen2 == gen3 byte-identical.
Snapshot tagged at dist/platform/elc.20260502-1231-self-host.

Backlog: bl-c121edda
This commit is contained in:
Will Anderson
2026-05-02 12:32:23 -05:00
parent 1274bcde35
commit 742bd0b4f9
8 changed files with 332 additions and 30 deletions
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.
+210 -27
View File
@@ -1264,6 +1264,70 @@ static int http_parse_envelope(const char* s, int* out_status,
return 1;
}
/* Advance past JSON insignificant whitespace (space, tab, LF, CR) and return
 * a pointer to the first other byte (possibly the terminating NUL). */
static const char* http_status_env_skip_ws(const char* p) {
    while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') p++;
    return p;
}

/* Lightweight `__status__` envelope: if the body's first key is `__status__`
 * and its value is an integer literal, lift the status to the HTTP layer and
 * strip the marker from the body before sending. This is the common case for
 * El handlers that want to return 4xx/5xx without going through
 * http_response() -- they just prepend `{"__status__":<int>,...}` to the JSON
 * they were already returning.
 *
 * Only the first-key form is recognised, so the contract is cheap to detect
 * and unambiguous: `{"__status__":401,"error":"unauthorized"}` is an
 * envelope, but `{"error":"...","__status__":401}` is not. Product code
 * controls placement.
 *
 * Parameters:
 *   s              NUL-terminated response body; NULL is rejected.
 *   out_status     receives the parsed status (100..599) on success.
 *   out_body_alloc receives the stripped body on success.
 *
 * Returns 1 on success with *out_status set and *out_body_alloc pointing at a
 * buffer obtained from malloc() on every success path (caller frees with
 * free()). Returns 0 on any mismatch or allocation failure and leaves both
 * outputs untouched. */
static int http_parse_status_envelope(const char* s, int* out_status,
                                      char** out_body_alloc) {
    static const char marker[] = "\"__status__\"";
    const size_t mlen = sizeof(marker) - 1;

    if (!s) return 0;

    const char* p = http_status_env_skip_ws(s);
    if (*p != '{') return 0;
    p = http_status_env_skip_ws(p + 1);

    if (strncmp(p, marker, mlen) != 0) return 0;
    p = http_status_env_skip_ws(p + mlen);

    if (*p != ':') return 0;
    p = http_status_env_skip_ws(p + 1);

    if (*p < '0' || *p > '9') return 0; /* non-numeric -> not an envelope */

    /* Accumulate the status, bailing out as soon as it leaves the valid
     * range. Because the running value never exceeds 599 before the next
     * multiply, an arbitrarily long digit run cannot overflow `int`. */
    int status = 0;
    while (*p >= '0' && *p <= '9') {
        status = status * 10 + (*p - '0');
        if (status > 599) return 0;
        p++;
    }
    if (status < 100) return 0;
    p = http_status_env_skip_ws(p);

    /* Two trailing shapes accepted:
     *   ,"k":v,...}   -> body becomes {"k":v,...}
     *   }             -> body becomes {}
     * Anything else (e.g. a fractional part, garbage) drops the envelope so
     * we don't strip what we shouldn't. */
    const char* rest;
    if (*p == '}') {
        rest = "}";
    } else if (*p == ',') {
        rest = http_status_env_skip_ws(p + 1); /* remainder of the object */
    } else {
        return 0;
    }

    /* Build the trimmed body: '{' + remainder. Both shapes go through the
     * same malloc() so the documented "caller frees" contract holds
     * regardless of which one matched. */
    size_t rest_len = strlen(rest);
    char* out = (char*)malloc(rest_len + 2);
    if (!out) return 0;
    out[0] = '{';
    memcpy(out + 1, rest, rest_len);
    out[rest_len + 1] = '\0';

    *out_status = status;
    *out_body_alloc = out;
    return 1;
}
/* Send a fully-built HTTP response. If `body` starts with the envelope tag,
* unpack status/headers/body. Otherwise emit the historical 200-OK with
* auto-detected Content-Type. */
@@ -1283,6 +1347,19 @@ static void http_send_response(int fd, const char* body) {
&env_headers_map, &env_body,
&env_parsed_root);
/* If the rich http_response() envelope didn't claim this body, try the
* lightweight `__status__` form. This second envelope is malloc-backed so
* we route it through env_body and let the existing cleanup path free it
* same lifetime contract, no special case at the bottom of the
* function. */
if (!is_envelope) {
char* trimmed = NULL;
if (http_parse_status_envelope(body, &status, &trimmed)) {
env_body = trimmed;
is_envelope = 1;
}
}
const char* eff_body = is_envelope ? env_body : body;
/* Use the real byte count from fs_read if available (handles binary files
* with embedded null bytes PNG, WOFF2, etc.). Fall back to strlen for
@@ -1893,30 +1970,81 @@ el_val_t url_decode(el_val_t sv) {
/* ── JSON ────────────────────────────────────────────────────────────────── */
el_val_t json_get(el_val_t jsonv, el_val_t keyv) {
const char* json = EL_CSTR(jsonv);
const char* key = EL_CSTR(keyv);
if (!json || !key) return el_wrap_str(el_strdup(""));
size_t klen = strlen(key);
/* Use a stack buffer for the pattern to avoid arena double-free.
* Keys in El maps are typically short; 512 bytes is a safe upper bound. */
char stack_pat[512];
char* pattern;
if (klen + 5 <= sizeof(stack_pat)) {
pattern = stack_pat;
} else {
pattern = malloc(klen + 5);
if (!pattern) return el_wrap_str(el_strdup(""));
/* True iff the segment is non-empty and every byte is an ASCII digit. We treat
* such segments as numeric array indices when walking a dot-path; mixed names
* like "0a" remain object-key lookups, so a key named "0" still wins over an
* index when the surrounding container is an object. */
static int json_path_seg_is_index(const char* seg, size_t n) {
if (n == 0) return 0;
for (size_t i = 0; i < n; i++) {
if (seg[i] < '0' || seg[i] > '9') return 0;
}
snprintf(pattern, klen + 5, "\"%s\":", key);
const char* p = strstr(json, pattern);
if (pattern != stack_pat) free(pattern);
if (!p) return el_wrap_str(el_strdup(""));
p += strlen(key) + 3; /* skip "key": */
while (*p == ' ' || *p == '\t' || *p == '\n') p++;
return 1;
}
/* Return a pointer to the first byte at or after `p` that is not one of the
 * four JSON whitespace characters (space, tab, LF, CR). The result may point
 * at the terminating NUL when the rest of the string is all whitespace. */
static const char* json_skip_ws(const char* p) {
    return p + strspn(p, " \t\n\r");
}
/* Descend one path segment from the JSON value at cursor `p`.
 *
 *   - If `p` (after whitespace) points at an array `[...]` and the segment is
 *     all ASCII digits, advance to that zero-based element.
 *   - Otherwise the value must be an object `{...}`; the segment is looked up
 *     as a key via json_find_key and that call's result is returned as-is.
 *
 * Parameters:
 *   p        cursor into the JSON text (NULL is rejected).
 *   seg      start of the segment inside the original path string; it is NOT
 *            NUL-terminated at seg_len, which avoids an allocation per
 *            segment on the array path.
 *   seg_len  byte length of the segment.
 *
 * Returns a pointer into the JSON text on success, or NULL when the descent
 * fails (index out of range, malformed array, container is neither array nor
 * object, key lookup failed, or allocation failure). */
static const char* json_path_descend(const char* p, const char* seg, size_t seg_len) {
    if (!p || !seg) return NULL;
    p = json_skip_ws(p);

    if (*p == '[' && json_path_seg_is_index(seg, seg_len)) {
        /* Parse the index with an explicit overflow check. A segment with
         * more digits than size_t can hold cannot name a real element, so it
         * is reported as "not found" rather than wrapping around. */
        size_t idx = 0;
        for (size_t i = 0; i < seg_len; i++) {
            size_t digit = (size_t)(seg[i] - '0');
            if (idx > ((size_t)-1 - digit) / 10) return NULL;
            idx = idx * 10 + digit;
        }

        p = json_skip_ws(p + 1); /* step past '[' */
        size_t cur = 0;
        while (*p && *p != ']') {
            if (cur == idx) return p;
            const char* end = json_skip_value(p);
            /* NULL or no forward progress means the element could not be
             * skipped; stop instead of looping forever. */
            if (!end || end == p) return NULL;
            p = json_skip_ws(end);
            /* No comma after this element: only acceptable at the closing
             * ']', which means we ran out of elements. */
            if (*p != ',') break;
            p = json_skip_ws(p + 1);
            cur++;
        }
        return NULL;
    }

    /* Object lookup. The cursor must already sit on the opening '{'.
     * NOTE(review): json_find_key is expected to search only the top level
     * of the object at `p` -- confirm against its definition. */
    if (*p != '{') return NULL;

    /* json_find_key takes a NUL-terminated key, so copy the segment. Short
     * keys use the stack buffer; longer ones fall back to the heap. This
     * cost is only paid when the segment isn't used as an array index. */
    char stack_key[256];
    char* k = stack_key;
    if (seg_len + 1 > sizeof(stack_key)) {
        k = malloc(seg_len + 1);
        if (!k) return NULL;
    }
    memcpy(k, seg, seg_len);
    k[seg_len] = '\0';

    const char* found = json_find_key(p, k);
    if (k != stack_key) free(k);
    return found;
}
/* Read the JSON value at `p` into a freshly-allocated, arena-owned el_val_t.
* - String -> unescaped, wrapped el_val_t string
* - Anything else -> raw JSON slice as a string (matches the historical
* json_get behaviour: numbers/bools/null come back stringified). */
static el_val_t json_read_value(const char* p) {
p = json_skip_ws(p);
if (*p == '"') {
p++;
/* Unescape the JSON string value into a clean buffer. */
size_t cap = strlen(p) + 1;
char* out = el_strbuf(cap);
char* w = out;
@@ -1940,15 +2068,70 @@ el_val_t json_get(el_val_t jsonv, el_val_t keyv) {
*w = '\0';
return el_wrap_str(out);
}
const char* start = p;
while (*p && *p != ',' && *p != '}' && *p != ']' && *p != '\n') p++;
size_t len = (size_t)(p - start);
char* out = el_strbuf(len);
memcpy(out, start, len);
out[len] = '\0';
/* Object/array/number/bool/null — return the raw slice up to the value's
* end. json_skip_value tracks brace/bracket/string state so nested objects
* round-trip cleanly. */
const char* end = json_skip_value(p);
if (!end) end = p;
size_t n = (size_t)(end - p);
/* Strip trailing whitespace from scalar values so callers don't see
* `123 ` when they parsed a pretty-printed number. */
while (n > 0 && (p[n-1] == ' ' || p[n-1] == '\t' || p[n-1] == '\n' || p[n-1] == '\r')) {
n--;
}
char* out = el_strbuf(n);
memcpy(out, p, n);
out[n] = '\0';
return el_wrap_str(out);
}
/* Look up `keyv` inside the JSON text `jsonv` and return the value as an El
 * string (produced by json_read_value). Returns an empty El string when
 * either argument has no C string, when the key/path is not found, or when
 * the temporary pattern buffer cannot be allocated.
 *
 * Two modes, selected by whether the key contains a '.':
 *   - no dot : historical behaviour -- substring search for `"key":` anywhere
 *              in the text, then read the value that follows.
 *   - dotted : walk the path segment by segment with json_path_descend. */
el_val_t json_get(el_val_t jsonv, el_val_t keyv) {
    const char* doc = EL_CSTR(jsonv);
    const char* path = EL_CSTR(keyv);
    if (doc == NULL || path == NULL) return el_wrap_str(el_strdup(""));

    if (strchr(path, '.') == NULL) {
        /* Single-segment fast path: existing callers keep the plain
         * substring-search cost; the walker below is only paid for when a
         * dotted path is actually supplied. */
        const size_t path_len = strlen(path);
        const size_t pat_cap = path_len + 5;
        char inline_buf[512];
        char* pat = inline_buf;
        if (pat_cap > sizeof(inline_buf)) {
            pat = malloc(pat_cap);
            if (pat == NULL) return el_wrap_str(el_strdup(""));
        }
        snprintf(pat, pat_cap, "\"%s\":", path);
        const char* hit = strstr(doc, pat);
        if (pat != inline_buf) free(pat);
        if (hit == NULL) return el_wrap_str(el_strdup(""));
        /* +3 skips the two quotes and the colon around the key. */
        return json_read_value(hit + path_len + 3);
    }

    /* Dotted path: split on '.' without copying and descend one segment at a
     * time; the first failed descent yields the empty string. */
    const char* cursor = json_skip_ws(doc);
    const char* seg = path;
    for (;;) {
        const char* dot = strchr(seg, '.');
        const size_t seg_len = dot ? (size_t)(dot - seg) : strlen(seg);
        cursor = json_path_descend(cursor, seg, seg_len);
        if (cursor == NULL) return el_wrap_str(el_strdup(""));
        if (dot == NULL) break;
        seg = dot + 1;
    }
    return json_read_value(cursor);
}
/* ── Float bit-cast helpers ──────────────────────────────────────────────── */
/* `el_to_float` and `el_from_float` are exposed in el_runtime.h as static
* inlines so generated programs (which #include the header) can call them
+28 -3
View File
@@ -595,9 +595,20 @@ fn cg_if_expr_arm(stmts: [Map<String, Any>], result_var: String) -> String {
let out = out + "(void)(" + val_c + "); "
}
} else {
// Non-trivial stmt kinds (While/For) shouldn't appear in
// expression-position arm bodies; emit nothing rather
// than malformed C.
if str_eq(sk, "Assign") {
// Real reassignment in an expression-position arm
// emit the store; the arm's "value" stays whatever
// result_var was last set to, which is the El
// semantics (assignment is a statement, not a value).
let aname: String = s["name"]
let aval = s["value"]
let aval_c: String = cg_expr(aval)
let out = out + aname + " = " + aval_c + "; "
} else {
// Non-trivial stmt kinds (While/For) shouldn't appear in
// expression-position arm bodies; emit nothing rather
// than malformed C.
}
}
}
}
@@ -686,6 +697,20 @@ fn cg_stmt(stmt: Map<String, Any>, indent: String, declared: [String]) -> [Strin
return declared
}
// Bare reassignment: `name = expr`. Always emits a plain C assignment
// (no `el_val_t` prefix) by construction the parser only produces
// Assign for an existing identifier. If the name happens NOT to be in
// `declared` for the current C scope (it was let-bound by an enclosing
// block) the emit still resolves at C level because the variable lives
// in the surrounding scope.
if kind == "Assign" {
let name: String = stmt["name"]
let val = stmt["value"]
let val_c: String = cg_expr(val)
emit_line(indent + name + " = " + val_c + ";")
return declared
}
if kind == "Expr" {
let val = stmt["value"]
let val_kind: String = val["expr"]
+18
View File
@@ -946,6 +946,24 @@ fn parse_stmt(tokens: [Map<String, Any>], pos: Int) -> Map<String, Any> {
}, p)
}
// Bare reassignment: `name = expr`. Handled BEFORE the expression
// fallback so we don't drop the assign on the floor and emit three
// orphan expressions (the original silent-miscompile bug). El's `let`
// already permits redeclaration, so this only applies when the parser
// sees an Ident followed directly by `=`. `==` is a separate kind
// (EqEq) so there's no ambiguity.
if k == "Ident" {
let k2 = tok_kind(tokens, pos + 1)
if k2 == "Eq" {
let name = tok_value(tokens, pos)
let p = pos + 2
let r = parse_expr(tokens, p)
let val = r["node"]
let p = r["pos"]
return make_result({ "stmt": "Assign", "name": name, "value": val }, p)
}
}
// bare expression or if/match statement
let r = parse_expr(tokens, pos)
let val = r["node"]
+33
View File
@@ -0,0 +1,33 @@
// http-status-envelope.el acceptance test for the __status__ HTTP envelope.
//
// Before fix: a handler returning {"__status__":401,"error":"unauthorized"}
// went out as an HTTP 200 with the JSON body verbatim, so Cloud Run logs were
// full of 200s for what should have been 4xx/5xx.
//
// After fix: when the response body's FIRST key is __status__, the runtime
// reads the integer value as the HTTP status code and strips the marker from
// the body before sending it to the client.
//
// Verify with curl:
// curl -i http://localhost:8081/auth -> HTTP/1.1 401 Unauthorized
// curl -i http://localhost:8081/health -> HTTP/1.1 200 OK
// curl -i http://localhost:8081/oops -> HTTP/1.1 503 Service Unavailable
// Request handler for this acceptance test. Only `path` selects the response;
// `method` and `body` are accepted but never read.
//
// Returns a JSON string:
//   /auth   -> first key __status__ = 401
//   /oops   -> first key __status__ = 503
//   /health -> plain {"ok":true} with no __status__ marker
//   other   -> first key __status__ = 404
fn handle(method: String, path: String, body: String) -> String {
if path == "/auth" {
// __status__ is the FIRST key, which is the form the runtime lifts to the
// HTTP status line (see the curl expectations in the file header).
return "{\"__status__\":401,\"error\":\"unauthorized\"}"
}
if path == "/oops" {
return "{\"__status__\":503,\"error\":\"degraded\"}"
}
if path == "/health" {
// No marker here: this is the control case expected to stay HTTP 200.
return "{\"ok\":true}"
}
// Fallthrough for every other path.
return "{\"__status__\":404,\"error\":\"not found\"}"
}
// Entry point: registers "handle" via http_set_handler, then calls http_serve
// with 8081 (the port used by the curl commands in the file header) and the
// same handler name. Returns 0 after http_serve returns.
// NOTE(review): http_serve presumably blocks while serving -- confirm against
// the runtime.
fn main() -> Int {
http_set_handler("handle")
http_serve(8081, "handle")
return 0
}
+21
View File
@@ -0,0 +1,21 @@
// json-array-traversal.el acceptance test for json_get dot-path with array
// indices.
//
// Before fix: json_get("...", "0.field") would substring-search for a literal
// key named `"0.field"` and find nothing, returning "".
//
// After fix: dot-path segments that are all digits are treated as array
// indices and the walker descends into the array.
// Reads the `name` field of elements 0 and 1 from a two-element JSON array
// using dot-paths with numeric segments, and returns the two results joined
// by a comma. With array-index traversal working this is expected to be
// "alice,bob"; per the file header, the old behaviour returned "" for each
// lookup.
fn test_array_traversal() -> String {
let s: String = "[{\"name\":\"alice\"},{\"name\":\"bob\"}]"
// "0.name" / "1.name": all-digit first segment, then an object key.
let a: String = json_get(s, "0.name")
let b: String = json_get(s, "1.name")
return a + "," + b
}
// Entry point: runs test_array_traversal, prints its result, and returns 0.
fn main() -> Int {
let r: String = test_array_traversal()
print(r)
return 0
}
+22
View File
@@ -0,0 +1,22 @@
// reassign-in-if.el acceptance test for bare reassignment inside if-body.
//
// Before fix: parser dropped `x = "override"` on the floor and codegen emitted
// three orphan expressions (`x; EL_NULL; EL_STR("override");`). Effective store
// was lost, so the function returned "default".
//
// After fix: parse_stmt recognises `Ident "=" Expr` as an Assign statement and
// codegen emits a real C assignment, so the function returns "override".
// Binds x to "default", reassigns it with a bare `x = ...` inside an
// always-taken if-body, and returns x. Per the file header the expected
// result is "override"; "default" means the store was dropped.
fn test_reassign() -> String {
let x: String = "default"
if true {
// Bare reassignment (no `let`): the statement form under test.
x = "override"
}
return x
}
// Entry point: runs test_reassign, prints its result, and returns 0.
fn main() -> Int {
let r: String = test_reassign()
print(r)
return 0
}