runtime: rename str_format param 'template' to 'fmt'

template is a reserved keyword in C++; though not in C, it blocks this header from ever being included from C++ code. Match printf- family convention with fmt instead. The deeper question of whether string-template substitution is the right abstraction for our substrate is filed separately as backlog.
2026-05-02 12:45:48 -05:00
parent 742bd0b4f9
commit 6d897289a3
2 changed files with 810 additions and 0 deletions
@@ -1968,6 +1968,526 @@ el_val_t url_decode(el_val_t sv) {
    return el_wrap_str(out);
 }

+/* ── HTML allowlist sanitizer ────────────────────────────────────────────────
+ * el_html_sanitize(input, allowlist_json)
+ *
+ * Strict allowlist HTML cleaner. Replaces the older denylist patterns
+ * (str_replace cascades that wrapped dangerous tags in HTML comments and
+ * renamed `on*` attributes). The denylist approach is fragile: comment-
+ * wrapping can be re-broken by a literal `-->` inside an attacker-supplied
+ * attribute value, and every new attack vector requires a code change.
+ *
+ * Design:
+ *   - Single-pass byte-level state machine.
+ *   - Tag and attribute names are matched case-insensitively against the
+ *     allowlist. Unknown tags are dropped entirely (the open and close
+ *     markers are stripped; their inner text content survives, escaped).
+ *   - A small set of "dangerous container" tags (script, style, iframe,
+ *     object, embed, form, plus a few rarer ones) drop themselves AND
+ *     their full subtree — text between `<script>` and `</script>` is
+ *     CDATA-like and must not be re-emitted as escaped text either.
+ *   - Comments (<!-- -->), doctype (<!DOCTYPE>), CDATA (<![CDATA[...]]>),
+ *     and processing instructions (<?...?>) are dropped entirely.
+ *   - Text content outside dropped subtrees is HTML-escaped (&, <, >, ", ').
+ *   - Attribute values are unquoted/dequoted, then re-emitted with double
+ *     quotes around the cleanly-escaped value.
+ *   - For `<a href>` and any `src` attribute, the URL scheme is validated:
+ *     only http:, https:, mailto:, fragment-only `#anchor`, or relative
+ *     paths are allowed. Anything else (javascript:, data:, vbscript:,
+ *     about:, file:, etc.) drops the attribute.
+ *   - Self-closing void tags (br, hr, img, etc.) emit without a close tag.
+ *   - Malformed input (unclosed tag at EOF, bad attribute syntax) drops
+ *     the pending tag and continues. Pre-encoded entities (&lt;, &amp;,
+ *     etc.) are passed through verbatim — the browser will decode them
+ *     safely on render.
+ *
+ * Allowlist format (JSON string):
+ *   {"p":[],"a":["href","title"],"strong":[],...}
+ *   - Key = lowercase tag name.
+ *   - Value = JSON array of allowed attribute names (lowercase).
+ *   - Empty array means tag allowed but no attributes survive.
+ *
+ * Output is a freshly-allocated arena-tracked el_val_t string. */
+
+/* Internal byte buffer with realloc-doubling. Used during sanitization;
+ * the final result is copied into an arena-tracked el_strbuf so the caller
+ * sees standard runtime memory semantics. */
+typedef struct {
+    char*  data;
+    size_t len;
+    size_t cap;
+} html_buf_t;
+
+static void html_buf_init(html_buf_t* b) {
+    b->cap = 256;
+    b->data = malloc(b->cap);
+    if (!b->data) { fputs("el_runtime: out of memory\n", stderr); exit(1); }
+    b->len = 0;
+}
+
+static void html_buf_grow(html_buf_t* b, size_t need) {
+    if (b->len + need + 1 <= b->cap) return;
+    size_t nc = b->cap;
+    while (b->len + need + 1 > nc) nc *= 2;
+    char* nd = realloc(b->data, nc);
+    if (!nd) { fputs("el_runtime: out of memory\n", stderr); exit(1); }
+    b->data = nd;
+    b->cap = nc;
+}
+
+static void html_buf_putc(html_buf_t* b, char c) {
+    html_buf_grow(b, 1);
+    b->data[b->len++] = c;
+}
+
+static void html_buf_puts(html_buf_t* b, const char* s) {
+    if (!s) return;
+    size_t n = strlen(s);
+    html_buf_grow(b, n);
+    memcpy(b->data + b->len, s, n);
+    b->len += n;
+}
+
+static void html_buf_free(html_buf_t* b) {
+    free(b->data);
+    b->data = NULL;
+    b->len = b->cap = 0;
+}
+
+/* ASCII tolower, locale-independent. */
+static int html_tolower(int c) {
+    return (c >= 'A' && c <= 'Z') ? c + 32 : c;
+}
+
+/* Case-insensitive ASCII compare of [a, a+n) against c-string `s`.
+ * Returns 1 iff lengths match and bytes are equal under tolower. */
+static int html_ieq_n(const char* a, size_t n, const char* s) {
+    if (!a || !s) return 0;
+    if (strlen(s) != n) return 0;
+    for (size_t i = 0; i < n; i++) {
+        if (html_tolower((unsigned char)a[i]) != html_tolower((unsigned char)s[i])) return 0;
+    }
+    return 1;
+}
+
+/* Case-insensitive ASCII compare of two byte slices. */
+static int html_iemem(const char* a, const char* b, size_t n) {
+    for (size_t i = 0; i < n; i++) {
+        if (html_tolower((unsigned char)a[i]) != html_tolower((unsigned char)b[i])) return 0;
+    }
+    return 1;
+}
+
+/* Walk a JSON allowlist object and find the value (an array) for a given
+ * tag key, comparing case-insensitively. On hit returns a pointer to the
+ * opening `[` of the array and writes the byte length of the array span
+ * (including the brackets) to *out_len. On miss returns NULL.
+ *
+ * The parser is intentionally tiny: it does not handle escapes inside
+ * keys (allowlist authors do not need them), and it relies on balanced
+ * brackets/quotes within the value array. */
+static const char* html_allowlist_find(const char* allow, const char* tag,
+                                       size_t tag_len, size_t* out_len) {
+    if (!allow) return NULL;
+    const char* p = allow;
+    while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') p++;
+    if (*p != '{') return NULL;
+    p++;
+    while (*p) {
+        while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r' || *p == ',') p++;
+        if (*p == '}' || *p == 0) return NULL;
+        if (*p != '"') return NULL;
+        p++;
+        const char* k = p;
+        while (*p && *p != '"') p++;
+        if (*p != '"') return NULL;
+        size_t klen = (size_t)(p - k);
+        p++;
+        while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') p++;
+        if (*p != ':') return NULL;
+        p++;
+        while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') p++;
+        if (*p != '[') return NULL;
+        const char* arr_start = p;
+        int depth = 0;
+        int in_str = 0;
+        while (*p) {
+            char c = *p;
+            if (in_str) {
+                if (c == '\\' && p[1]) { p += 2; continue; }
+                if (c == '"') in_str = 0;
+            } else {
+                if (c == '"') in_str = 1;
+                else if (c == '[') depth++;
+                else if (c == ']') { depth--; if (depth == 0) { p++; break; } }
+            }
+            p++;
+        }
+        size_t alen = (size_t)(p - arr_start);
+        int match = (klen == tag_len) && html_iemem(k, tag, klen);
+        if (match) {
+            if (out_len) *out_len = alen;
+            return arr_start;
+        }
+    }
+    return NULL;
+}
+
+/* Returns 1 iff `attr` (length attr_len) appears as a string element
+ * in the JSON array slice [arr, arr+arr_len). Comparison is case-
+ * insensitive. */
+static int html_attr_in_array(const char* arr, size_t arr_len,
+                              const char* attr, size_t attr_len) {
+    if (!arr || arr_len < 2) return 0;
+    const char* p = arr + 1;
+    const char* end = arr + arr_len - 1;
+    while (p < end) {
+        while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r' || *p == ',')) p++;
+        if (p >= end) return 0;
+        if (*p != '"') return 0;
+        p++;
+        const char* s = p;
+        while (p < end && *p != '"') {
+            if (*p == '\\' && p + 1 < end) p++;
+            p++;
+        }
+        if (p >= end) return 0;
+        size_t slen = (size_t)(p - s);
+        p++;
+        if (slen == attr_len && html_iemem(s, attr, slen)) return 1;
+    }
+    return 0;
+}
+
+/* Hard-coded set of tags whose content is ALSO dropped (entire subtree). */
+static int html_is_dangerous_container(const char* tag, size_t tag_len) {
+    static const char* names[] = {
+        "script", "style", "iframe", "object", "embed", "form",
+        "noscript", "noembed", "template", "svg", "math", "frame",
+        "frameset", "applet", "audio", "video", "source", "track",
+        NULL
+    };
+    for (int i = 0; names[i]; i++) {
+        if (html_ieq_n(tag, tag_len, names[i])) return 1;
+    }
+    return 0;
+}
+
+/* HTML void elements — emit without a close tag. */
+static int html_is_void(const char* tag, size_t tag_len) {
+    static const char* names[] = {
+        "area", "base", "br", "col", "embed", "hr", "img", "input",
+        "link", "meta", "param", "source", "track", "wbr",
+        NULL
+    };
+    for (int i = 0; names[i]; i++) {
+        if (html_ieq_n(tag, tag_len, names[i])) return 1;
+    }
+    return 0;
+}
+
+/* Append a single byte HTML-escaped into the output buffer. */
+static void html_escape_byte(html_buf_t* out, unsigned char c) {
+    switch (c) {
+        case '<':  html_buf_puts(out, "&lt;");   break;
+        case '>':  html_buf_puts(out, "&gt;");   break;
+        case '"':  html_buf_puts(out, "&quot;"); break;
+        case '\'': html_buf_puts(out, "&#39;");  break;
+        default:   html_buf_putc(out, (char)c);  break;
+    }
+}
+
+/* Validate a URL value against the allowlist of safe schemes for hrefs.
+ * Returns 1 iff the URL is safe to emit. Acceptable forms:
+ *   - http:// or https:// (case-insensitive)
+ *   - mailto:
+ *   - fragment-only `#anchor`
+ *   - relative path that does not contain a colon before the first
+ *     slash/?/# (so `foo/bar`, `/foo`, `?x=1` are OK; `javascript:x` is
+ *     not — its colon precedes any path/hash/query separator).
+ *
+ * URL leading whitespace and embedded ASCII control bytes (TAB, LF, CR)
+ * are stripped before the scheme test, mirroring how browsers normalise
+ * URLs (these bytes are otherwise a known XSS bypass: `java\tscript:`). */
+static int html_url_is_safe(const char* url, size_t len) {
+    if (!url || len == 0) return 1;  /* empty href is harmless */
+    size_t i = 0;
+    while (i < len) {
+        unsigned char c = (unsigned char)url[i];
+        if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == 0x0B || c == 0x0C) {
+            i++; continue;
+        }
+        break;
+    }
+    if (i >= len) return 1;  /* whitespace only */
+    if (url[i] == '#') return 1;  /* fragment only */
+    if (url[i] == '/' || url[i] == '?') return 1;  /* relative */
+    /* Find the first scheme-terminating character. */
+    size_t scheme_end = (size_t)-1;
+    for (size_t j = i; j < len; j++) {
+        char c = url[j];
+        if (c == ':') { scheme_end = j; break; }
+        if (c == '/' || c == '?' || c == '#') break;
+    }
+    if (scheme_end == (size_t)-1) return 1;  /* no colon → relative path */
+    /* Lowercase the scheme, stripping embedded control bytes. */
+    char scheme[32];
+    size_t sl = 0;
+    for (size_t j = i; j < scheme_end && sl < sizeof(scheme) - 1; j++) {
+        unsigned char c = (unsigned char)url[j];
+        if (c == '\t' || c == '\n' || c == '\r' || c == 0x0B || c == 0x0C) continue;
+        scheme[sl++] = (char)html_tolower(c);
+    }
+    scheme[sl] = '\0';
+    if (strcmp(scheme, "http") == 0) return 1;
+    if (strcmp(scheme, "https") == 0) return 1;
+    if (strcmp(scheme, "mailto") == 0) return 1;
+    return 0;
+}
+
+el_val_t el_html_sanitize(el_val_t input_v, el_val_t allowlist_v) {
+    const char* input = EL_CSTR(input_v);
+    const char* allow = EL_CSTR(allowlist_v);
+    if (!input) return el_wrap_str(el_strdup(""));
+    if (!allow) allow = "{}";
+    size_t in_len = strlen(input);
+
+    html_buf_t out;
+    html_buf_init(&out);
+
+    size_t i = 0;
+    while (i < in_len) {
+        unsigned char c = (unsigned char)input[i];
+        if (c != '<') {
+            /* Plain text — escape and emit. We pass `&` through verbatim
+             * to preserve pre-encoded entities (`&lt;`, `&amp;`, `&#x...;`)
+             * which the browser will decode safely. */
+            if (c == '&') html_buf_putc(&out, '&');
+            else          html_escape_byte(&out, c);
+            i++;
+            continue;
+        }
+        /* `<` — try to parse a tag. */
+        if (i + 1 >= in_len) {
+            html_buf_puts(&out, "&lt;");
+            i++;
+            continue;
+        }
+        /* Comments, doctype, CDATA, processing instructions — drop entirely. */
+        if (input[i + 1] == '!') {
+            if (i + 3 < in_len && input[i + 2] == '-' && input[i + 3] == '-') {
+                size_t j = i + 4;
+                while (j + 2 < in_len && !(input[j] == '-' && input[j + 1] == '-' && input[j + 2] == '>')) j++;
+                if (j + 2 < in_len) i = j + 3;
+                else i = in_len;
+                continue;
+            }
+            size_t j = i + 2;
+            while (j < in_len && input[j] != '>') j++;
+            i = (j < in_len) ? j + 1 : in_len;
+            continue;
+        }
+        if (input[i + 1] == '?') {
+            size_t j = i + 2;
+            while (j < in_len && input[j] != '>') j++;
+            i = (j < in_len) ? j + 1 : in_len;
+            continue;
+        }
+        int is_close = 0;
+        size_t name_start = i + 1;
+        if (input[i + 1] == '/') {
+            is_close = 1;
+            name_start = i + 2;
+        }
+        if (name_start >= in_len) {
+            html_buf_puts(&out, "&lt;");
+            i++;
+            continue;
+        }
+        unsigned char nc = (unsigned char)input[name_start];
+        if (!((nc >= 'a' && nc <= 'z') || (nc >= 'A' && nc <= 'Z'))) {
+            /* `<` followed by non-letter — emit as escaped text. */
+            html_buf_puts(&out, "&lt;");
+            i++;
+            continue;
+        }
+        size_t name_end = name_start;
+        while (name_end < in_len) {
+            unsigned char x = (unsigned char)input[name_end];
+            if ((x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') ||
+                (x >= '0' && x <= '9') || x == '-' || x == '_' || x == ':') {
+                name_end++;
+            } else {
+                break;
+            }
+        }
+        const char* tag = input + name_start;
+        size_t tag_len = name_end - name_start;
+        /* Find the `>` that closes this tag, respecting quoted attrs. */
+        size_t cur = name_end;
+        int self_close = 0;
+        while (cur < in_len) {
+            unsigned char x = (unsigned char)input[cur];
+            if (x == '"' || x == '\'') {
+                unsigned char q = x;
+                cur++;
+                while (cur < in_len && (unsigned char)input[cur] != q) cur++;
+                if (cur < in_len) cur++;  /* skip closing quote */
+                continue;
+            }
+            if (x == '/' && cur + 1 < in_len && input[cur + 1] == '>') {
+                self_close = 1;
+                break;
+            }
+            if (x == '>') break;
+            cur++;
+        }
+        if (cur >= in_len) {
+            /* Malformed: unclosed tag at EOF. Drop the rest of the input. */
+            i = in_len;
+            continue;
+        }
+        size_t tag_end = self_close ? cur + 2 : cur + 1;  /* one past `>` */
+        /* Dangerous container — drop the whole subtree. */
+        if (!is_close && html_is_dangerous_container(tag, tag_len)) {
+            if (self_close || html_is_void(tag, tag_len)) {
+                i = tag_end;
+                continue;
+            }
+            size_t scan = tag_end;
+            int found_close = 0;
+            while (scan < in_len) {
+                if (input[scan] != '<') { scan++; continue; }
+                if (scan + 1 < in_len && input[scan + 1] == '/') {
+                    size_t cn_start = scan + 2;
+                    size_t cn_end = cn_start;
+                    while (cn_end < in_len) {
+                        unsigned char x = (unsigned char)input[cn_end];
+                        if ((x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') ||
+                            (x >= '0' && x <= '9') || x == '-' || x == '_' || x == ':') {
+                            cn_end++;
+                        } else break;
+                    }
+                    if (cn_end - cn_start == tag_len &&
+                        html_iemem(input + cn_start, tag, tag_len)) {
+                        size_t end_close = cn_end;
+                        while (end_close < in_len && input[end_close] != '>') end_close++;
+                        i = (end_close < in_len) ? end_close + 1 : in_len;
+                        found_close = 1;
+                        break;
+                    }
+                }
+                scan++;
+            }
+            if (!found_close) {
+                /* No matching close — drop everything from here on. */
+                i = in_len;
+            }
+            continue;
+        }
+        /* Look up the tag in the allowlist. */
+        size_t arr_len = 0;
+        const char* arr = html_allowlist_find(allow, tag, tag_len, &arr_len);
+        if (!arr) {
+            /* Tag not allowed. Drop the open/close marker; inner text is
+             * processed by the outer loop and re-emitted as escaped text. */
+            i = tag_end;
+            continue;
+        }
+        if (is_close) {
+            if (!html_is_void(tag, tag_len)) {
+                html_buf_putc(&out, '<');
+                html_buf_putc(&out, '/');
+                for (size_t k = 0; k < tag_len; k++) {
+                    html_buf_putc(&out, (char)html_tolower((unsigned char)tag[k]));
+                }
+                html_buf_putc(&out, '>');
+            }
+            i = tag_end;
+            continue;
+        }
+        /* Allowed open tag. Emit `<name` and walk the attributes between
+         * `name_end` and the closing `>`. */
+        html_buf_putc(&out, '<');
+        for (size_t k = 0; k < tag_len; k++) {
+            html_buf_putc(&out, (char)html_tolower((unsigned char)tag[k]));
+        }
+        size_t a = name_end;
+        while (a < cur) {
+            unsigned char x = (unsigned char)input[a];
+            if (x == ' ' || x == '\t' || x == '\n' || x == '\r' || x == '/') { a++; continue; }
+            size_t an_start = a;
+            while (a < cur) {
+                unsigned char y = (unsigned char)input[a];
+                if (y == '=' || y == ' ' || y == '\t' || y == '\n' || y == '\r' || y == '/' || y == '>') break;
+                a++;
+            }
+            size_t an_len = a - an_start;
+            if (an_len == 0) { a++; continue; }
+            size_t av_start = 0;
+            size_t av_len = 0;
+            int has_value = 0;
+            size_t b = a;
+            while (b < cur && (input[b] == ' ' || input[b] == '\t' || input[b] == '\n' || input[b] == '\r')) b++;
+            if (b < cur && input[b] == '=') {
+                has_value = 1;
+                b++;
+                while (b < cur && (input[b] == ' ' || input[b] == '\t' || input[b] == '\n' || input[b] == '\r')) b++;
+                if (b < cur && (input[b] == '"' || input[b] == '\'')) {
+                    unsigned char q = (unsigned char)input[b];
+                    b++;
+                    av_start = b;
+                    while (b < cur && (unsigned char)input[b] != q) b++;
+                    av_len = b - av_start;
+                    if (b < cur) b++;
+                } else {
+                    av_start = b;
+                    while (b < cur) {
+                        unsigned char y = (unsigned char)input[b];
+                        if (y == ' ' || y == '\t' || y == '\n' || y == '\r' || y == '>') break;
+                        b++;
+                    }
+                    av_len = b - av_start;
+                }
+                a = b;
+            }
+            if (!html_attr_in_array(arr, arr_len, input + an_start, an_len)) continue;
+            int is_href = (an_len == 4 && html_iemem(input + an_start, "href", 4));
+            int is_src  = (an_len == 3 && html_iemem(input + an_start, "src",  3));
+            if ((is_href || is_src) && has_value) {
+                if (!html_url_is_safe(input + av_start, av_len)) continue;
+            }
+            html_buf_putc(&out, ' ');
+            for (size_t k = 0; k < an_len; k++) {
+                html_buf_putc(&out, (char)html_tolower((unsigned char)input[an_start + k]));
+            }
+            if (has_value) {
+                html_buf_puts(&out, "=\"");
+                for (size_t k = 0; k < av_len; k++) {
+                    unsigned char y = (unsigned char)input[av_start + k];
+                    /* Re-escape so the emitted attribute is well-formed
+                     * double-quoted HTML. `&` passes through to preserve
+                     * pre-encoded entities. */
+                    if (y == '"')      html_buf_puts(&out, "&quot;");
+                    else if (y == '<') html_buf_puts(&out, "&lt;");
+                    else if (y == '>') html_buf_puts(&out, "&gt;");
+                    else                html_buf_putc(&out, (char)y);
+                }
+                html_buf_putc(&out, '"');
+            }
+        }
+        html_buf_putc(&out, '>');
+        i = tag_end;
+    }
+    /* Copy into arena-tracked buffer so the standard runtime memory model
+     * applies to the returned string. */
+    char* result = el_strbuf(out.len);
+    memcpy(result, out.data, out.len);
+    result[out.len] = '\0';
+    html_buf_free(&out);
+    return el_wrap_str(result);
+}
+
 /* ── JSON ────────────────────────────────────────────────────────────────── */

 /* True iff the segment is non-empty and every byte is an ASCII digit. We treat
@@ -2827,6 +3347,283 @@ el_val_t sleep_ms(el_val_t ms) {
    return 0;
 }

+/* ── Instant + Duration: first-class temporal types ──────────────────────────
+ * El's substrate (Neuron) is a temporal cognition system. Memory salience
+ * decay, the six-tier pacemaker, TTL caches, and supersession are all
+ * temporal. Treating time as a raw Int (now() returning ms-since-epoch and
+ * arithmetic done with mixed unit literals) lets bugs through the type
+ * system: `(now - cached_at) < 60` cannot tell ms from sec, and `sleep(30)`
+ * is ambiguous. This block introduces two dedicated representations.
+ *
+ * Representation:
+ *   Instant   — int64 nanoseconds since the Unix epoch
+ *   Duration  — int64 nanoseconds (signed; negative durations are legal,
+ *               e.g. when a deadline has passed)
+ *
+ * Both share the el_val_t (int64) slot the rest of the runtime uses, so no
+ * boxing / arena allocation is needed. Type discipline is enforced at the
+ * codegen layer: `let x: Duration = ...` registers `x` in __duration_names,
+ * and BinOp dispatches through typed wrappers (el_duration_add, etc.) that
+ * make intent explicit in the generated C. Mismatched ops (Instant+Instant,
+ * Duration+Int) are surfaced via #error directives at codegen time so the
+ * downstream cc step fails with a clear El-source-level message.
+ *
+ * Nanosecond precision matches POSIX clock_gettime / nanosleep granularity.
+ * 2^63 nanos covers ~292 years from epoch — comfortably past 2200, plenty
+ * for a memory-system runtime that never schedules outside a human lifespan.
+ */
+
+/* now() — current Instant. Wraps clock_gettime(CLOCK_REALTIME) for nanosecond
+ * precision. Falls back to gettimeofday on systems where clock_gettime is
+ * unavailable (defensive — every supported platform has it). */
+el_val_t el_now_instant(void) {
+    struct timespec ts;
+    if (clock_gettime(CLOCK_REALTIME, &ts) == 0) {
+        int64_t ns = (int64_t)ts.tv_sec * 1000000000LL + (int64_t)ts.tv_nsec;
+        return (el_val_t)ns;
+    }
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    int64_t ns = (int64_t)tv.tv_sec * 1000000000LL
+               + (int64_t)tv.tv_usec * 1000LL;
+    return (el_val_t)ns;
+}
+
+el_val_t now(void) {
+    return el_now_instant();
+}
+
+/* unix_seconds(n)  — Instant from a Unix-epoch second count.
+ * unix_millis(n)   — Instant from a Unix-epoch millisecond count.            */
+el_val_t unix_seconds(el_val_t n) {
+    int64_t s = (int64_t)n;
+    return (el_val_t)(s * 1000000000LL);
+}
+
+el_val_t unix_millis(el_val_t n) {
+    int64_t m = (int64_t)n;
+    return (el_val_t)(m * 1000000LL);
+}
+
+/* instant_from_iso8601 — parse a strict subset:
+ *   YYYY-MM-DDTHH:MM:SS[.fff]Z
+ * Returns 0 (the Unix-epoch sentinel) on parse failure. Callers that need to
+ * distinguish epoch-zero from a parse error should use a wider sentinel
+ * representation; the current zero-on-failure choice matches existing El
+ * runtime conventions for parse builtins (str_to_int, parse_int).            */
+el_val_t instant_from_iso8601(el_val_t s) {
+    const char* str = EL_CSTR(s);
+    if (!str) return (el_val_t)0;
+    int Y, M, D, h, m, sec, frac = 0;
+    int n = sscanf(str, "%d-%d-%dT%d:%d:%d.%3d", &Y, &M, &D, &h, &m, &sec, &frac);
+    if (n < 6) {
+        n = sscanf(str, "%d-%d-%dT%d:%d:%dZ", &Y, &M, &D, &h, &m, &sec);
+        if (n < 6) return (el_val_t)0;
+    }
+    struct tm tm;
+    memset(&tm, 0, sizeof(tm));
+    tm.tm_year = Y - 1900;
+    tm.tm_mon  = M - 1;
+    tm.tm_mday = D;
+    tm.tm_hour = h;
+    tm.tm_min  = m;
+    tm.tm_sec  = sec;
+    /* timegm — UTC. POSIX-Y but available on macOS and glibc. */
+    time_t t = timegm(&tm);
+    if (t == (time_t)-1) return (el_val_t)0;
+    int64_t ns = (int64_t)t * 1000000000LL + (int64_t)frac * 1000000LL;
+    return (el_val_t)ns;
+}
+
+/* Duration constructors. The El-side postfix literals (30.seconds, 1.hour)
+ * are lowered by the codegen directly into a literal int64 of nanoseconds —
+ * these constructors are for runtime values where the count is dynamic. */
+el_val_t el_duration_from_nanos(el_val_t ns) {
+    return (el_val_t)(int64_t)ns;
+}
+
+el_val_t duration_seconds(el_val_t n) {
+    int64_t s = (int64_t)n;
+    return (el_val_t)(s * 1000000000LL);
+}
+
+el_val_t duration_millis(el_val_t n) {
+    int64_t m = (int64_t)n;
+    return (el_val_t)(m * 1000000LL);
+}
+
+el_val_t duration_nanos(el_val_t n) {
+    return (el_val_t)(int64_t)n;
+}
+
+/* Arithmetic — typed wrappers. At the C level these are no-op casts, but
+ * the codegen routes Instant/Duration BinOps through them so the generated
+ * C says `el_instant_add_dur(start, dur)` rather than `start + dur`. The
+ * intent is explicit, the operand order is documented, and a future change
+ * to the underlying representation (saturating arithmetic, overflow guards)
+ * has a single chokepoint.                                                  */
+el_val_t el_instant_add_dur(el_val_t inst, el_val_t dur) {
+    return (el_val_t)((int64_t)inst + (int64_t)dur);
+}
+
+el_val_t el_instant_sub_dur(el_val_t inst, el_val_t dur) {
+    return (el_val_t)((int64_t)inst - (int64_t)dur);
+}
+
+el_val_t el_instant_diff(el_val_t a, el_val_t b) {
+    /* a - b — yields a Duration (negative if b is later than a). */
+    return (el_val_t)((int64_t)a - (int64_t)b);
+}
+
+el_val_t el_duration_add(el_val_t a, el_val_t b) {
+    return (el_val_t)((int64_t)a + (int64_t)b);
+}
+
+el_val_t el_duration_sub(el_val_t a, el_val_t b) {
+    return (el_val_t)((int64_t)a - (int64_t)b);
+}
+
+el_val_t el_duration_scale(el_val_t dur, el_val_t scalar) {
+    return (el_val_t)((int64_t)dur * (int64_t)scalar);
+}
+
+el_val_t el_duration_div(el_val_t dur, el_val_t scalar) {
+    int64_t s = (int64_t)scalar;
+    if (s == 0) return (el_val_t)0;
+    return (el_val_t)((int64_t)dur / s);
+}
+
+/* Comparisons. Return 1/0 in el_val_t convention. */
+el_val_t el_instant_lt(el_val_t a, el_val_t b)  { return (el_val_t)((int64_t)a <  (int64_t)b ? 1 : 0); }
+el_val_t el_instant_le(el_val_t a, el_val_t b)  { return (el_val_t)((int64_t)a <= (int64_t)b ? 1 : 0); }
+el_val_t el_instant_gt(el_val_t a, el_val_t b)  { return (el_val_t)((int64_t)a >  (int64_t)b ? 1 : 0); }
+el_val_t el_instant_ge(el_val_t a, el_val_t b)  { return (el_val_t)((int64_t)a >= (int64_t)b ? 1 : 0); }
+el_val_t el_instant_eq(el_val_t a, el_val_t b)  { return (el_val_t)((int64_t)a == (int64_t)b ? 1 : 0); }
+el_val_t el_instant_ne(el_val_t a, el_val_t b)  { return (el_val_t)((int64_t)a != (int64_t)b ? 1 : 0); }
+el_val_t el_duration_lt(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a <  (int64_t)b ? 1 : 0); }
+el_val_t el_duration_le(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a <= (int64_t)b ? 1 : 0); }
+el_val_t el_duration_gt(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a >  (int64_t)b ? 1 : 0); }
+el_val_t el_duration_ge(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a >= (int64_t)b ? 1 : 0); }
+el_val_t el_duration_eq(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a == (int64_t)b ? 1 : 0); }
+el_val_t el_duration_ne(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a != (int64_t)b ? 1 : 0); }
+
+/* Conversions. */
+el_val_t instant_to_unix_seconds(el_val_t i) {
+    return (el_val_t)((int64_t)i / 1000000000LL);
+}
+
+el_val_t instant_to_unix_millis(el_val_t i) {
+    return (el_val_t)((int64_t)i / 1000000LL);
+}
+
+el_val_t instant_to_iso8601(el_val_t i) {
+    int64_t ns = (int64_t)i;
+    time_t s = (time_t)(ns / 1000000000LL);
+    int    msec = (int)((ns / 1000000LL) % 1000LL);
+    if (msec < 0) { msec += 1000; s -= 1; }
+    struct tm tm;
+    gmtime_r(&s, &tm);
+    char buf[64];
+    snprintf(buf, sizeof(buf), "%04d-%02d-%02dT%02d:%02d:%02d.%03dZ",
+             tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+             tm.tm_hour, tm.tm_min, tm.tm_sec, msec);
+    return el_wrap_str(el_strdup(buf));
+}
+
+el_val_t duration_to_seconds(el_val_t d) {
+    return (el_val_t)((int64_t)d / 1000000000LL);
+}
+
+el_val_t duration_to_millis(el_val_t d) {
+    return (el_val_t)((int64_t)d / 1000000LL);
+}
+
+el_val_t duration_to_nanos(el_val_t d) {
+    return (el_val_t)(int64_t)d;
+}
+
+/* sleep(Duration) — Phase 1 replacement for ambiguous sleep(Int). The runtime
+ * still exposes sleep_secs/sleep_ms for legacy call sites; codegen lowers
+ * sleep(Duration) to el_sleep_duration(d). Negative durations clamp to 0 so a
+ * stale deadline doesn't block forever. */
+el_val_t el_sleep_duration(el_val_t dur) {
+    int64_t ns = (int64_t)dur;
+    if (ns < 0) ns = 0;
+    struct timespec ts;
+    ts.tv_sec  = (time_t)(ns / 1000000000LL);
+    ts.tv_nsec = (long)(ns % 1000000000LL);
+    nanosleep(&ts, NULL);
+    return (el_val_t)0;
+}
+
+/* unix_timestamp() — back-compat. Existing El callers expect an Int seconds
+ * value; this stays an Int returner so the type system isn't disturbed for
+ * legacy code. New code should call now() and convert when needed.          */
+el_val_t unix_timestamp(void) {
+    return instant_to_unix_seconds(el_now_instant());
+}
+
+/* TTL cache helpers. Backed by the existing process-wide K/V (state_set/get)
+ * with a sibling __ttl_set_at_<key> entry recording the Instant of the last
+ * write. ttl_cache_get returns "" if the entry is missing or stale, so call
+ * sites can branch on `if v == "" { miss } else { hit }` — the same shape
+ * existing get-with-default code uses. No more (now - cached_at) < 60. */
+el_val_t ttl_cache_set(el_val_t key, el_val_t value) {
+    const char* k = EL_CSTR(key);
+    if (!k) return (el_val_t)0;
+    /* Store the value at the user's key. */
+    state_set(key, value);
+    /* Stamp set_at — opaque schema, namespaced under __ttl: prefix so user
+     * keys can't collide with stamps. */
+    size_t klen = strlen(k);
+    char* stamp_key = (char*)malloc(klen + 16);
+    if (!stamp_key) return (el_val_t)0;
+    snprintf(stamp_key, klen + 16, "__ttl_at:%s", k);
+    int64_t now_ns = (int64_t)el_now_instant();
+    char buf[32];
+    snprintf(buf, sizeof(buf), "%lld", (long long)now_ns);
+    state_set(EL_STR(stamp_key), EL_STR(buf));
+    free(stamp_key);
+    return (el_val_t)1;
+}
+
+el_val_t ttl_cache_get(el_val_t key, el_val_t max_age) {
+    const char* k = EL_CSTR(key);
+    if (!k) return el_wrap_str(el_strdup(""));
+    /* Look up stamp. */
+    size_t klen = strlen(k);
+    char* stamp_key = (char*)malloc(klen + 16);
+    if (!stamp_key) return el_wrap_str(el_strdup(""));
+    snprintf(stamp_key, klen + 16, "__ttl_at:%s", k);
+    el_val_t stamp = state_get(EL_STR(stamp_key));
+    free(stamp_key);
+    const char* sv = EL_CSTR(stamp);
+    if (!sv || !*sv) return el_wrap_str(el_strdup(""));
+    int64_t set_at = (int64_t)atoll(sv);
+    int64_t now_ns = (int64_t)el_now_instant();
+    int64_t age    = now_ns - set_at;
+    int64_t max_ns = (int64_t)max_age;
+    if (age < 0)       return el_wrap_str(el_strdup(""));   /* clock skew — treat as miss */
+    if (age > max_ns)  return el_wrap_str(el_strdup(""));   /* expired */
+    return state_get(key);
+}
+
+el_val_t ttl_cache_age(el_val_t key) {
+    const char* k = EL_CSTR(key);
+    if (!k) return (el_val_t)INT64_MAX;
+    size_t klen = strlen(k);
+    char* stamp_key = (char*)malloc(klen + 16);
+    if (!stamp_key) return (el_val_t)INT64_MAX;
+    snprintf(stamp_key, klen + 16, "__ttl_at:%s", k);
+    el_val_t stamp = state_get(EL_STR(stamp_key));
+    free(stamp_key);
+    const char* sv = EL_CSTR(stamp);
+    if (!sv || !*sv) return (el_val_t)INT64_MAX;
+    int64_t set_at = (int64_t)atoll(sv);
+    int64_t now_ns = (int64_t)el_now_instant();
+    return (el_val_t)(now_ns - set_at);
+}
+
 /* ── UUID v4 ─────────────────────────────────────────────────────────────── */

 static int _el_uuid_seeded = 0;
@@ -199,6 +199,19 @@ el_val_t  http_get_to_file(el_val_t url, el_val_t headers_map, el_val_t output_p
 el_val_t  url_encode(el_val_t s);   /* RFC 3986 unreserved set */
 el_val_t  url_decode(el_val_t s);   /* '+' → space, %XX → byte  */

+/* ── HTML allowlist sanitizer ────────────────────────────────────────────────
+ * el_html_sanitize(input_html, allowlist_json) — strict allowlist HTML
+ * cleaner. State-machine parser; tag/attribute names compared case-
+ * insensitively against the allowlist; `<a href>` / `<… src>` URL schemes
+ * validated (http, https, mailto, fragment-only, or relative); whole-
+ * subtree drop for script / style / iframe / object / embed / form; HTML-
+ * escapes free text outside dropped subtrees.
+ *
+ * The allowlist is JSON of the form
+ *     {"p":[],"a":["href","title"],"strong":[],...}
+ * where each value is the array of attribute names allowed for that tag. */
+el_val_t  el_html_sanitize(el_val_t input_html, el_val_t allowlist_json);
+
 /* ── Filesystem ──────────────────────────────────────────────────────────── */

 el_val_t  fs_read(el_val_t path);