From 6d897289a35a8f50d55a68934d954e907cfcef76 Mon Sep 17 00:00:00 2001 From: Will Anderson Date: Sat, 2 May 2026 12:45:48 -0500 Subject: [PATCH] runtime: rename str_format param 'template' to 'fmt' template is a reserved keyword in C++; though not in C, it blocks this header from ever being included from C++ code. Match printf-family convention with fmt instead. The deeper question of whether string-template substitution is the right abstraction for our substrate is filed separately as backlog. --- el-compiler/runtime/el_runtime.c | 797 +++++++++++++++++++++++++++++++ el-compiler/runtime/el_runtime.h | 13 + 2 files changed, 810 insertions(+) diff --git a/el-compiler/runtime/el_runtime.c b/el-compiler/runtime/el_runtime.c index c3cf42d..4ac9eae 100644 --- a/el-compiler/runtime/el_runtime.c +++ b/el-compiler/runtime/el_runtime.c @@ -1968,6 +1968,526 @@ el_val_t url_decode(el_val_t sv) { return el_wrap_str(out); } +/* ── HTML allowlist sanitizer ──────────────────────────────────────────────── + * el_html_sanitize(input, allowlist_json) + * + * Strict allowlist HTML cleaner. Replaces the older denylist patterns + * (str_replace cascades that wrapped dangerous tags in HTML comments and + * renamed `on*` attributes). The denylist approach is fragile: comment- + * wrapping can be re-broken by a literal `-->` inside an attacker-supplied + * attribute value, and every new attack vector requires a code change. + * + * Design: + * - Single-pass byte-level state machine. + * - Tag and attribute names are matched case-insensitively against the + * allowlist. Unknown tags are dropped entirely (the open and close + * markers are stripped; their inner text content survives, escaped). + * - A small set of "dangerous container" tags (script, style, iframe, + * object, embed, form, plus a few rarer ones) drop themselves AND + * their full subtree — text between `<script>` and `</script>` is + * CDATA-like and must not be re-emitted as escaped text either. 
+ * - Comments (`<!-- ... -->`), doctype (`<!DOCTYPE ...>`), CDATA (`<![CDATA[ ... ]]>`), + * and processing instructions (`<? ... ?>`) are dropped entirely. + * - Text content outside dropped subtrees is HTML-escaped (<, >, ", '); + * `&` is passed through unchanged so pre-encoded entities survive. + * - Attribute values are unquoted/dequoted, then re-emitted with double + * quotes around the cleanly-escaped value. + * - For any `href` (e.g. `<a href>`) and any `src` attribute, the URL scheme is validated: + * only http:, https:, mailto:, fragment-only `#anchor`, or relative + * paths are allowed. Anything else (javascript:, data:, vbscript:, + * about:, file:, etc.) drops the attribute. + * - Self-closing void tags (br, hr, img, etc.) emit without a close tag. + * - Malformed input (unclosed tag at EOF, bad attribute syntax) drops + * the pending tag and continues. Pre-encoded entities (`&lt;`, `&amp;`, + * etc.) are passed through verbatim — the browser will decode them + * safely on render. + * + * Allowlist format (JSON string): + * {"p":[],"a":["href","title"],"strong":[],...} + * - Key = lowercase tag name. + * - Value = JSON array of allowed attribute names (lowercase). + * - Empty array means tag allowed but no attributes survive. + * + * Output is a freshly-allocated arena-tracked el_val_t string. */ + +/* Internal byte buffer with realloc-doubling. Used during sanitization; + * the final result is copied into an arena-tracked el_strbuf so the caller + * sees standard runtime memory semantics. 
*/ +typedef struct { + char* data; + size_t len; + size_t cap; +} html_buf_t; + +static void html_buf_init(html_buf_t* b) { + b->cap = 256; + b->data = malloc(b->cap); + if (!b->data) { fputs("el_runtime: out of memory\n", stderr); exit(1); } + b->len = 0; +} + +static void html_buf_grow(html_buf_t* b, size_t need) { + if (b->len + need + 1 <= b->cap) return; + size_t nc = b->cap; + while (b->len + need + 1 > nc) nc *= 2; + char* nd = realloc(b->data, nc); + if (!nd) { fputs("el_runtime: out of memory\n", stderr); exit(1); } + b->data = nd; + b->cap = nc; +} + +static void html_buf_putc(html_buf_t* b, char c) { + html_buf_grow(b, 1); + b->data[b->len++] = c; +} + +static void html_buf_puts(html_buf_t* b, const char* s) { + if (!s) return; + size_t n = strlen(s); + html_buf_grow(b, n); + memcpy(b->data + b->len, s, n); + b->len += n; +} + +static void html_buf_free(html_buf_t* b) { + free(b->data); + b->data = NULL; + b->len = b->cap = 0; +} + +/* ASCII tolower, locale-independent. */ +static int html_tolower(int c) { + return (c >= 'A' && c <= 'Z') ? c + 32 : c; +} + +/* Case-insensitive ASCII compare of [a, a+n) against c-string `s`. + * Returns 1 iff lengths match and bytes are equal under tolower. */ +static int html_ieq_n(const char* a, size_t n, const char* s) { + if (!a || !s) return 0; + if (strlen(s) != n) return 0; + for (size_t i = 0; i < n; i++) { + if (html_tolower((unsigned char)a[i]) != html_tolower((unsigned char)s[i])) return 0; + } + return 1; +} + +/* Case-insensitive ASCII compare of two byte slices. */ +static int html_iemem(const char* a, const char* b, size_t n) { + for (size_t i = 0; i < n; i++) { + if (html_tolower((unsigned char)a[i]) != html_tolower((unsigned char)b[i])) return 0; + } + return 1; +} + +/* Walk a JSON allowlist object and find the value (an array) for a given + * tag key, comparing case-insensitively. 
On hit returns a pointer to the + * opening `[` of the array and writes the byte length of the array span + * (including the brackets) to *out_len. On miss returns NULL. + * + * The parser is intentionally tiny: it does not handle escapes inside + * keys (allowlist authors do not need them), and it relies on balanced + * brackets/quotes within the value array. */ +static const char* html_allowlist_find(const char* allow, const char* tag, + size_t tag_len, size_t* out_len) { + if (!allow) return NULL; + const char* p = allow; + while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') p++; + if (*p != '{') return NULL; + p++; + while (*p) { + while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r' || *p == ',') p++; + if (*p == '}' || *p == 0) return NULL; + if (*p != '"') return NULL; + p++; + const char* k = p; + while (*p && *p != '"') p++; + if (*p != '"') return NULL; + size_t klen = (size_t)(p - k); + p++; + while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') p++; + if (*p != ':') return NULL; + p++; + while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') p++; + if (*p != '[') return NULL; + const char* arr_start = p; + int depth = 0; + int in_str = 0; + while (*p) { + char c = *p; + if (in_str) { + if (c == '\\' && p[1]) { p += 2; continue; } + if (c == '"') in_str = 0; + } else { + if (c == '"') in_str = 1; + else if (c == '[') depth++; + else if (c == ']') { depth--; if (depth == 0) { p++; break; } } + } + p++; + } + size_t alen = (size_t)(p - arr_start); + int match = (klen == tag_len) && html_iemem(k, tag, klen); + if (match) { + if (out_len) *out_len = alen; + return arr_start; + } + } + return NULL; +} + +/* Returns 1 iff `attr` (length attr_len) appears as a string element + * in the JSON array slice [arr, arr+arr_len). Comparison is case- + * insensitive. 
*/ +static int html_attr_in_array(const char* arr, size_t arr_len, + const char* attr, size_t attr_len) { + if (!arr || arr_len < 2) return 0; + const char* p = arr + 1; + const char* end = arr + arr_len - 1; + while (p < end) { + while (p < end && (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r' || *p == ',')) p++; + if (p >= end) return 0; + if (*p != '"') return 0; + p++; + const char* s = p; + while (p < end && *p != '"') { + if (*p == '\\' && p + 1 < end) p++; + p++; + } + if (p >= end) return 0; + size_t slen = (size_t)(p - s); + p++; + if (slen == attr_len && html_iemem(s, attr, slen)) return 1; + } + return 0; +} + +/* Hard-coded set of tags whose content is ALSO dropped (entire subtree). */ +static int html_is_dangerous_container(const char* tag, size_t tag_len) { + static const char* names[] = { + "script", "style", "iframe", "object", "embed", "form", + "noscript", "noembed", "template", "svg", "math", "frame", + "frameset", "applet", "audio", "video", "source", "track", + NULL + }; + for (int i = 0; names[i]; i++) { + if (html_ieq_n(tag, tag_len, names[i])) return 1; + } + return 0; +} + +/* HTML void elements — emit without a close tag. */ +static int html_is_void(const char* tag, size_t tag_len) { + static const char* names[] = { + "area", "base", "br", "col", "embed", "hr", "img", "input", + "link", "meta", "param", "source", "track", "wbr", + NULL + }; + for (int i = 0; names[i]; i++) { + if (html_ieq_n(tag, tag_len, names[i])) return 1; + } + return 0; +} + +/* Append a single byte HTML-escaped into the output buffer. */ +static void html_escape_byte(html_buf_t* out, unsigned char c) { + switch (c) { + case '<': html_buf_puts(out, "<"); break; + case '>': html_buf_puts(out, ">"); break; + case '"': html_buf_puts(out, """); break; + case '\'': html_buf_puts(out, "'"); break; + default: html_buf_putc(out, (char)c); break; + } +} + +/* Validate a URL value against the allowlist of safe schemes for hrefs. + * Returns 1 iff the URL is safe to emit. 
Acceptable forms: + * - http:// or https:// (case-insensitive) + * - mailto: + * - fragment-only `#anchor` + * - relative path that does not contain a colon before the first + * slash/?/# (so `foo/bar`, `/foo`, `?x=1` are OK; `javascript:x` is + * not — its colon precedes any path/hash/query separator). + * + * URL leading whitespace and embedded ASCII control bytes (TAB, LF, CR) + * are stripped before the scheme test, mirroring how browsers normalise + * URLs (these bytes are otherwise a known XSS bypass: `java\tscript:`). */ +static int html_url_is_safe(const char* url, size_t len) { + if (!url || len == 0) return 1; /* empty href is harmless */ + size_t i = 0; + while (i < len) { + unsigned char c = (unsigned char)url[i]; + if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == 0x0B || c == 0x0C) { + i++; continue; + } + break; + } + if (i >= len) return 1; /* whitespace only */ + if (url[i] == '#') return 1; /* fragment only */ + if (url[i] == '/' || url[i] == '?') return 1; /* relative */ + /* Find the first scheme-terminating character. */ + size_t scheme_end = (size_t)-1; + for (size_t j = i; j < len; j++) { + char c = url[j]; + if (c == ':') { scheme_end = j; break; } + if (c == '/' || c == '?' || c == '#') break; + } + if (scheme_end == (size_t)-1) return 1; /* no colon → relative path */ + /* Lowercase the scheme, stripping embedded control bytes. 
*/ + char scheme[32]; + size_t sl = 0; + for (size_t j = i; j < scheme_end && sl < sizeof(scheme) - 1; j++) { + unsigned char c = (unsigned char)url[j]; + if (c == '\t' || c == '\n' || c == '\r' || c == 0x0B || c == 0x0C) continue; + scheme[sl++] = (char)html_tolower(c); + } + scheme[sl] = '\0'; + if (strcmp(scheme, "http") == 0) return 1; + if (strcmp(scheme, "https") == 0) return 1; + if (strcmp(scheme, "mailto") == 0) return 1; + return 0; +} + +el_val_t el_html_sanitize(el_val_t input_v, el_val_t allowlist_v) { + const char* input = EL_CSTR(input_v); + const char* allow = EL_CSTR(allowlist_v); + if (!input) return el_wrap_str(el_strdup("")); + if (!allow) allow = "{}"; + size_t in_len = strlen(input); + + html_buf_t out; + html_buf_init(&out); + + size_t i = 0; + while (i < in_len) { + unsigned char c = (unsigned char)input[i]; + if (c != '<') { + /* Plain text — escape and emit. We pass `&` through verbatim + * to preserve pre-encoded entities (`<`, `&`, `&#x...;`) + * which the browser will decode safely. */ + if (c == '&') html_buf_putc(&out, '&'); + else html_escape_byte(&out, c); + i++; + continue; + } + /* `<` — try to parse a tag. */ + if (i + 1 >= in_len) { + html_buf_puts(&out, "<"); + i++; + continue; + } + /* Comments, doctype, CDATA, processing instructions — drop entirely. */ + if (input[i + 1] == '!') { + if (i + 3 < in_len && input[i + 2] == '-' && input[i + 3] == '-') { + size_t j = i + 4; + while (j + 2 < in_len && !(input[j] == '-' && input[j + 1] == '-' && input[j + 2] == '>')) j++; + if (j + 2 < in_len) i = j + 3; + else i = in_len; + continue; + } + size_t j = i + 2; + while (j < in_len && input[j] != '>') j++; + i = (j < in_len) ? j + 1 : in_len; + continue; + } + if (input[i + 1] == '?') { + size_t j = i + 2; + while (j < in_len && input[j] != '>') j++; + i = (j < in_len) ? 
j + 1 : in_len; + continue; + } + int is_close = 0; + size_t name_start = i + 1; + if (input[i + 1] == '/') { + is_close = 1; + name_start = i + 2; + } + if (name_start >= in_len) { + html_buf_puts(&out, "<"); + i++; + continue; + } + unsigned char nc = (unsigned char)input[name_start]; + if (!((nc >= 'a' && nc <= 'z') || (nc >= 'A' && nc <= 'Z'))) { + /* `<` followed by non-letter — emit as escaped text. */ + html_buf_puts(&out, "<"); + i++; + continue; + } + size_t name_end = name_start; + while (name_end < in_len) { + unsigned char x = (unsigned char)input[name_end]; + if ((x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') || + (x >= '0' && x <= '9') || x == '-' || x == '_' || x == ':') { + name_end++; + } else { + break; + } + } + const char* tag = input + name_start; + size_t tag_len = name_end - name_start; + /* Find the `>` that closes this tag, respecting quoted attrs. */ + size_t cur = name_end; + int self_close = 0; + while (cur < in_len) { + unsigned char x = (unsigned char)input[cur]; + if (x == '"' || x == '\'') { + unsigned char q = x; + cur++; + while (cur < in_len && (unsigned char)input[cur] != q) cur++; + if (cur < in_len) cur++; /* skip closing quote */ + continue; + } + if (x == '/' && cur + 1 < in_len && input[cur + 1] == '>') { + self_close = 1; + break; + } + if (x == '>') break; + cur++; + } + if (cur >= in_len) { + /* Malformed: unclosed tag at EOF. Drop the rest of the input. */ + i = in_len; + continue; + } + size_t tag_end = self_close ? cur + 2 : cur + 1; /* one past `>` */ + /* Dangerous container — drop the whole subtree. 
*/ + if (!is_close && html_is_dangerous_container(tag, tag_len)) { + if (self_close || html_is_void(tag, tag_len)) { + i = tag_end; + continue; + } + size_t scan = tag_end; + int found_close = 0; + while (scan < in_len) { + if (input[scan] != '<') { scan++; continue; } + if (scan + 1 < in_len && input[scan + 1] == '/') { + size_t cn_start = scan + 2; + size_t cn_end = cn_start; + while (cn_end < in_len) { + unsigned char x = (unsigned char)input[cn_end]; + if ((x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') || + (x >= '0' && x <= '9') || x == '-' || x == '_' || x == ':') { + cn_end++; + } else break; + } + if (cn_end - cn_start == tag_len && + html_iemem(input + cn_start, tag, tag_len)) { + size_t end_close = cn_end; + while (end_close < in_len && input[end_close] != '>') end_close++; + i = (end_close < in_len) ? end_close + 1 : in_len; + found_close = 1; + break; + } + } + scan++; + } + if (!found_close) { + /* No matching close — drop everything from here on. */ + i = in_len; + } + continue; + } + /* Look up the tag in the allowlist. */ + size_t arr_len = 0; + const char* arr = html_allowlist_find(allow, tag, tag_len, &arr_len); + if (!arr) { + /* Tag not allowed. Drop the open/close marker; inner text is + * processed by the outer loop and re-emitted as escaped text. */ + i = tag_end; + continue; + } + if (is_close) { + if (!html_is_void(tag, tag_len)) { + html_buf_putc(&out, '<'); + html_buf_putc(&out, '/'); + for (size_t k = 0; k < tag_len; k++) { + html_buf_putc(&out, (char)html_tolower((unsigned char)tag[k])); + } + html_buf_putc(&out, '>'); + } + i = tag_end; + continue; + } + /* Allowed open tag. Emit ``. 
*/ + html_buf_putc(&out, '<'); + for (size_t k = 0; k < tag_len; k++) { + html_buf_putc(&out, (char)html_tolower((unsigned char)tag[k])); + } + size_t a = name_end; + while (a < cur) { + unsigned char x = (unsigned char)input[a]; + if (x == ' ' || x == '\t' || x == '\n' || x == '\r' || x == '/') { a++; continue; } + size_t an_start = a; + while (a < cur) { + unsigned char y = (unsigned char)input[a]; + if (y == '=' || y == ' ' || y == '\t' || y == '\n' || y == '\r' || y == '/' || y == '>') break; + a++; + } + size_t an_len = a - an_start; + if (an_len == 0) { a++; continue; } + size_t av_start = 0; + size_t av_len = 0; + int has_value = 0; + size_t b = a; + while (b < cur && (input[b] == ' ' || input[b] == '\t' || input[b] == '\n' || input[b] == '\r')) b++; + if (b < cur && input[b] == '=') { + has_value = 1; + b++; + while (b < cur && (input[b] == ' ' || input[b] == '\t' || input[b] == '\n' || input[b] == '\r')) b++; + if (b < cur && (input[b] == '"' || input[b] == '\'')) { + unsigned char q = (unsigned char)input[b]; + b++; + av_start = b; + while (b < cur && (unsigned char)input[b] != q) b++; + av_len = b - av_start; + if (b < cur) b++; + } else { + av_start = b; + while (b < cur) { + unsigned char y = (unsigned char)input[b]; + if (y == ' ' || y == '\t' || y == '\n' || y == '\r' || y == '>') break; + b++; + } + av_len = b - av_start; + } + a = b; + } + if (!html_attr_in_array(arr, arr_len, input + an_start, an_len)) continue; + int is_href = (an_len == 4 && html_iemem(input + an_start, "href", 4)); + int is_src = (an_len == 3 && html_iemem(input + an_start, "src", 3)); + if ((is_href || is_src) && has_value) { + if (!html_url_is_safe(input + av_start, av_len)) continue; + } + html_buf_putc(&out, ' '); + for (size_t k = 0; k < an_len; k++) { + html_buf_putc(&out, (char)html_tolower((unsigned char)input[an_start + k])); + } + if (has_value) { + html_buf_puts(&out, "=\""); + for (size_t k = 0; k < av_len; k++) { + unsigned char y = (unsigned char)input[av_start + 
k]; + /* Re-escape so the emitted attribute is well-formed + * double-quoted HTML. `&` passes through to preserve + * pre-encoded entities. */ + if (y == '"') html_buf_puts(&out, """); + else if (y == '<') html_buf_puts(&out, "<"); + else if (y == '>') html_buf_puts(&out, ">"); + else html_buf_putc(&out, (char)y); + } + html_buf_putc(&out, '"'); + } + } + html_buf_putc(&out, '>'); + i = tag_end; + } + /* Copy into arena-tracked buffer so the standard runtime memory model + * applies to the returned string. */ + char* result = el_strbuf(out.len); + memcpy(result, out.data, out.len); + result[out.len] = '\0'; + html_buf_free(&out); + return el_wrap_str(result); +} + /* ── JSON ────────────────────────────────────────────────────────────────── */ /* True iff the segment is non-empty and every byte is an ASCII digit. We treat @@ -2827,6 +3347,283 @@ el_val_t sleep_ms(el_val_t ms) { return 0; } +/* ── Instant + Duration: first-class temporal types ────────────────────────── + * El's substrate (Neuron) is a temporal cognition system. Memory salience + * decay, the six-tier pacemaker, TTL caches, and supersession are all + * temporal. Treating time as a raw Int (now() returning ms-since-epoch and + * arithmetic done with mixed unit literals) lets bugs through the type + * system: `(now - cached_at) < 60` cannot tell ms from sec, and `sleep(30)` + * is ambiguous. This block introduces two dedicated representations. + * + * Representation: + * Instant — int64 nanoseconds since the Unix epoch + * Duration — int64 nanoseconds (signed; negative durations are legal, + * e.g. when a deadline has passed) + * + * Both share the el_val_t (int64) slot the rest of the runtime uses, so no + * boxing / arena allocation is needed. Type discipline is enforced at the + * codegen layer: `let x: Duration = ...` registers `x` in __duration_names, + * and BinOp dispatches through typed wrappers (el_duration_add, etc.) that + * make intent explicit in the generated C. 
Mismatched ops (Instant+Instant, + * Duration+Int) are surfaced via #error directives at codegen time so the + * downstream cc step fails with a clear El-source-level message. + * + * Nanosecond precision matches POSIX clock_gettime / nanosleep granularity. + * 2^63 nanos covers ~292 years from epoch — comfortably past 2200, plenty + * for a memory-system runtime that never schedules outside a human lifespan. + */ + +/* now() — current Instant. Wraps clock_gettime(CLOCK_REALTIME) for nanosecond + * precision. Falls back to gettimeofday on systems where clock_gettime is + * unavailable (defensive — every supported platform has it). */ +el_val_t el_now_instant(void) { + struct timespec ts; + if (clock_gettime(CLOCK_REALTIME, &ts) == 0) { + int64_t ns = (int64_t)ts.tv_sec * 1000000000LL + (int64_t)ts.tv_nsec; + return (el_val_t)ns; + } + struct timeval tv; + gettimeofday(&tv, NULL); + int64_t ns = (int64_t)tv.tv_sec * 1000000000LL + + (int64_t)tv.tv_usec * 1000LL; + return (el_val_t)ns; +} + +el_val_t now(void) { + return el_now_instant(); +} + +/* unix_seconds(n) — Instant from a Unix-epoch second count. + * unix_millis(n) — Instant from a Unix-epoch millisecond count. */ +el_val_t unix_seconds(el_val_t n) { + int64_t s = (int64_t)n; + return (el_val_t)(s * 1000000000LL); +} + +el_val_t unix_millis(el_val_t n) { + int64_t m = (int64_t)n; + return (el_val_t)(m * 1000000LL); +} + +/* instant_from_iso8601 — parse a strict subset: + * YYYY-MM-DDTHH:MM:SS[.fff]Z + * Returns 0 (the Unix-epoch sentinel) on parse failure. Callers that need to + * distinguish epoch-zero from a parse error should use a wider sentinel + * representation; the current zero-on-failure choice matches existing El + * runtime conventions for parse builtins (str_to_int, parse_int). 
*/ +el_val_t instant_from_iso8601(el_val_t s) { + const char* str = EL_CSTR(s); + if (!str) return (el_val_t)0; + int Y, M, D, h, m, sec, frac = 0; + int n = sscanf(str, "%d-%d-%dT%d:%d:%d.%3d", &Y, &M, &D, &h, &m, &sec, &frac); + if (n < 6) { + n = sscanf(str, "%d-%d-%dT%d:%d:%dZ", &Y, &M, &D, &h, &m, &sec); + if (n < 6) return (el_val_t)0; + } + struct tm tm; + memset(&tm, 0, sizeof(tm)); + tm.tm_year = Y - 1900; + tm.tm_mon = M - 1; + tm.tm_mday = D; + tm.tm_hour = h; + tm.tm_min = m; + tm.tm_sec = sec; + /* timegm — UTC. POSIX-Y but available on macOS and glibc. */ + time_t t = timegm(&tm); + if (t == (time_t)-1) return (el_val_t)0; + int64_t ns = (int64_t)t * 1000000000LL + (int64_t)frac * 1000000LL; + return (el_val_t)ns; +} + +/* Duration constructors. The El-side postfix literals (30.seconds, 1.hour) + * are lowered by the codegen directly into a literal int64 of nanoseconds — + * these constructors are for runtime values where the count is dynamic. */ +el_val_t el_duration_from_nanos(el_val_t ns) { + return (el_val_t)(int64_t)ns; +} + +el_val_t duration_seconds(el_val_t n) { + int64_t s = (int64_t)n; + return (el_val_t)(s * 1000000000LL); +} + +el_val_t duration_millis(el_val_t n) { + int64_t m = (int64_t)n; + return (el_val_t)(m * 1000000LL); +} + +el_val_t duration_nanos(el_val_t n) { + return (el_val_t)(int64_t)n; +} + +/* Arithmetic — typed wrappers. At the C level these are no-op casts, but + * the codegen routes Instant/Duration BinOps through them so the generated + * C says `el_instant_add_dur(start, dur)` rather than `start + dur`. The + * intent is explicit, the operand order is documented, and a future change + * to the underlying representation (saturating arithmetic, overflow guards) + * has a single chokepoint. 
*/ +el_val_t el_instant_add_dur(el_val_t inst, el_val_t dur) { + return (el_val_t)((int64_t)inst + (int64_t)dur); +} + +el_val_t el_instant_sub_dur(el_val_t inst, el_val_t dur) { + return (el_val_t)((int64_t)inst - (int64_t)dur); +} + +el_val_t el_instant_diff(el_val_t a, el_val_t b) { + /* a - b — yields a Duration (negative if b is later than a). */ + return (el_val_t)((int64_t)a - (int64_t)b); +} + +el_val_t el_duration_add(el_val_t a, el_val_t b) { + return (el_val_t)((int64_t)a + (int64_t)b); +} + +el_val_t el_duration_sub(el_val_t a, el_val_t b) { + return (el_val_t)((int64_t)a - (int64_t)b); +} + +el_val_t el_duration_scale(el_val_t dur, el_val_t scalar) { + return (el_val_t)((int64_t)dur * (int64_t)scalar); +} + +el_val_t el_duration_div(el_val_t dur, el_val_t scalar) { + int64_t s = (int64_t)scalar; + if (s == 0) return (el_val_t)0; + return (el_val_t)((int64_t)dur / s); +} + +/* Comparisons. Return 1/0 in el_val_t convention. */ +el_val_t el_instant_lt(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a < (int64_t)b ? 1 : 0); } +el_val_t el_instant_le(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a <= (int64_t)b ? 1 : 0); } +el_val_t el_instant_gt(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a > (int64_t)b ? 1 : 0); } +el_val_t el_instant_ge(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a >= (int64_t)b ? 1 : 0); } +el_val_t el_instant_eq(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a == (int64_t)b ? 1 : 0); } +el_val_t el_instant_ne(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a != (int64_t)b ? 1 : 0); } +el_val_t el_duration_lt(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a < (int64_t)b ? 1 : 0); } +el_val_t el_duration_le(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a <= (int64_t)b ? 1 : 0); } +el_val_t el_duration_gt(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a > (int64_t)b ? 1 : 0); } +el_val_t el_duration_ge(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a >= (int64_t)b ? 
1 : 0); } +el_val_t el_duration_eq(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a == (int64_t)b ? 1 : 0); } +el_val_t el_duration_ne(el_val_t a, el_val_t b) { return (el_val_t)((int64_t)a != (int64_t)b ? 1 : 0); } + +/* Conversions. */ +el_val_t instant_to_unix_seconds(el_val_t i) { + return (el_val_t)((int64_t)i / 1000000000LL); +} + +el_val_t instant_to_unix_millis(el_val_t i) { + return (el_val_t)((int64_t)i / 1000000LL); +} + +el_val_t instant_to_iso8601(el_val_t i) { + int64_t ns = (int64_t)i; + time_t s = (time_t)(ns / 1000000000LL); + int msec = (int)((ns / 1000000LL) % 1000LL); + if (msec < 0) { msec += 1000; s -= 1; } + struct tm tm; + gmtime_r(&s, &tm); + char buf[64]; + snprintf(buf, sizeof(buf), "%04d-%02d-%02dT%02d:%02d:%02d.%03dZ", + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, msec); + return el_wrap_str(el_strdup(buf)); +} + +el_val_t duration_to_seconds(el_val_t d) { + return (el_val_t)((int64_t)d / 1000000000LL); +} + +el_val_t duration_to_millis(el_val_t d) { + return (el_val_t)((int64_t)d / 1000000LL); +} + +el_val_t duration_to_nanos(el_val_t d) { + return (el_val_t)(int64_t)d; +} + +/* sleep(Duration) — Phase 1 replacement for ambiguous sleep(Int). The runtime + * still exposes sleep_secs/sleep_ms for legacy call sites; codegen lowers + * sleep(Duration) to el_sleep_duration(d). Negative durations clamp to 0 so a + * stale deadline doesn't block forever. */ +el_val_t el_sleep_duration(el_val_t dur) { + int64_t ns = (int64_t)dur; + if (ns < 0) ns = 0; + struct timespec ts; + ts.tv_sec = (time_t)(ns / 1000000000LL); + ts.tv_nsec = (long)(ns % 1000000000LL); + nanosleep(&ts, NULL); + return (el_val_t)0; +} + +/* unix_timestamp() — back-compat. Existing El callers expect an Int seconds + * value; this stays an Int returner so the type system isn't disturbed for + * legacy code. New code should call now() and convert when needed. 
*/ +el_val_t unix_timestamp(void) { + return instant_to_unix_seconds(el_now_instant()); +} + +/* TTL cache helpers. Backed by the existing process-wide K/V (state_set/get) + * with a sibling __ttl_set_at_ entry recording the Instant of the last + * write. ttl_cache_get returns "" if the entry is missing or stale, so call + * sites can branch on `if v == "" { miss } else { hit }` — the same shape + * existing get-with-default code uses. No more (now - cached_at) < 60. */ +el_val_t ttl_cache_set(el_val_t key, el_val_t value) { + const char* k = EL_CSTR(key); + if (!k) return (el_val_t)0; + /* Store the value at the user's key. */ + state_set(key, value); + /* Stamp set_at — opaque schema, namespaced under __ttl: prefix so user + * keys can't collide with stamps. */ + size_t klen = strlen(k); + char* stamp_key = (char*)malloc(klen + 16); + if (!stamp_key) return (el_val_t)0; + snprintf(stamp_key, klen + 16, "__ttl_at:%s", k); + int64_t now_ns = (int64_t)el_now_instant(); + char buf[32]; + snprintf(buf, sizeof(buf), "%lld", (long long)now_ns); + state_set(EL_STR(stamp_key), EL_STR(buf)); + free(stamp_key); + return (el_val_t)1; +} + +el_val_t ttl_cache_get(el_val_t key, el_val_t max_age) { + const char* k = EL_CSTR(key); + if (!k) return el_wrap_str(el_strdup("")); + /* Look up stamp. 
*/ + size_t klen = strlen(k); + char* stamp_key = (char*)malloc(klen + 16); + if (!stamp_key) return el_wrap_str(el_strdup("")); + snprintf(stamp_key, klen + 16, "__ttl_at:%s", k); + el_val_t stamp = state_get(EL_STR(stamp_key)); + free(stamp_key); + const char* sv = EL_CSTR(stamp); + if (!sv || !*sv) return el_wrap_str(el_strdup("")); + int64_t set_at = (int64_t)atoll(sv); + int64_t now_ns = (int64_t)el_now_instant(); + int64_t age = now_ns - set_at; + int64_t max_ns = (int64_t)max_age; + if (age < 0) return el_wrap_str(el_strdup("")); /* clock skew — treat as miss */ + if (age > max_ns) return el_wrap_str(el_strdup("")); /* expired */ + return state_get(key); +} + +el_val_t ttl_cache_age(el_val_t key) { + const char* k = EL_CSTR(key); + if (!k) return (el_val_t)INT64_MAX; + size_t klen = strlen(k); + char* stamp_key = (char*)malloc(klen + 16); + if (!stamp_key) return (el_val_t)INT64_MAX; + snprintf(stamp_key, klen + 16, "__ttl_at:%s", k); + el_val_t stamp = state_get(EL_STR(stamp_key)); + free(stamp_key); + const char* sv = EL_CSTR(stamp); + if (!sv || !*sv) return (el_val_t)INT64_MAX; + int64_t set_at = (int64_t)atoll(sv); + int64_t now_ns = (int64_t)el_now_instant(); + return (el_val_t)(now_ns - set_at); +} + /* ── UUID v4 ─────────────────────────────────────────────────────────────── */ static int _el_uuid_seeded = 0; diff --git a/el-compiler/runtime/el_runtime.h b/el-compiler/runtime/el_runtime.h index 4bead7d..9e9846c 100644 --- a/el-compiler/runtime/el_runtime.h +++ b/el-compiler/runtime/el_runtime.h @@ -199,6 +199,19 @@ el_val_t http_get_to_file(el_val_t url, el_val_t headers_map, el_val_t output_p el_val_t url_encode(el_val_t s); /* RFC 3986 unreserved set */ el_val_t url_decode(el_val_t s); /* '+' → space, %XX → byte */ +/* ── HTML allowlist sanitizer ──────────────────────────────────────────────── + * el_html_sanitize(input_html, allowlist_json) — strict allowlist HTML + * cleaner. 
State-machine parser; tag/attribute names compared case- + * insensitively against the allowlist; `<a href>` / `<… src>` URL schemes + * validated (http, https, mailto, fragment-only, or relative); whole- + * subtree drop for script / style / iframe / object / embed / form and + * similar containers; HTML- + * escapes free text outside dropped subtrees. + * + * The allowlist is JSON of the form + * {"p":[],"a":["href","title"],"strong":[],...} + * where each value is the array of attribute names allowed for that tag. */ +el_val_t el_html_sanitize(el_val_t input_html, el_val_t allowlist_json); + /* ── Filesystem ──────────────────────────────────────────────────────────── */ el_val_t fs_read(el_val_t path);