runtime: rename str_format param 'template' to 'fmt'

template is a reserved keyword in C++; though not in C, it blocks
this header from ever being included from C++ code. Match printf-
family convention with fmt instead.

The deeper question of whether string-template substitution is the
right abstraction for our substrate is filed separately as backlog.
This commit is contained in:
Will Anderson
2026-05-02 12:45:48 -05:00
parent 742bd0b4f9
commit 6d897289a3
2 changed files with 810 additions and 0 deletions
+797
View File
@@ -1968,6 +1968,526 @@ el_val_t url_decode(el_val_t sv) {
return el_wrap_str(out);
}
/* ── HTML allowlist sanitizer ────────────────────────────────────────────────
* el_html_sanitize(input, allowlist_json)
*
* Strict allowlist HTML cleaner. Replaces the older denylist patterns
* (str_replace cascades that wrapped dangerous tags in HTML comments and
* renamed `on*` attributes). The denylist approach is fragile: comment-
* wrapping can be re-broken by a literal `-->` inside an attacker-supplied
* attribute value, and every new attack vector requires a code change.
*
* Design:
* - Single-pass byte-level state machine.
* - Tag and attribute names are matched case-insensitively against the
* allowlist. Unknown tags are dropped entirely (the open and close
* markers are stripped; their inner text content survives, escaped).
* - A small set of "dangerous container" tags (script, style, iframe,
* object, embed, form, plus a few rarer ones) drop themselves AND
* their full subtree: text between `<script>` and `</script>` is
* CDATA-like and must not be re-emitted as escaped text either.
* - Comments (<!-- -->), doctype (<!DOCTYPE>), CDATA (<![CDATA[...]]>),
* and processing instructions (<?...?>) are dropped entirely.
* - Text content outside dropped subtrees is HTML-escaped (<, >, ", ');
* `&` is passed through so pre-encoded entities survive.
* - Attribute values are unquoted/dequoted, then re-emitted with double
* quotes around the cleanly-escaped value.
* - For `<a href>` and any `src` attribute, the URL scheme is validated:
* only http:, https:, mailto:, fragment-only `#anchor`, or relative
* paths are allowed. Anything else (javascript:, data:, vbscript:,
* about:, file:, etc.) drops the attribute.
* - Self-closing void tags (br, hr, img, etc.) emit without a close tag.
* - Malformed input (unclosed tag at EOF, bad attribute syntax) drops
* the pending tag and continues. Pre-encoded entities (&lt;, &amp;,
* etc.) are passed through verbatim; the browser will decode them
* safely on render.
*
* Allowlist format (JSON string):
* {"p":[],"a":["href","title"],"strong":[],...}
* - Key = lowercase tag name.
* - Value = JSON array of allowed attribute names (lowercase).
* - Empty array means tag allowed but no attributes survive.
*
* Output is a freshly-allocated arena-tracked el_val_t string. */
/* Growable byte buffer used only while sanitizing. Storage comes from
 * malloc/realloc and is doubled on demand; the finished bytes are copied
 * into an arena-tracked el_strbuf so the caller sees the runtime's usual
 * memory semantics. */
typedef struct html_buf {
    char*  data;  /* heap storage owned by this buffer */
    size_t len;   /* number of bytes currently in use */
    size_t cap;   /* number of bytes allocated at `data` */
} html_buf_t;
/* Prepare `b` for use: allocate the initial 256-byte backing store and mark
 * the buffer empty. Allocation failure is fatal: a message is written to
 * stderr and the process exits with status 1. */
static void html_buf_init(html_buf_t* b) {
    b->cap = 256;
    char* storage = malloc(b->cap);
    if (storage == NULL) {
        fputs("el_runtime: out of memory\n", stderr);
        exit(1);
    }
    b->data = storage;
    b->len = 0;
}
/* Ensure room for `need` more bytes plus one spare byte. Capacity is doubled
 * until the request fits; realloc failure is fatal (message on stderr,
 * exit status 1). */
static void html_buf_grow(html_buf_t* b, size_t need) {
    const size_t required = b->len + need + 1;
    if (required <= b->cap) return;

    size_t grown = b->cap;
    do {
        grown *= 2;
    } while (grown < required);

    char* moved = realloc(b->data, grown);
    if (moved == NULL) {
        fputs("el_runtime: out of memory\n", stderr);
        exit(1);
    }
    b->data = moved;
    b->cap = grown;
}
/* Append one byte, growing the buffer first if it would not fit. */
static void html_buf_putc(html_buf_t* b, char c) {
    html_buf_grow(b, 1);
    b->data[b->len] = c;
    b->len += 1;
}
/* Append the bytes of NUL-terminated string `s` (terminator excluded).
 * A NULL `s` is ignored. */
static void html_buf_puts(html_buf_t* b, const char* s) {
    if (s == NULL) return;

    const size_t count = strlen(s);
    html_buf_grow(b, count);

    char* dest = b->data + b->len;
    memcpy(dest, s, count);
    b->len += count;
}
/* Release the backing store and reset every field, so the struct no longer
 * points at freed memory. */
static void html_buf_free(html_buf_t* b) {
    free(b->data);
    b->data = NULL;
    b->cap = 0;
    b->len = 0;
}
/* Locale-independent ASCII lowercase: maps 'A'..'Z' to 'a'..'z' and returns
 * every other value unchanged. */
static int html_tolower(int c) {
    if (c < 'A' || c > 'Z') return c;
    return c + ('a' - 'A');
}
/* Compare the byte slice [a, a+n) with the C string `s`, ignoring ASCII case.
 * Returns 1 only when `s` is exactly n bytes long and every byte matches
 * after lowercasing; returns 0 otherwise, including when either pointer is
 * NULL. */
static int html_ieq_n(const char* a, size_t n, const char* s) {
    if (a == NULL || s == NULL) return 0;
    if (n != strlen(s)) return 0;

    const char* stop = a + n;
    while (a < stop) {
        const int lhs = html_tolower((unsigned char)*a++);
        const int rhs = html_tolower((unsigned char)*s++);
        if (lhs != rhs) return 0;
    }
    return 1;
}
/* Compare the first `n` bytes of `a` and `b`, ignoring ASCII case.
 * Returns 1 when all n bytes match (trivially so for n == 0), else 0.
 * Both pointers must reference at least n readable bytes. */
static int html_iemem(const char* a, const char* b, size_t n) {
    while (n-- > 0) {
        const int lhs = html_tolower((unsigned char)*a++);
        const int rhs = html_tolower((unsigned char)*b++);
        if (lhs != rhs) return 0;
    }
    return 1;
}
/* Locate the attribute array for `tag` inside the JSON allowlist object.
 *
 * allow    NUL-terminated JSON text of the form {"tag":[...], ...}; may be NULL.
 * tag      tag name bytes (not NUL-terminated), compared case-insensitively.
 * tag_len  length of `tag` in bytes.
 * out_len  if non-NULL, receives the byte length of the matched array span,
 *          brackets included.
 *
 * Returns a pointer to the opening `[` of the matching entry's array, or NULL
 * when the tag is absent or the JSON does not have the expected shape.
 *
 * Deliberately minimal: keys end at the first `"` (no escape handling), every
 * value must be an array, and the array is delimited by bracket depth with
 * string contents skipped. An entry that is not an array aborts the search,
 * even if a later entry would have matched. */
static const char* html_allowlist_find(const char* allow, const char* tag,
                                       size_t tag_len, size_t* out_len) {
    static const char ws[] = " \t\n\r";
    if (allow == NULL) return NULL;

    const char* cur = allow + strspn(allow, ws);
    if (*cur != '{') return NULL;
    cur++;

    for (;;) {
        /* Skip separators; anything other than a key's opening quote
         * (`}`, end of text, stray bytes) ends the search. */
        cur += strspn(cur, " \t\n\r,");
        if (*cur != '"') return NULL;

        const char* key = ++cur;
        while (*cur != '\0' && *cur != '"') cur++;
        if (*cur != '"') return NULL;
        const size_t key_len = (size_t)(cur - key);
        cur++;

        cur += strspn(cur, ws);
        if (*cur != ':') return NULL;
        cur++;
        cur += strspn(cur, ws);
        if (*cur != '[') return NULL;

        /* Walk to the bracket that closes this array, ignoring brackets
         * inside string literals. */
        const char* value = cur;
        int nesting = 0;
        int quoted = 0;
        for (; *cur != '\0'; cur++) {
            const char ch = *cur;
            if (quoted) {
                if (ch == '\\' && cur[1] != '\0') cur++;   /* skip escaped byte */
                else if (ch == '"') quoted = 0;
            } else if (ch == '"') {
                quoted = 1;
            } else if (ch == '[') {
                nesting++;
            } else if (ch == ']' && --nesting == 0) {
                cur++;
                break;
            }
        }

        if (key_len == tag_len && html_iemem(key, tag, key_len)) {
            if (out_len != NULL) *out_len = (size_t)(cur - value);
            return value;
        }
    }
}
/* Test whether `attr` (attr_len bytes, not NUL-terminated) is one of the
 * string elements of the JSON array slice [arr, arr+arr_len), where the first
 * and last bytes of the slice are taken to be the brackets. Matching ignores
 * ASCII case. Returns 1 on a match; returns 0 on no match, on a NULL or
 * too-short slice, or as soon as an element is not a double-quoted string. */
static int html_attr_in_array(const char* arr, size_t arr_len,
                              const char* attr, size_t attr_len) {
    if (arr == NULL || arr_len < 2) return 0;

    const size_t limit = arr_len - 1;   /* index of the closing bracket */
    size_t pos = 1;                     /* first byte after the opening bracket */

    while (pos < limit) {
        /* Skip whitespace and commas between elements. */
        for (; pos < limit; pos++) {
            const char ch = arr[pos];
            if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' && ch != ',') break;
        }
        if (pos >= limit || arr[pos] != '"') return 0;

        const size_t first = ++pos;
        for (; pos < limit && arr[pos] != '"'; pos++) {
            if (arr[pos] == '\\' && pos + 1 < limit) pos++;   /* skip escaped byte */
        }
        if (pos >= limit) return 0;     /* unterminated string */

        const size_t elem_len = pos - first;
        pos++;                          /* step over the closing quote */
        if (elem_len == attr_len && html_iemem(arr + first, attr, elem_len)) return 1;
    }
    return 0;
}
/* Returns 1 when `tag` (tag_len bytes, compared ignoring ASCII case) is one
 * of the hard-coded tags whose entire subtree is dropped, not just the tag
 * markers; 0 otherwise. */
static int html_is_dangerous_container(const char* tag, size_t tag_len) {
    static const char* const dropped[] = {
        "script", "style", "iframe", "object", "embed", "form",
        "noscript", "noembed", "template", "svg", "math", "frame",
        "frameset", "applet", "audio", "video", "source", "track"
    };
    const size_t count = sizeof(dropped) / sizeof(dropped[0]);

    for (size_t idx = 0; idx < count; idx++) {
        if (html_ieq_n(tag, tag_len, dropped[idx])) return 1;
    }
    return 0;
}
/* Returns 1 when `tag` (tag_len bytes, compared ignoring ASCII case) is an
 * HTML void element, i.e. one that is emitted without a close tag;
 * 0 otherwise. */
static int html_is_void(const char* tag, size_t tag_len) {
    static const char* const voids[] = {
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "param", "source", "track", "wbr"
    };
    const size_t count = sizeof(voids) / sizeof(voids[0]);

    for (size_t idx = 0; idx < count; idx++) {
        if (html_ieq_n(tag, tag_len, voids[idx])) return 1;
    }
    return 0;
}
/* Append byte `c` to `out`, replacing `<`, `>`, `"` and `'` with their
 * entity forms. Every other byte is copied unchanged. That includes `&`,
 * which this function does not escape. */
static void html_escape_byte(html_buf_t* out, unsigned char c) {
    const char* entity = NULL;

    if (c == '<')       entity = "&lt;";
    else if (c == '>')  entity = "&gt;";
    else if (c == '"')  entity = "&quot;";
    else if (c == '\'') entity = "&#39;";

    if (entity != NULL) {
        html_buf_puts(out, entity);
    } else {
        html_buf_putc(out, (char)c);
    }
}
/* Validate a raw (not entity-decoded) URL attribute value against the
 * allowlist of safe schemes. Returns 1 iff the URL is safe to emit.
 *
 * url  attribute value bytes (not necessarily NUL-terminated); may be NULL.
 * len  number of bytes at `url`.
 *
 * Accepted forms:
 *   - empty / whitespace-only values
 *   - http: or https: (case-insensitive)
 *   - mailto:
 *   - fragment-only `#anchor`
 *   - relative references: anything starting with `/` or `?`, or with no
 *     colon before the first `/`, `?` or `#` (so `foo/bar`, `/foo`, `?x=1`
 *     are OK; `javascript:x` is not, because its colon precedes any
 *     path/hash/query separator).
 *
 * Leading whitespace is skipped, and embedded ASCII control bytes (TAB, LF,
 * CR, VT, FF) are stripped from the scheme before comparison, mirroring how
 * browsers normalise URLs (`java\tscript:` is a known XSS bypass).
 *
 * Because the caller re-emits `&` verbatim, the browser decodes character
 * references in the value before it parses the URL. A value such as
 * `&#106;avascript:...` or `javascript&colon;...` therefore shows no
 * recognisable scheme here but becomes `javascript:` in the browser. Any `&`
 * in the scheme region (before the first `:`, `/`, `?` or `#`) is therefore
 * rejected. The cost is that a relative path whose first segment contains
 * `&` is also refused; `&` after the first separator (e.g. in a query
 * string) is unaffected. */
static int html_url_is_safe(const char* url, size_t len) {
    if (!url || len == 0) return 1;             /* empty href is harmless */

    /* Skip leading ASCII whitespace. */
    size_t i = 0;
    while (i < len) {
        unsigned char c = (unsigned char)url[i];
        if (c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != 0x0B && c != 0x0C) break;
        i++;
    }
    if (i >= len) return 1;                     /* whitespace only */
    if (url[i] == '#') return 1;                /* fragment only */
    if (url[i] == '/' || url[i] == '?') return 1;   /* relative */

    /* Find the colon that ends the scheme, if one appears before any
     * path/query/fragment separator. */
    size_t scheme_end = len;
    int has_scheme = 0;
    for (size_t j = i; j < len; j++) {
        char c = url[j];
        /* A character reference could decode to any scheme byte; an embedded
         * NUL would truncate the strcmp below. Refuse both. */
        if (c == '&' || c == '\0') return 0;
        if (c == ':') { scheme_end = j; has_scheme = 1; break; }
        if (c == '/' || c == '?' || c == '#') break;
    }
    if (!has_scheme) return 1;                  /* no colon: relative path */

    /* Lowercase the scheme (ASCII only), stripping embedded control bytes.
     * An over-long scheme is truncated and so cannot match below. */
    char scheme[32];
    size_t sl = 0;
    for (size_t j = i; j < scheme_end && sl < sizeof(scheme) - 1; j++) {
        unsigned char c = (unsigned char)url[j];
        if (c == '\t' || c == '\n' || c == '\r' || c == 0x0B || c == 0x0C) continue;
        if (c >= 'A' && c <= 'Z') c = (unsigned char)(c + ('a' - 'A'));
        scheme[sl++] = (char)c;
    }
    scheme[sl] = '\0';

    return strcmp(scheme, "http") == 0
        || strcmp(scheme, "https") == 0
        || strcmp(scheme, "mailto") == 0;
}
/* Sanitize the HTML text in `input_v` against the JSON allowlist in
 * `allowlist_v` and return the cleaned text as a runtime string.
 *
 * input_v      HTML text. A NULL string yields an empty result.
 * allowlist_v  JSON object mapping tag name -> array of allowed attribute
 *              names. A NULL string is treated as "{}" (no tag allowed).
 *
 * The input is walked once, byte by byte. Output is accumulated in a
 * malloc-backed html_buf_t and copied into an el_strbuf at the end.
 *
 * NOTE(review): nothing here balances tags. An allowed open tag with no
 * matching close tag (or vice versa) is emitted as-is. */
el_val_t el_html_sanitize(el_val_t input_v, el_val_t allowlist_v) {
const char* input = EL_CSTR(input_v);
const char* allow = EL_CSTR(allowlist_v);
if (!input) return el_wrap_str(el_strdup(""));
if (!allow) allow = "{}";
size_t in_len = strlen(input);
html_buf_t out;
html_buf_init(&out);
size_t i = 0;
while (i < in_len) {
unsigned char c = (unsigned char)input[i];
if (c != '<') {
/* Plain text — escape and emit. We pass `&` through verbatim
* to preserve pre-encoded entities (`&lt;`, `&amp;`, `&#x...;`)
* which the browser will decode safely. */
if (c == '&') html_buf_putc(&out, '&');
else html_escape_byte(&out, c);
i++;
continue;
}
/* `<` — try to parse a tag. A `<` that is the last input byte cannot
* start one, so it is emitted as text. */
if (i + 1 >= in_len) {
html_buf_puts(&out, "&lt;");
i++;
continue;
}
/* Comments, doctype, CDATA, processing instructions — drop entirely. */
if (input[i + 1] == '!') {
if (i + 3 < in_len && input[i + 2] == '-' && input[i + 3] == '-') {
/* `<!--`: skip to just past the next `-->`. An unterminated comment
* swallows the rest of the input. */
size_t j = i + 4;
while (j + 2 < in_len && !(input[j] == '-' && input[j + 1] == '-' && input[j + 2] == '>')) j++;
if (j + 2 < in_len) i = j + 3;
else i = in_len;
continue;
}
/* Any other `<!...`: skip to just past the next `>` (or to EOF). */
size_t j = i + 2;
while (j < in_len && input[j] != '>') j++;
i = (j < in_len) ? j + 1 : in_len;
continue;
}
if (input[i + 1] == '?') {
/* `<?...`: skipped up to the next `>` (not specifically `?>`). */
size_t j = i + 2;
while (j < in_len && input[j] != '>') j++;
i = (j < in_len) ? j + 1 : in_len;
continue;
}
/* Open vs. close tag: a `/` right after `<` marks a close tag. */
int is_close = 0;
size_t name_start = i + 1;
if (input[i + 1] == '/') {
is_close = 1;
name_start = i + 2;
}
if (name_start >= in_len) {
html_buf_puts(&out, "&lt;");
i++;
continue;
}
unsigned char nc = (unsigned char)input[name_start];
if (!((nc >= 'a' && nc <= 'z') || (nc >= 'A' && nc <= 'Z'))) {
/* `<` followed by non-letter — emit as escaped text. Only the `<`
* is consumed; the following bytes are rescanned as ordinary input. */
html_buf_puts(&out, "&lt;");
i++;
continue;
}
/* Tag name: ASCII letters, digits, `-`, `_` and `:`. */
size_t name_end = name_start;
while (name_end < in_len) {
unsigned char x = (unsigned char)input[name_end];
if ((x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') ||
(x >= '0' && x <= '9') || x == '-' || x == '_' || x == ':') {
name_end++;
} else {
break;
}
}
const char* tag = input + name_start;
size_t tag_len = name_end - name_start;
/* Find the `>` that closes this tag, respecting quoted attrs. On exit
* `cur` indexes the `>` itself, or the `/` of a `/>` pair.
* NOTE(review): any quote character opens a quoted run here, even outside
* an attribute value, whereas the attribute walk below only honours quotes
* directly after `=`. The two can disagree on malformed input; output stays
* well-formed because attributes are re-serialized, but content may be
* dropped. */
size_t cur = name_end;
int self_close = 0;
while (cur < in_len) {
unsigned char x = (unsigned char)input[cur];
if (x == '"' || x == '\'') {
unsigned char q = x;
cur++;
while (cur < in_len && (unsigned char)input[cur] != q) cur++;
if (cur < in_len) cur++; /* skip closing quote */
continue;
}
if (x == '/' && cur + 1 < in_len && input[cur + 1] == '>') {
self_close = 1;
break;
}
if (x == '>') break;
cur++;
}
if (cur >= in_len) {
/* Malformed: unclosed tag at EOF. Drop the rest of the input. */
i = in_len;
continue;
}
size_t tag_end = self_close ? cur + 2 : cur + 1; /* one past `>` */
/* Dangerous container — drop the whole subtree. Only open tags take this
* path; a stray close tag of a dangerous container falls through to the
* allowlist lookup below. */
if (!is_close && html_is_dangerous_container(tag, tag_len)) {
if (self_close || html_is_void(tag, tag_len)) {
/* No subtree to skip: drop just the tag. */
i = tag_end;
continue;
}
/* Scan forward for the first `</name` whose name matches this tag
* (case-insensitive). Nesting is not tracked: the first matching close
* tag ends the dropped region. */
size_t scan = tag_end;
int found_close = 0;
while (scan < in_len) {
if (input[scan] != '<') { scan++; continue; }
if (scan + 1 < in_len && input[scan + 1] == '/') {
size_t cn_start = scan + 2;
size_t cn_end = cn_start;
while (cn_end < in_len) {
unsigned char x = (unsigned char)input[cn_end];
if ((x >= 'a' && x <= 'z') || (x >= 'A' && x <= 'Z') ||
(x >= '0' && x <= '9') || x == '-' || x == '_' || x == ':') {
cn_end++;
} else break;
}
if (cn_end - cn_start == tag_len &&
html_iemem(input + cn_start, tag, tag_len)) {
/* Resume just past the `>` of the close tag (or at EOF). */
size_t end_close = cn_end;
while (end_close < in_len && input[end_close] != '>') end_close++;
i = (end_close < in_len) ? end_close + 1 : in_len;
found_close = 1;
break;
}
}
scan++;
}
if (!found_close) {
/* No matching close — drop everything from here on. */
i = in_len;
}
continue;
}
/* Look up the tag in the allowlist. */
size_t arr_len = 0;
const char* arr = html_allowlist_find(allow, tag, tag_len, &arr_len);
if (!arr) {
/* Tag not allowed. Drop the open/close marker; inner text is
* processed by the outer loop and re-emitted as escaped text. */
i = tag_end;
continue;
}
if (is_close) {
/* Allowed close tag: emit `</name>` in lowercase with no attributes.
* Close tags of void elements are dropped. */
if (!html_is_void(tag, tag_len)) {
html_buf_putc(&out, '<');
html_buf_putc(&out, '/');
for (size_t k = 0; k < tag_len; k++) {
html_buf_putc(&out, (char)html_tolower((unsigned char)tag[k]));
}
html_buf_putc(&out, '>');
}
i = tag_end;
continue;
}
/* Allowed open tag. Emit `<name` and walk the attributes between
* `name_end` and the closing `>`. */
html_buf_putc(&out, '<');
for (size_t k = 0; k < tag_len; k++) {
html_buf_putc(&out, (char)html_tolower((unsigned char)tag[k]));
}
size_t a = name_end;
while (a < cur) {
unsigned char x = (unsigned char)input[a];
if (x == ' ' || x == '\t' || x == '\n' || x == '\r' || x == '/') { a++; continue; }
/* Attribute name: runs up to `=`, whitespace, `/` or `>`. */
size_t an_start = a;
while (a < cur) {
unsigned char y = (unsigned char)input[a];
if (y == '=' || y == ' ' || y == '\t' || y == '\n' || y == '\r' || y == '/' || y == '>') break;
a++;
}
size_t an_len = a - an_start;
if (an_len == 0) { a++; continue; }
/* Optional value: `=` followed by a quoted or unquoted run. `b` looks
* ahead; `a` only advances past the value when an `=` was found. */
size_t av_start = 0;
size_t av_len = 0;
int has_value = 0;
size_t b = a;
while (b < cur && (input[b] == ' ' || input[b] == '\t' || input[b] == '\n' || input[b] == '\r')) b++;
if (b < cur && input[b] == '=') {
has_value = 1;
b++;
while (b < cur && (input[b] == ' ' || input[b] == '\t' || input[b] == '\n' || input[b] == '\r')) b++;
if (b < cur && (input[b] == '"' || input[b] == '\'')) {
unsigned char q = (unsigned char)input[b];
b++;
av_start = b;
while (b < cur && (unsigned char)input[b] != q) b++;
av_len = b - av_start;
if (b < cur) b++;
} else {
av_start = b;
while (b < cur) {
unsigned char y = (unsigned char)input[b];
if (y == ' ' || y == '\t' || y == '\n' || y == '\r' || y == '>') break;
b++;
}
av_len = b - av_start;
}
a = b;
}
/* Attributes not listed for this tag are dropped silently. */
if (!html_attr_in_array(arr, arr_len, input + an_start, an_len)) continue;
/* URL scheme validation applies to `href` and `src` on any tag.
* NOTE(review): the check sees the raw (not entity-decoded) value, and `&`
* is emitted verbatim below, so html_url_is_safe must treat entity-encoded
* scheme characters as unsafe. Other URL-bearing attributes (e.g. action,
* poster, srcset) are not validated if the allowlist admits them. */
int is_href = (an_len == 4 && html_iemem(input + an_start, "href", 4));
int is_src = (an_len == 3 && html_iemem(input + an_start, "src", 3));
if ((is_href || is_src) && has_value) {
if (!html_url_is_safe(input + av_start, av_len)) continue;
}
html_buf_putc(&out, ' ');
for (size_t k = 0; k < an_len; k++) {
html_buf_putc(&out, (char)html_tolower((unsigned char)input[an_start + k]));
}
if (has_value) {
html_buf_puts(&out, "=\"");
for (size_t k = 0; k < av_len; k++) {
unsigned char y = (unsigned char)input[av_start + k];
/* Re-escape so the emitted attribute is well-formed
* double-quoted HTML. `&` passes through to preserve
* pre-encoded entities. */
if (y == '"') html_buf_puts(&out, "&quot;");
else if (y == '<') html_buf_puts(&out, "&lt;");
else if (y == '>') html_buf_puts(&out, "&gt;");
else html_buf_putc(&out, (char)y);
}
html_buf_putc(&out, '"');
}
}
/* The tag is always closed with a plain `>`; a `/>` in the input is not
* reproduced. */
html_buf_putc(&out, '>');
i = tag_end;
}
/* Copy into arena-tracked buffer so the standard runtime memory model
* applies to the returned string.
* NOTE(review): presumably el_strbuf(n) returns at least n + 1 writable
* bytes, since the terminator is stored at index out.len — confirm. */
char* result = el_strbuf(out.len);
memcpy(result, out.data, out.len);
result[out.len] = '\0';
html_buf_free(&out);
return el_wrap_str(result);
}
/* ── JSON ────────────────────────────────────────────────────────────────── */
/* True iff the segment is non-empty and every byte is an ASCII digit. We treat
@@ -2827,6 +3347,283 @@ el_val_t sleep_ms(el_val_t ms) {
return 0;
}
/* ── Instant + Duration: first-class temporal types ──────────────────────────
* El's substrate (Neuron) is a temporal cognition system. Memory salience
* decay, the six-tier pacemaker, TTL caches, and supersession are all
* temporal. Treating time as a raw Int (now() returning ms-since-epoch and
* arithmetic done with mixed unit literals) lets bugs through the type
* system: `(now - cached_at) < 60` cannot tell ms from sec, and `sleep(30)`
* is ambiguous. This block introduces two dedicated representations.
*
* Representation:
* Instant int64 nanoseconds since the Unix epoch
* Duration int64 nanoseconds (signed; negative durations are legal,
* e.g. when a deadline has passed)
*
* Both share the el_val_t (int64) slot the rest of the runtime uses, so no
* boxing / arena allocation is needed. Type discipline is enforced at the
* codegen layer: `let x: Duration = ...` registers `x` in __duration_names,
* and BinOp dispatches through typed wrappers (el_duration_add, etc.) that
* make intent explicit in the generated C. Mismatched ops (Instant+Instant,
* Duration+Int) are surfaced via #error directives at codegen time so the
* downstream cc step fails with a clear El-source-level message.
*
* Nanosecond precision matches POSIX clock_gettime / nanosleep granularity.
* 2^63 nanos covers ~292 years from epoch — comfortably past 2200, plenty
* for a memory-system runtime that never schedules outside a human lifespan.
*/
/* now() — current Instant. Wraps clock_gettime(CLOCK_REALTIME) for nanosecond
* precision. Falls back to gettimeofday on systems where clock_gettime is
* unavailable (defensive every supported platform has it). */
el_val_t el_now_instant(void) {
struct timespec ts;
if (clock_gettime(CLOCK_REALTIME, &ts) == 0) {
int64_t ns = (int64_t)ts.tv_sec * 1000000000LL + (int64_t)ts.tv_nsec;
return (el_val_t)ns;
}
struct timeval tv;
gettimeofday(&tv, NULL);
int64_t ns = (int64_t)tv.tv_sec * 1000000000LL
+ (int64_t)tv.tv_usec * 1000LL;
return (el_val_t)ns;
}
/* now() — the current Instant; a thin alias for el_now_instant(). */
el_val_t now(void) {
    el_val_t current = el_now_instant();
    return current;
}
/* unix_seconds(n) — Instant from a Unix-epoch second count.
 * unix_millis(n)  — Instant from a Unix-epoch millisecond count.
 * Both scale the count to nanoseconds with a plain multiply (no overflow
 * check). */
el_val_t unix_seconds(el_val_t n) {
    const int64_t nanos_per_second = 1000000000LL;
    return (el_val_t)((int64_t)n * nanos_per_second);
}
/* Instant from a Unix-epoch millisecond count (plain multiply to
 * nanoseconds, no overflow check). */
el_val_t unix_millis(el_val_t n) {
    const int64_t nanos_per_milli = 1000000LL;
    return (el_val_t)((int64_t)n * nanos_per_milli);
}
/* instant_from_iso8601 — parse a subset of ISO 8601 in UTC:
 *   YYYY-MM-DDTHH:MM:SS[.fff]Z
 * Returns the Instant in nanoseconds since the Unix epoch, or 0 (the
 * Unix-epoch sentinel) on parse failure or a NULL string. Callers that need
 * to distinguish epoch-zero from a parse error should use a wider sentinel
 * representation; the zero-on-failure choice matches existing El runtime
 * conventions for parse builtins (str_to_int, parse_int).
 *
 * The fraction is read digit by digit so that its position is honoured:
 * `.5` is 500 ms and `.05` is 50 ms. Only the first three fractional digits
 * contribute (millisecond resolution); further digits are ignored. The
 * trailing `Z` is not required, and out-of-range fields are whatever
 * timegm() makes of them. Both match the previous behaviour. */
el_val_t instant_from_iso8601(el_val_t s) {
    const char* str = EL_CSTR(s);
    if (!str) return (el_val_t)0;

    int Y, M, D, h, m, sec;
    int consumed = 0;   /* bytes matched by the date-time part, set by %n */
    if (sscanf(str, "%d-%d-%dT%d:%d:%d%n", &Y, &M, &D, &h, &m, &sec, &consumed) < 6) {
        return (el_val_t)0;
    }

    /* Optional fractional seconds: weights 100, 10, 1 ms for the first three
     * digits, then 0 for any that follow. */
    int64_t frac_ms = 0;
    const char* p = str + consumed;
    if (*p == '.') {
        int64_t weight = 100;
        for (p++; *p >= '0' && *p <= '9'; p++) {
            frac_ms += (int64_t)(*p - '0') * weight;
            weight /= 10;
        }
    }

    struct tm tm;
    memset(&tm, 0, sizeof(tm));
    tm.tm_year = Y - 1900;
    tm.tm_mon = M - 1;
    tm.tm_mday = D;
    tm.tm_hour = h;
    tm.tm_min = m;
    tm.tm_sec = sec;
    /* timegm — UTC. Not in ISO C, but available on macOS and glibc. */
    time_t t = timegm(&tm);
    if (t == (time_t)-1) return (el_val_t)0;

    int64_t ns = (int64_t)t * 1000000000LL + frac_ms * 1000000LL;
    return (el_val_t)ns;
}
/* Duration constructors. The El-side postfix literals (30.seconds, 1.hour)
 * are lowered by the codegen directly into a literal int64 of nanoseconds;
 * these constructors are for runtime values where the count is dynamic.
 *
 * el_duration_from_nanos — the count is already in nanoseconds, so it is
 * returned unchanged. */
el_val_t el_duration_from_nanos(el_val_t ns) {
    int64_t nanos = (int64_t)ns;
    return (el_val_t)nanos;
}
/* Duration of `n` seconds, expressed in nanoseconds (plain multiply, no
 * overflow check). */
el_val_t duration_seconds(el_val_t n) {
    const int64_t nanos_per_second = 1000000000LL;
    return (el_val_t)((int64_t)n * nanos_per_second);
}
/* Duration of `n` milliseconds, expressed in nanoseconds (plain multiply, no
 * overflow check). */
el_val_t duration_millis(el_val_t n) {
    const int64_t nanos_per_milli = 1000000LL;
    return (el_val_t)((int64_t)n * nanos_per_milli);
}
/* Duration of `n` nanoseconds; the value is returned unchanged. */
el_val_t duration_nanos(el_val_t n) {
    int64_t nanos = (int64_t)n;
    return (el_val_t)nanos;
}
/* Arithmetic — typed wrappers. At the C level these are plain int64
 * operations, but the codegen routes Instant/Duration BinOps through them so
 * the generated C says `el_instant_add_dur(start, dur)` rather than
 * `start + dur`. That keeps the intent explicit, documents the operand
 * order, and leaves a single chokepoint for any future change to the
 * representation (saturating arithmetic, overflow guards).
 *
 * el_instant_add_dur — Instant + Duration, yielding an Instant. */
el_val_t el_instant_add_dur(el_val_t inst, el_val_t dur) {
    const int64_t when = (int64_t)inst;
    const int64_t span = (int64_t)dur;
    return (el_val_t)(when + span);
}
/* Instant - Duration, yielding an Instant. */
el_val_t el_instant_sub_dur(el_val_t inst, el_val_t dur) {
    const int64_t when = (int64_t)inst;
    const int64_t span = (int64_t)dur;
    return (el_val_t)(when - span);
}
/* Instant - Instant: computes a - b, yielding a Duration that is negative
 * when b is later than a. */
el_val_t el_instant_diff(el_val_t a, el_val_t b) {
    const int64_t later = (int64_t)a;
    const int64_t earlier = (int64_t)b;
    return (el_val_t)(later - earlier);
}
/* Duration + Duration. */
el_val_t el_duration_add(el_val_t a, el_val_t b) {
    const int64_t lhs = (int64_t)a;
    const int64_t rhs = (int64_t)b;
    return (el_val_t)(lhs + rhs);
}
/* Duration - Duration (a - b). */
el_val_t el_duration_sub(el_val_t a, el_val_t b) {
    const int64_t lhs = (int64_t)a;
    const int64_t rhs = (int64_t)b;
    return (el_val_t)(lhs - rhs);
}
/* Duration * integer scalar (plain multiply, no overflow check). */
el_val_t el_duration_scale(el_val_t dur, el_val_t scalar) {
    const int64_t span = (int64_t)dur;
    const int64_t factor = (int64_t)scalar;
    return (el_val_t)(span * factor);
}
/* Duration / integer scalar, truncating toward zero.
 *
 * Division by zero returns a zero Duration instead of trapping. The one
 * quotient that does not fit in int64, INT64_MIN / -1, is undefined
 * behaviour in C and traps on common hardware; it is saturated to
 * INT64_MAX. */
el_val_t el_duration_div(el_val_t dur, el_val_t scalar) {
    const int64_t d = (int64_t)dur;
    const int64_t s = (int64_t)scalar;
    if (s == 0) return (el_val_t)0;
    if (s == -1 && d == INT64_MIN) return (el_val_t)INT64_MAX;
    return (el_val_t)(d / s);
}
/* Instant comparisons. Each returns 1 when the relation holds and 0
 * otherwise, in el_val_t convention. Operands are compared as signed 64-bit
 * nanosecond counts. */
el_val_t el_instant_lt(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a < (int64_t)b);
}
el_val_t el_instant_le(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a <= (int64_t)b);
}
el_val_t el_instant_gt(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a > (int64_t)b);
}
el_val_t el_instant_ge(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a >= (int64_t)b);
}
el_val_t el_instant_eq(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a == (int64_t)b);
}
el_val_t el_instant_ne(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a != (int64_t)b);
}
/* Duration comparisons. Each returns 1 when the relation holds and 0
 * otherwise. Operands are compared as signed 64-bit nanosecond counts. */
el_val_t el_duration_lt(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a < (int64_t)b);
}
el_val_t el_duration_le(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a <= (int64_t)b);
}
el_val_t el_duration_gt(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a > (int64_t)b);
}
el_val_t el_duration_ge(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a >= (int64_t)b);
}
el_val_t el_duration_eq(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a == (int64_t)b);
}
el_val_t el_duration_ne(el_val_t a, el_val_t b) {
    return (el_val_t)((int64_t)a != (int64_t)b);
}
/* Conversions.
 *
 * instant_to_unix_seconds — whole seconds since the Unix epoch. The integer
 * division truncates toward zero. */
el_val_t instant_to_unix_seconds(el_val_t i) {
    const int64_t nanos_per_second = 1000000000LL;
    return (el_val_t)((int64_t)i / nanos_per_second);
}
/* Whole milliseconds since the Unix epoch (integer division, truncating
 * toward zero). */
el_val_t instant_to_unix_millis(el_val_t i) {
    const int64_t nanos_per_milli = 1000000LL;
    return (el_val_t)((int64_t)i / nanos_per_milli);
}
/* Format an Instant as `YYYY-MM-DDTHH:MM:SS.mmmZ` (UTC, millisecond
 * resolution) and return it as a runtime string. For Instants before the
 * epoch, a negative millisecond remainder is folded into the previous
 * second so the printed fraction is never negative. */
el_val_t instant_to_iso8601(el_val_t i) {
    const int64_t total_ns = (int64_t)i;
    time_t whole_secs = (time_t)(total_ns / 1000000000LL);
    int millis = (int)((total_ns / 1000000LL) % 1000LL);
    if (millis < 0) {
        millis += 1000;
        whole_secs -= 1;
    }

    struct tm utc;
    gmtime_r(&whole_secs, &utc);

    char text[64];
    snprintf(text, sizeof(text), "%04d-%02d-%02dT%02d:%02d:%02d.%03dZ",
             utc.tm_year + 1900, utc.tm_mon + 1, utc.tm_mday,
             utc.tm_hour, utc.tm_min, utc.tm_sec, millis);
    return el_wrap_str(el_strdup(text));
}
/* Whole seconds in a Duration (integer division, truncating toward zero). */
el_val_t duration_to_seconds(el_val_t d) {
    const int64_t nanos_per_second = 1000000000LL;
    return (el_val_t)((int64_t)d / nanos_per_second);
}
/* Whole milliseconds in a Duration (integer division, truncating toward
 * zero). */
el_val_t duration_to_millis(el_val_t d) {
    const int64_t nanos_per_milli = 1000000LL;
    return (el_val_t)((int64_t)d / nanos_per_milli);
}
/* Nanoseconds in a Duration; the value is returned unchanged. */
el_val_t duration_to_nanos(el_val_t d) {
    int64_t nanos = (int64_t)d;
    return (el_val_t)nanos;
}
/* sleep(Duration) — Phase 1 replacement for the ambiguous sleep(Int). The
 * runtime still exposes sleep_secs/sleep_ms for legacy call sites; codegen
 * lowers sleep(Duration) to el_sleep_duration(d).
 *
 * A negative Duration is clamped to zero, so a deadline that has already
 * passed returns immediately. The nanosleep result is not inspected, so an
 * interrupted sleep is not resumed. Always returns 0. */
el_val_t el_sleep_duration(el_val_t dur) {
    const int64_t requested = (int64_t)dur;
    const int64_t ns = (requested < 0) ? 0 : requested;

    struct timespec ts = {
        .tv_sec = (time_t)(ns / 1000000000LL),
        .tv_nsec = (long)(ns % 1000000000LL),
    };
    nanosleep(&ts, NULL);
    return (el_val_t)0;
}
/* unix_timestamp() — back-compat. Existing El callers expect an Int count of
 * seconds, so this keeps returning whole seconds since the Unix epoch
 * instead of an Instant. New code should call now() and convert when
 * needed. */
el_val_t unix_timestamp(void) {
    el_val_t current = el_now_instant();
    return instant_to_unix_seconds(current);
}
/* TTL cache helpers. Backed by the existing process-wide K/V (state_set /
 * state_get) with a sibling entry under the key `__ttl_at:<key>` recording
 * the Instant of the last write as decimal nanoseconds. ttl_cache_get
 * returns "" if the entry is missing or stale, so call sites can branch on
 * `if v == "" { miss } else { hit }`, the same shape existing
 * get-with-default code uses. No more (now - cached_at) < 60.
 *
 * ttl_cache_set — store `value` under `key` and stamp the write time.
 * Returns 1 on success, 0 if the key string is NULL or the stamp key cannot
 * be allocated (in the latter case the value has already been stored).
 *
 * NOTE(review): the stamp lives in the same key space as user data, so a
 * user key that itself starts with `__ttl_at:` would collide with a stamp.
 * NOTE(review): presumably state_set copies both key and value, since the
 * stamp key is freed and the stamp text is a stack buffer — confirm. */
el_val_t ttl_cache_set(el_val_t key, el_val_t value) {
    const char* user_key = EL_CSTR(key);
    if (user_key == NULL) return (el_val_t)0;

    /* Store the value at the user's key. */
    state_set(key, value);

    /* Build "__ttl_at:<key>"; 16 spare bytes cover the prefix and NUL. */
    const size_t stamp_cap = strlen(user_key) + 16;
    char* stamp_key = (char*)malloc(stamp_cap);
    if (stamp_key == NULL) return (el_val_t)0;
    snprintf(stamp_key, stamp_cap, "__ttl_at:%s", user_key);

    /* Record the write time as decimal nanoseconds. */
    const int64_t written_at = (int64_t)el_now_instant();
    char stamp_text[32];
    snprintf(stamp_text, sizeof(stamp_text), "%lld", (long long)written_at);

    state_set(EL_STR(stamp_key), EL_STR(stamp_text));
    free(stamp_key);
    return (el_val_t)1;
}
/* Fetch the value stored under `key` if it was written no more than
 * `max_age` ago. `max_age` is compared against the entry's age in
 * nanoseconds. Returns "" when the key string is NULL, the stamp key cannot
 * be allocated, no write stamp exists, the stamp lies in the future (clock
 * skew), or the entry is older than `max_age`. Otherwise returns whatever
 * state_get yields for `key`. */
el_val_t ttl_cache_get(el_val_t key, el_val_t max_age) {
    const char* user_key = EL_CSTR(key);
    if (user_key == NULL) return el_wrap_str(el_strdup(""));

    /* Look up the write stamp stored under "__ttl_at:<key>". */
    const size_t stamp_cap = strlen(user_key) + 16;
    char* stamp_key = (char*)malloc(stamp_cap);
    if (stamp_key == NULL) return el_wrap_str(el_strdup(""));
    snprintf(stamp_key, stamp_cap, "__ttl_at:%s", user_key);
    el_val_t stamp = state_get(EL_STR(stamp_key));
    free(stamp_key);

    const char* stamp_text = EL_CSTR(stamp);
    if (stamp_text == NULL || *stamp_text == '\0') return el_wrap_str(el_strdup(""));

    const int64_t written_at = (int64_t)atoll(stamp_text);
    const int64_t current = (int64_t)el_now_instant();
    const int64_t age = current - written_at;
    const int64_t limit = (int64_t)max_age;

    /* Negative age means clock skew (treated as a miss); beyond the limit
     * means expired. */
    if (age < 0 || age > limit) return el_wrap_str(el_strdup(""));

    return state_get(key);
}
/* Age of the entry stored under `key`: the current Instant minus the
 * recorded write stamp, in nanoseconds. Returns INT64_MAX when the key
 * string is NULL, the stamp key cannot be allocated, or no stamp exists.
 * The result is negative if the stamp lies in the future. */
el_val_t ttl_cache_age(el_val_t key) {
    const char* user_key = EL_CSTR(key);
    if (user_key == NULL) return (el_val_t)INT64_MAX;

    const size_t stamp_cap = strlen(user_key) + 16;
    char* stamp_key = (char*)malloc(stamp_cap);
    if (stamp_key == NULL) return (el_val_t)INT64_MAX;
    snprintf(stamp_key, stamp_cap, "__ttl_at:%s", user_key);
    el_val_t stamp = state_get(EL_STR(stamp_key));
    free(stamp_key);

    const char* stamp_text = EL_CSTR(stamp);
    if (stamp_text == NULL || *stamp_text == '\0') return (el_val_t)INT64_MAX;

    const int64_t written_at = (int64_t)atoll(stamp_text);
    const int64_t current = (int64_t)el_now_instant();
    return (el_val_t)(current - written_at);
}
/* ── UUID v4 ─────────────────────────────────────────────────────────────── */
static int _el_uuid_seeded = 0;
+13
View File
@@ -199,6 +199,19 @@ el_val_t http_get_to_file(el_val_t url, el_val_t headers_map, el_val_t output_p
el_val_t url_encode(el_val_t s); /* RFC 3986 unreserved set */
el_val_t url_decode(el_val_t s); /* '+' → space, %XX → byte */
/* ── HTML allowlist sanitizer ────────────────────────────────────────────────
* el_html_sanitize(input_html, allowlist_json) — strict allowlist HTML
* cleaner. State-machine parser; tag/attribute names compared case-
* insensitively against the allowlist; `href` / `src` URL schemes
* validated (http, https, mailto, fragment-only, or relative); whole-
* subtree drop for script / style / iframe / object / embed / form and
* several other container tags; HTML-escapes free text outside dropped
* subtrees (`&` is passed through so pre-encoded entities survive).
*
* The allowlist is JSON of the form
* {"p":[],"a":["href","title"],"strong":[],...}
* where each value is the array of attribute names allowed for that tag.
*
* Returns the sanitized text as a runtime string. A NULL input string yields
* an empty string; a NULL allowlist string is treated as {} (no tag
* allowed). */
el_val_t el_html_sanitize(el_val_t input_html, el_val_t allowlist_json);
/* ── Filesystem ──────────────────────────────────────────────────────────── */
el_val_t fs_read(el_val_t path);