diff --git a/bootstrap.py b/bootstrap.py deleted file mode 100644 index 64b810c..0000000 --- a/bootstrap.py +++ /dev/null @@ -1,1489 +0,0 @@ -#!/usr/bin/env python3 -""" -bootstrap.py — Python bootstrap compiler for the El language. - -Reads an El source file and emits C source code to stdout, following exactly -the same codegen patterns as elc (the self-hosting El compiler). - -Usage: - python3 bootstrap.py > output.c - -This is a clean-room implementation of the El lexer, parser, and C codegen -in Python. It handles the full El grammar as specified in spec/language.md -and as observed from the elc-combined.el source and the soul.c output. -""" - -import sys -import re - -# ─── Lexer ──────────────────────────────────────────────────────────────────── - -KEYWORDS = { - 'let', 'fn', 'type', 'enum', 'match', 'return', - 'if', 'else', 'for', 'in', 'while', 'import', 'from', 'as', - 'with', 'sealed', 'activate', 'where', 'test', 'seed', 'assert', - 'protocol', 'impl', 'retry', 'times', 'fallback', 'reason', - 'parallel', 'trace', 'requires', 'deploy', 'to', 'via', 'target', - 'true', 'false', 'cgi', 'service', 'manager', 'engine', 'accessor', - 'vessel', -} - -KEYWORD_TOKEN = { - 'let': 'Let', 'fn': 'Fn', 'type': 'Type', 'enum': 'Enum', - 'match': 'Match', 'return': 'Return', 'if': 'If', 'else': 'Else', - 'for': 'For', 'in': 'In', 'while': 'While', 'import': 'Import', - 'from': 'From', 'as': 'As', 'with': 'With', 'sealed': 'Sealed', - 'activate': 'Activate', 'where': 'Where', 'test': 'Test', - 'seed': 'Seed', 'assert': 'Assert', 'protocol': 'Protocol', - 'impl': 'Impl', 'retry': 'Retry', 'times': 'Times', - 'fallback': 'Fallback', 'reason': 'Reason', 'parallel': 'Parallel', - 'trace': 'Trace', 'requires': 'Requires', 'deploy': 'Deploy', - 'to': 'To', 'via': 'Via', 'target': 'Target', - 'true': 'Bool', 'false': 'Bool', - 'cgi': 'Cgi', 'service': 'Service', - 'manager': 'Manager', 'engine': 'Engine', 'accessor': 'Accessor', - 'vessel': 'Vessel', -} - - -class Token: - 
# Two-character operators; these are matched before any single-character one.
_TWO_CHAR_OPS = {
    '==': 'EqEq', '!=': 'NotEq', '<=': 'LtEq', '>=': 'GtEq',
    '&&': 'And', '||': 'Or', '|>': 'PipeOp', '->': 'Arrow',
    '=>': 'FatArrow', '::': 'ColonColon',
}

# Single-character operators and punctuation.
_ONE_CHAR_OPS = {
    '|': 'Pipe', '=': 'Eq', '<': 'Lt', '>': 'Gt',
    '+': 'Plus', '-': 'Minus', '*': 'Star', '/': 'Slash',
    '%': 'Percent', '!': 'Not',
    '(': 'LParen', ')': 'RParen', '{': 'LBrace', '}': 'RBrace',
    '[': 'LBracket', ']': 'RBracket',
    ',': 'Comma', '.': 'Dot', ';': 'Semicolon', ':': 'Colon',
    '@': 'At', '?': 'QuestionMark',
}

# Escape sequences recognised inside string literals; any other escaped
# character stands for itself.
_STRING_ESCAPES = {'n': '\n', 't': '\t', 'r': '\r', '"': '"', '\\': '\\'}

# Only ASCII digits start/continue a number: str.isdigit() also accepts
# characters such as superscripts, which are not valid in the emitted C.
_ASCII_DIGITS = '0123456789'


def lex(source):
    """Split El source text into a list of Token objects.

    Args:
        source: the full program text.

    Returns:
        A list of tokens in source order, always terminated by a single
        ``Eof`` token. Each token carries the 1-based line it starts on.

    Whitespace and ``//`` comments are dropped. Characters that match no
    rule (including a lone ``&``) are silently skipped.
    """
    tokens = []
    i = 0
    n = len(source)
    line = 1

    while i < n:
        c = source[i]

        # Whitespace
        if c in ' \t\r\n':
            if c == '\n':
                line += 1
            i += 1
            continue

        # Single-line comment: jump to the newline (left for the branch above)
        if source.startswith('//', i):
            end = source.find('\n', i)
            i = n if end == -1 else end
            continue

        # String literal
        if c == '"':
            start_line = line  # report the line the literal opens on
            i += 1
            chars = []
            while i < n and source[i] != '"':
                ch = source[i]
                if ch == '\\' and i + 1 < n:
                    esc = source[i + 1]
                    if esc == '\n':
                        line += 1  # escaped newline still ends a source line
                    chars.append(_STRING_ESCAPES.get(esc, esc))
                    i += 2
                else:
                    if ch == '\n':
                        line += 1
                    chars.append(ch)
                    i += 1
            i += 1  # closing quote (or one past the end if unterminated)
            tokens.append(Token('Str', ''.join(chars), start_line))
            continue

        # Number: digits, optionally followed by '.' and more digits
        if c in _ASCII_DIGITS:
            start = i
            while i < n and source[i] in _ASCII_DIGITS:
                i += 1
            kind = 'Int'
            # A '.' only belongs to the number when a digit follows it,
            # so `1.foo` lexes as Int, Dot, Ident.
            if i + 1 < n and source[i] == '.' and source[i + 1] in _ASCII_DIGITS:
                i += 1
                while i < n and source[i] in _ASCII_DIGITS:
                    i += 1
                kind = 'Float'
            tokens.append(Token(kind, source[start:i], line))
            continue

        # Identifier or keyword
        if c.isalpha() or c == '_':
            start = i
            while i < n and (source[i].isalnum() or source[i] == '_'):
                i += 1
            word = source[start:i]
            tokens.append(Token(KEYWORD_TOKEN.get(word, 'Ident'), word, line))
            continue

        # Operators: longest match first
        two = source[i:i + 2]
        kind = _TWO_CHAR_OPS.get(two)
        if kind is not None:
            tokens.append(Token(kind, two, line))
            i += 2
            continue

        kind = _ONE_CHAR_OPS.get(c)
        if kind is not None:
            tokens.append(Token(kind, c, line))
        # Anything else (a lone '&' included) is consumed and discarded.
        i += 1

    tokens.append(Token('Eof', '', line))
    return tokens
def parse_type(self):
    """Consume one type expression and return its display name.

    Handles ``[T]`` (reported as ``'Array'``), a named type with optional
    ``<A, B, ...>`` generic arguments (the arguments are parsed and
    dropped), and any number of trailing ``?`` markers, each of which is
    appended to the returned name.
    """
    kind = self.peek_kind()

    if kind == 'LBracket':
        # [T] — the element type is parsed but not kept.
        self.advance()
        self.parse_type()
        self.expect('RBracket')
        type_name = 'Array'
    else:
        # Generic arguments are only looked for after an identifier or a
        # keyword used as a type name; any other token is taken verbatim.
        takes_generics = kind == 'Ident' or kind in KEYWORD_TOKEN.values()
        type_name = self.advance().value
        if takes_generics and self.peek_kind() == 'Lt':
            self.advance()
            self.parse_type()
            while self.eat('Comma'):
                self.parse_type()
            self.expect('Gt')

    # Optional suffix(es)
    while self.eat('QuestionMark'):
        type_name += '?'
    return type_name
def parse_return(self):
    """Parse a ``return`` statement.

    Returns:
        ``{'stmt': 'Return', 'value': <expr node>}``. A bare return —
        ``return`` followed directly by ``;``, ``}`` or end of input —
        yields a ``{'expr': 'Nil'}`` value.
    """
    self.expect('Return')

    # `return;` is a bare return too. Without this check the ';' would be
    # handed to the expression parser and come back as a bogus identifier.
    if self.eat('Semicolon'):
        return {'stmt': 'Return', 'value': {'expr': 'Nil'}}

    # Bare return: next token closes the block or ends the file.
    if self.peek_kind() in ('RBrace', 'Eof'):
        return {'stmt': 'Return', 'value': {'expr': 'Nil'}}

    val = self.parse_expr()
    self.eat('Semicolon')
    return {'stmt': 'Return', 'value': val}
def parse_block(self):
    """Parse statements up to the closing ``}`` and return them as a list.

    The opening ``{`` must already have been consumed by the caller; the
    closing ``}`` is consumed here. Statements for which ``parse_stmt``
    returns ``None`` are left out of the result.
    """
    body = []
    while True:
        if self.peek_kind() in ('RBrace', 'Eof'):
            break
        node = self.parse_stmt()
        if node is None:
            continue
        body.append(node)
    # Fails via expect() when the block was cut short by end of input.
    self.expect('RBrace')
    return body
def parse_service_block(self):
    """Parse ``service "name" { ... }`` and discard the body.

    The body is skipped token by token with brace-depth tracking, so a
    nested ``{ ... }`` inside the block does not end it early (the same
    approach parse_vessel_block uses). If input ends before the block is
    closed, parsing simply stops at ``Eof``.

    Returns:
        ``{'stmt': 'ServiceBlock', 'name': <name or ''>}``.
    """
    self.expect('Service')
    name = ''
    if self.peek_kind() == 'Str':
        name = self.advance().value
    self.expect('LBrace')

    depth = 1  # the '{' just consumed
    while depth > 0 and self.peek_kind() != 'Eof':
        kind = self.advance().kind
        if kind == 'LBrace':
            depth += 1
        elif kind == 'RBrace':
            depth -= 1
    return {'stmt': 'ServiceBlock', 'name': name}
def parse_unary(self):
    """Parse a prefix ``!`` / ``-`` expression, or fall through to postfix.

    The operand of a unary operator is itself parsed as a unary
    expression, so stacked operators such as ``!!x``, ``--x`` or ``!-x``
    nest correctly instead of handing the second operator to the primary
    parser.

    Returns:
        ``{'expr': 'Not', 'inner': ...}``, ``{'expr': 'Neg', 'inner': ...}``
        or whatever ``parse_postfix`` returns.
    """
    k = self.peek_kind()
    if k == 'Not' or k == 'Minus':
        self.advance()
        inner = self.parse_unary()
        return {'expr': 'Not' if k == 'Not' else 'Neg', 'inner': inner}
    return self.parse_postfix()
def parse_primary(self):
    """Parse a primary expression and return its AST node.

    Covers literals, identifiers, parenthesised expressions, array and
    map literals, and ``if`` / ``for`` / ``match`` in expression position.
    Any other token (typically a keyword such as ``to`` or ``via``) is
    accepted as an identifier.

    Raises:
        ParseError: if the input ends where an expression is expected.
            The ``Eof`` token is deliberately not consumed, so the parser
            position never runs past the end of the token list.
    """
    k = self.peek_kind()

    # Literals: the node kind is the token kind.
    if k in ('Int', 'Float', 'Str', 'Bool'):
        return {'expr': k, 'value': self.advance().value}

    if k == 'Ident':
        return {'expr': 'Ident', 'name': self.advance().value}

    if k == 'LParen':
        self.advance()
        e = self.parse_expr()
        self.expect('RParen')
        return e

    if k == 'LBracket':
        self.advance()
        elems = []
        if self.peek_kind() != 'RBracket':
            elems = self.parse_arg_list()
        self.expect('RBracket')
        return {'expr': 'Array', 'elems': elems}

    if k == 'LBrace':
        return self.parse_map_literal()

    if k == 'If':
        return self.parse_if()

    if k == 'For':
        # for as expression
        self.expect('For')
        item = self.expect('Ident').value
        self.expect('In')
        lst = self.parse_expr()
        self.expect('LBrace')
        body = self.parse_block()
        return {'expr': 'For', 'item': item, 'list': lst, 'body': body}

    if k == 'Match':
        return self.parse_match()

    if k == 'Eof':
        # Consuming Eof would move past the last token and make the next
        # peek fail with IndexError; report a proper parse error instead.
        raise ParseError(f'Line {self.peek().line}: unexpected end of input')

    # keyword used as identifier (e.g. `to`, `via`, etc.)
    t = self.advance()
    return {'expr': 'Ident', 'name': t.value}
def is_int_expr(self, expr):
    """Return True when `expr` is statically known to produce an Int.

    The rules, by node kind:
      * ``Int`` literals and ``Not`` are always Int.
      * ``Ident`` defers to ``is_int_name``.
      * ``Neg`` is Int when its operand is.
      * ``Call`` is Int only for a plain-identifier callee listed in
        INT_CALL_NAMES.
      * ``BinOp``: comparisons and ``&&`` / ``||`` are Int; arithmetic is
        Int when both operands are.
    Everything else is treated as not-Int.
    """
    kind = expr.get('expr', '')

    if kind in ('Int', 'Not'):
        return True
    if kind == 'Ident':
        return self.is_int_name(expr['name'])
    if kind == 'Neg':
        return self.is_int_expr(expr['inner'])
    if kind == 'Call':
        callee = expr.get('func', {})
        return callee.get('expr') == 'Ident' and callee.get('name', '') in INT_CALL_NAMES
    if kind == 'BinOp':
        op = expr.get('op', '')
        if op in ('EqEq', 'NotEq', 'Lt', 'Gt', 'LtEq', 'GtEq', 'And', 'Or'):
            return True
        if op in ('Plus', 'Minus', 'Star', 'Slash', 'Percent'):
            return self.is_int_expr(expr['left']) and self.is_int_expr(expr['right'])
    return False
def cg_binop(self, expr):
    """Render a BinOp node as a C expression string.

    ``+`` is emitted either as C addition or as ``el_str_concat`` and
    ``==`` / ``!=`` either as a C comparison or as ``str_eq``; the choice
    is made from the operand node kinds and the tracked int names. All
    other operators map straight through BINOP_C.
    """
    op = expr['op']
    left, right = expr['left'], expr['right']
    # Operands are rendered first, left then right, whatever the operator.
    left_c = self.cg_expr(left)
    right_c = self.cg_expr(right)
    lk = left.get('expr', '')
    rk = right.get('expr', '')
    kinds = (lk, rk)

    def infix(c_op):
        return '(' + left_c + ' ' + c_op + ' ' + right_c + ')'

    def call(fn):
        return fn + '(' + left_c + ', ' + right_c + ')'

    if op == 'Plus':
        # A string literal on either side always means concatenation.
        if 'Str' in kinds:
            return call('el_str_concat')
        # Provably-Int operands, or an Int literal on either side → arithmetic.
        if (self.is_int_expr(left) and self.is_int_expr(right)) or 'Int' in kinds:
            return infix('+')

        # Call / Ident / nested '+' → string concat (historical default).
        def looks_like_string(node, kind):
            if kind in ('Call', 'Ident'):
                return True
            return kind == 'BinOp' and node.get('op') == 'Plus'

        if looks_like_string(left, lk) or looks_like_string(right, rk):
            return call('el_str_concat')
        return infix('+')

    if op in ('EqEq', 'NotEq'):
        negated = op == 'NotEq'
        plain = infix('!=' if negated else '==')
        if 'Int' in kinds or 'Bool' in kinds:
            return plain
        # Two identifiers that are both tracked as ints compare directly.
        if lk == 'Ident' and rk == 'Ident':
            if self.is_int_name(left['name']) and self.is_int_name(right['name']):
                return plain
        # Otherwise a Str / Ident / Call operand selects string equality.
        if any(kind in ('Str', 'Ident', 'Call') for kind in kinds):
            return ('!' if negated else '') + call('str_eq')
        return plain

    return infix(BINOP_C.get(op, op))
def cg_if_arm(self, stmts, result_var):
    """Render the statements of one if-expression arm as a single C string.

    Each handled statement becomes a fragment ending in ``'; '``:
      * Assign → ``name = value``
      * Let    → ``el_val_t name = value``
      * Return → the value is stored in `result_var`
      * Expr   → stored in `result_var` when it is the arm's last
                 statement, otherwise evaluated and cast to void
    Any other statement kind (While/For) produces no output.
    """
    pieces = []
    last_index = len(stmts) - 1

    for pos, node in enumerate(stmts):
        kind = node.get('stmt', '')
        # Only handled kinds carry a 'value' to render.
        if kind not in ('Assign', 'Let', 'Return', 'Expr'):
            continue

        rendered = self.cg_expr(node['value'])
        if kind == 'Assign':
            pieces.append(f"{node['name']} = {rendered}; ")
        elif kind == 'Let':
            pieces.append(f"el_val_t {node['name']} = {rendered}; ")
        elif kind == 'Return' or pos == last_index:
            pieces.append(f"{result_var} = ({rendered}); ")
        else:
            pieces.append(f"(void)({rendered}); ")

    return ''.join(pieces)
body_c + '); goto ' + done_label + '; } ') - elif pk == 'LitBool': - v = '1' if pat['value'] == 'true' else '0' - parts.append('if (' + subj_var + ' == ' + v + ') { ' + - result_var + ' = (' + body_c + '); goto ' + done_label + '; } ') - else: - parts.append('{ ' + result_var + ' = (' + body_c + '); goto ' + done_label + '; } ') - parts.append(done_label + ':; ' + result_var + '; })') - return ''.join(parts) - - # ── Statement codegen ───────────────────────────────────────────────────── - - def cg_stmt(self, stmt, indent, declared): - """ - Emit C for a statement. `declared` is a set of names declared in the - current C scope. Returns updated declared set. - """ - sk = stmt.get('stmt', '') - - if sk == 'Assign': - # Bare assignment without `let` — always plain assignment - name = stmt['name'] - val = stmt['value'] - val_c = self.cg_expr(val) - self.emit(indent + name + ' = ' + val_c + ';') - # Treat as if declared (so subsequent let-rebind works too) - declared = declared | {name} - return declared - - if sk == 'Let': - name = stmt['name'] - val = stmt['value'] - val_c = self.cg_expr(val) - # Track int names - if stmt.get('type') == 'Int': - self.add_int_name(name) - if val.get('expr') == 'Int': - self.add_int_name(name) - if name in declared: - self.emit(indent + name + ' = ' + val_c + ';') - else: - self.emit(indent + 'el_val_t ' + name + ' = ' + val_c + ';') - declared = declared | {name} - return declared - - if sk == 'Return': - val = stmt['value'] - if val.get('expr') == 'Nil': - self.emit(indent + 'return 0;') - else: - val_c = self.cg_expr(val) - self.emit(indent + 'return ' + val_c + ';') - return declared - - if sk == 'Expr': - val = stmt['value'] - vk = val.get('expr', '') - if vk == 'If': - self.cg_if_stmt(val, indent, declared) - return declared - if vk == 'For': - self.cg_for_body(val['item'], val['list'], val['body'], indent, declared) - return declared - val_c = self.cg_expr(val) - self.emit(indent + val_c + ';') - return declared - - if sk == 
def strip_outer_parens(self, s):
    """
    Return `s` without one redundant pair of enclosing parentheses.

    The pair is removed only when the '(' at s[0] is closed by the ')'
    at s[-1]. Anything else (too short, not parenthesised, or the first
    parenthesis closing earlier as in '(a) + (b)') is returned unchanged.

    Parentheses inside C string or character literals are ignored while
    matching. Without that, a condition such as
    '(x == EL_STR("(")) && (y)' looks fully wrapped and would be stripped
    into unbalanced C.
    """
    if len(s) < 2 or s[0] != '(' or s[-1] != ')':
        return s
    depth = 0
    quote = None      # the quote character of the literal we are inside
    escaped = False   # previous character was a backslash inside a literal
    for c in s[:-1]:
        if quote is not None:
            if escaped:
                escaped = False
            elif c == '\\':
                escaped = True
            elif c == quote:
                quote = None
            continue
        if c == '"' or c == "'":
            quote = c
        elif c == '(':
            depth += 1
        elif c == ')':
            depth -= 1
            if depth == 0:
                # The leading '(' closed before the end of the string.
                return s
    if quote is not None:
        # Unterminated literal: the final ')' is not structural, leave as is.
        return s
    return s[1:-1]


def cg_if_stmt(self, expr, indent, declared):
    """
    Emit a C `if` / `else` statement for an If expression used as a statement.

    Each arm is generated against its own copy of `declared`, so names
    first declared inside an arm are not added to the caller's set. The
    else arm is emitted only when `expr['has_else']` is truthy.
    """
    cond_c = self.cg_expr(expr['cond'])
    # The condition is wrapped in `if (...)` below, so drop a redundant pair.
    cond_c = self.strip_outer_parens(cond_c)
    self.emit(indent + 'if (' + cond_c + ') {')
    self.cg_stmts(expr.get('then', []), indent + ' ', set(declared))
    if expr.get('has_else'):
        self.emit(indent + '} else {')
        self.cg_stmts(expr.get('else', []), indent + ' ', set(declared))
    self.emit(indent + '}')


def cg_for_body(self, item, list_expr, body, indent, declared):
    """
    Emit a C loop that binds `item` to each element of `list_expr`.

    The list expression is evaluated once into `_el_lst`, its length is
    read once with el_list_len, and elements are fetched by index with
    el_list_get. Everything sits inside its own brace block. The loop
    body is generated with `item` added to a copy of `declared`.
    """
    list_c = self.cg_expr(list_expr)
    self.emit(indent + '{')
    self.emit(indent + ' el_val_t _el_lst = ' + list_c + ';')
    self.emit(indent + ' el_val_t _el_len = el_list_len(_el_lst);')
    self.emit(indent + ' for (el_val_t _el_i = 0; _el_i < _el_len; _el_i++) {')
    self.emit(indent + ' el_val_t ' + item + ' = el_list_get(_el_lst, _el_i);')
    body_decl = set(declared) | {item}
    self.cg_stmts(body, indent + ' ', body_decl)
    self.emit(indent + ' }')
    self.emit(indent + '}')


def cg_stmts(self, stmts, indent, declared):
    """
    Emit each statement in order, threading the declared-name set through.

    Works on a copy of `declared`; returns the set as left by the last
    statement.
    """
    decl = set(declared)
    for s in stmts:
        decl = self.cg_stmt(s, indent, decl)
    return decl
def transform_implicit_return(self, body, ret_type):
    """
    Turn a trailing bare expression into an explicit Return.

    `body` is returned untouched when the function returns Void, when it
    is empty, when its last statement is not an Expr, or when that Expr
    is an If or For. Otherwise a new list is returned whose last element
    is a Return of the same value; `body` itself is not modified.
    """
    if ret_type == 'Void' or not body:
        return body
    tail = body[-1]
    if tail.get('stmt') != 'Expr':
        return body
    tail_value = tail['value']
    if tail_value.get('expr', '') in ('If', 'For'):
        return body
    return [*body[:-1], {'stmt': 'Return', 'value': tail_value}]


def cg_fn(self, stmt):
    """
    Emit the C definition for one El function.

    `main` is skipped here. Every other function is emitted with return
    type el_val_t, its parameters pre-declared for the body, and a
    closing `return 0;` after the generated statements.
    """
    name = stmt['name']
    if name == 'main':
        return  # skip: C provides main()
    param_list = stmt.get('params', [])
    statements = stmt.get('body', [])
    declared_ret = stmt.get('ret_type', 'Any')
    signature_c = self.params_to_c(param_list)
    self.seed_int_names_from_params(param_list)
    self.emit('el_val_t ' + name + '(' + signature_c + ') {')
    in_scope = set()
    for param in param_list:
        in_scope.add(param['name'])
    self.cg_stmts(
        self.transform_implicit_return(statements, declared_ret),
        ' ',
        in_scope,
    )
    self.emit(' return 0;')
    self.emit('}')
    self.blank()
self._global_int_names.add(name) - self.emit('el_val_t ' + name + ';') - has_toplevel_lets = True - if has_toplevel_lets: - self.blank() - - # Function definitions. Skip El's `fn main()` for the same reason we - # skip its forward decl above: a duplicate `el_val_t main(void)` would - # collide with the `int main(int argc, char**)` we emit below. The - # body of `fn main()` is instead folded into C's main() alongside - # any top-level statements. - el_main_body = None - for s in stmts: - if s.get('stmt') == 'FnDef': - if s.get('name') == 'main': - el_main_body = s.get('body', []) - continue - self.cg_fn(s) - - # main(). Use _argc/_argv as C parameter names so El programs are - # free to declare local `argv` / `argc` (and call args() / count_args()) - # without colliding with the C-side parameters. - self.emit('int main(int _argc, char** _argv) {') - self.emit(' el_runtime_init_args(_argc, _argv);') - - # cgi block init - for s in stmts: - if s.get('stmt') == 'CgiBlock': - cname = s.get('name', '') - cdid = s.get('dharma_id', '') - cprin = s.get('principal', '') - cnet = s.get('network', '') - ceng = s.get('engram', '') - arg_name = 'EL_STR(' + c_str_lit(cname) + ')' - arg_did = ('EL_STR(' + c_str_lit(cdid) + ')' if s.get('has_dharma_id') else 'EL_NULL') - arg_prin = ('EL_STR(' + c_str_lit(cprin) + ')' if s.get('has_principal') else 'EL_NULL') - arg_net = ('EL_STR(' + c_str_lit(cnet) + ')' if s.get('has_network') else 'EL_NULL') - arg_eng = ('EL_STR(' + c_str_lit(ceng) + ')' if s.get('has_engram') else 'EL_NULL') - self.emit(' el_cgi_init(' + arg_name + ', ' + arg_did + ', ' + arg_prin + ', ' + arg_net + ', ' + arg_eng + ');') - break - - # Seed declared with top-level let names (they live at file scope) - main_decl = set() - for s in stmts: - if s.get('stmt') == 'Let': - main_decl.add(s['name']) - - # Reset int names for main body (use global + top-level lets) - self._int_names = set(self._global_int_names) - - # Top-level statements (not FnDef, not declarative) - 
# ─── Main ─────────────────────────────────────────────────────────────────────

# A whole line of the form:  import "path/to/file.el"   (group 1 = the path)
_IMPORT_RE = re.compile(r'^\s*import\s+"([^"]+\.el)"\s*$')
# The opening of:  from NAME import { ...   (group 1 = NAME)
_FROM_IMPORT_RE = re.compile(r'^\s*from\s+([A-Za-z_][A-Za-z0-9_]*)\s+import\s*\{')


def resolve_imports(entry_path):
    """Textually inline every imported .el file into a single source string.

    Files are deduplicated by absolute path and resolved depth-first: the
    text of a module's imports is placed before the module's own text, and
    a module that was already visited contributes nothing the second time.

    Two import forms are recognised, each starting on a line of its own:

        import "path/to/file.el"
        from NAME import { ... }

    The first is a quoted path, taken relative to the importing file
    unless it is absolute. The second resolves to NAME.el in the same
    directory as the importer. A `from` import whose brace list continues
    over several lines is removed up to and including the line holding the
    closing `}`, so no dangling names are left in the inlined source.

    Strict matching via regex avoids false positives such as CSS keyframes
    ("from { opacity: 0 }") embedded in El string literals.

    An imported file that cannot be read is reported on stderr and
    skipped. If the entry file itself cannot be read, the OSError
    propagates so the caller can fail instead of compiling an empty
    program.

    Returns the combined source text.
    """
    import os  # kept local: the module's top-level imports are sys and re only

    seen = set()

    def closing_brace_line(lines, start):
        """Index of the first line at or after `start` containing '}', else None."""
        for k in range(start, len(lines)):
            if '}' in lines[k]:
                return k
        return None

    def load(path, is_entry=False):
        path = os.path.abspath(path)
        if path in seen:
            return ''
        seen.add(path)
        try:
            with open(path, 'r', encoding='utf-8') as f:
                source = f.read()
        except OSError as e:
            if is_entry:
                # Nothing to compile without the entry file; let main() report it.
                raise
            print(f'resolve_imports: cannot read {path}: {e}', file=sys.stderr)
            return ''
        directory = os.path.dirname(path)
        inlined = []  # text of imported modules, in import order
        body = []     # this file's own lines, import lines removed
        lines = source.split('\n')
        idx = 0
        while idx < len(lines):
            line = lines[idx]
            idx += 1
            m = _IMPORT_RE.match(line)
            if m:
                rel = m.group(1)
                imp_path = rel if os.path.isabs(rel) else os.path.join(directory, rel)
            else:
                m = _FROM_IMPORT_RE.match(line)
                if not m:
                    body.append(line)
                    continue
                imp_path = os.path.join(directory, m.group(1) + '.el')
                if '}' not in line[m.end():]:
                    # Brace list continues on later lines: drop those too.
                    # If no closing brace exists at all, only this line is
                    # dropped, as before.
                    end = closing_brace_line(lines, idx)
                    if end is not None:
                        idx = end + 1
            # The import line itself is dropped; only the module text is kept.
            inlined.append(load(imp_path))
        return ''.join(inlined) + '\n'.join(body) + '\n'

    return load(entry_path, is_entry=True)


def main():
    """Command-line entry point: compile one El source file to C on stdout.

    Resolves imports, lexes, parses and generates code in turn. Any stage
    that fails prints a message to stderr and exits with status 1; parser
    and codegen failures other than ParseError also print a traceback.
    """
    if len(sys.argv) < 2:
        print('Usage: bootstrap.py <source.el>', file=sys.stderr)
        sys.exit(1)

    path = sys.argv[1]
    try:
        source = resolve_imports(path)
    except OSError as e:
        print(f'Error reading {path}: {e}', file=sys.stderr)
        sys.exit(1)

    try:
        tokens = lex(source)
    except Exception as e:
        print(f'Lexer error: {e}', file=sys.stderr)
        sys.exit(1)

    try:
        parser = Parser(tokens)
        stmts = parser.parse_program()
    except ParseError as e:
        print(f'Parse error: {e}', file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        import traceback
        print(f'Parser error: {e}', file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        sys.exit(1)

    try:
        cg = CodeGen()
        cg.codegen(stmts)
        print(cg.output())
    except Exception as e:
        import traceback
        print(f'Codegen error: {e}', file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()