diff --git a/c_compiler/Cargo.toml b/c_compiler/Cargo.toml deleted file mode 100644 index ef733ea..0000000 --- a/c_compiler/Cargo.toml +++ /dev/null @@ -1,8 +0,0 @@ -[package] -name = "c_compiler" -version.workspace = true -edition.workspace = true -authors.workspace = true - -[dependencies] -chrono = "0.4.42" diff --git a/c_compiler/code.c b/c_compiler/code.c deleted file mode 100644 index d069fa2..0000000 --- a/c_compiler/code.c +++ /dev/null @@ -1,14 +0,0 @@ -int var_x = 5; - -int factorial(int n) { - if (n <= 1) { - return 1; - } - return n * factorial(n - 1); -} - -int main() { - int result = var_x + factorial(5); - print(result); - return 0; -} diff --git a/c_compiler/compiler.py b/c_compiler/compiler.py deleted file mode 100644 index 627278c..0000000 --- a/c_compiler/compiler.py +++ /dev/null @@ -1,926 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple C to DSA Assembly Compiler -Supports a subset of C including: -- int variables and functions -- Arithmetic operations (+, -, *, /) -- Comparisons (==, !=, <, >, <=, >=) -- If/else statements -- While loops -- Function calls -- Return statements -""" - -import re -import sys -from typing import List, Dict, Optional, Tuple -from dataclasses import dataclass -from enum import Enum -from pprint import pprint -import json - - -class TokenType(Enum): - # Keywords - INT = "int" - IF = "if" - ELSE = "else" - WHILE = "while" - RETURN = "return" - - # Identifiers and literals - IDENTIFIER = "IDENTIFIER" - NUMBER = "NUMBER" - - # Operators - PLUS = "+" - MINUS = "-" - STAR = "*" - SLASH = "/" - ASSIGN = "=" - EQ = "==" - NE = "!=" - LT = "<" - GT = ">" - LE = "<=" - GE = ">=" - - # Delimiters - LPAREN = "(" - RPAREN = ")" - LBRACE = "{" - RBRACE = "}" - SEMICOLON = ";" - COMMA = "," - - EOF = "EOF" - - -@dataclass -class Token: - type: TokenType - value: str - line: int - col: int - - -class Lexer: - def __init__(self, source: str): - self.source = source - self.pos = 0 - self.line = 1 - self.col = 1 - self.tokens = [] - - def error(self, msg: str): - raise SyntaxError(f"Lexer error at line {self.line}, col {self.col}: {msg}") - - def peek(self, offset: int = 0) -> Optional[str]: - pos = self.pos + offset - return self.source[pos] if pos < len(self.source) else None - - def advance(self) -> Optional[str]: - if self.pos >= len(self.source): - return None - char = self.source[self.pos] - self.pos += 1 - if char == "\n": - self.line += 1 - self.col = 1 - else: - self.col += 1 - return char - - def skip_whitespace(self): - while self.peek() and self.peek() in " \t\n\r": - self.advance() - - def skip_comment(self): - if self.peek() == "/" and self.peek(1) == "/": - while self.peek() and self.peek() != "\n": - self.advance() - self.advance() # skip newline - - def read_number(self) -> str: - num = "" - while self.peek() and self.peek().isdigit(): - num += self.advance() - return num - - def read_identifier(self) -> str: - ident = "" - while self.peek() and (self.peek().isalnum() or self.peek() == "_"): - ident += self.advance() - return ident - - def tokenize(self) -> List[Token]: - keywords = { - "int": TokenType.INT, - "if": TokenType.IF, - "else": TokenType.ELSE, - "while": TokenType.WHILE, - "return": TokenType.RETURN, - } - - while self.pos < len(self.source): - self.skip_whitespace() - self.skip_comment() - - if self.pos >= len(self.source): - break - - line, col = self.line, self.col - char = self.peek() - - # Numbers - if char.isdigit(): - num = self.read_number() - self.tokens.append(Token(TokenType.NUMBER, num, line, col)) - - # Identifiers and keywords - elif char.isalpha() or char == "_": - ident = self.read_identifier() - token_type = keywords.get(ident, TokenType.IDENTIFIER) - self.tokens.append(Token(token_type, ident, line, col)) - - # Two-character operators - elif char == "=" and self.peek(1) == "=": - self.advance() - self.advance() - self.tokens.append(Token(TokenType.EQ, "==", line, col)) - elif char == "!" and self.peek(1) == "=": - self.advance() - self.advance() - self.tokens.append(Token(TokenType.NE, "!=", line, col)) - elif char == "<" and self.peek(1) == "=": - self.advance() - self.advance() - self.tokens.append(Token(TokenType.LE, "<=", line, col)) - elif char == ">" and self.peek(1) == "=": - self.advance() - self.advance() - self.tokens.append(Token(TokenType.GE, ">=", line, col)) - - # Single-character operators - elif char == "+": - self.advance() - self.tokens.append(Token(TokenType.PLUS, "+", line, col)) - elif char == "-": - self.advance() - self.tokens.append(Token(TokenType.MINUS, "-", line, col)) - elif char == "*": - self.advance() - self.tokens.append(Token(TokenType.STAR, "*", line, col)) - elif char == "/": - self.advance() - self.tokens.append(Token(TokenType.SLASH, "/", line, col)) - elif char == "=": - self.advance() - self.tokens.append(Token(TokenType.ASSIGN, "=", line, col)) - elif char == "<": - self.advance() - self.tokens.append(Token(TokenType.LT, "<", line, col)) - elif char == ">": - self.advance() - self.tokens.append(Token(TokenType.GT, ">", line, col)) - elif char == "(": - self.advance() - self.tokens.append(Token(TokenType.LPAREN, "(", line, col)) - elif char == ")": - self.advance() - self.tokens.append(Token(TokenType.RPAREN, ")", line, col)) - elif char == "{": - self.advance() - self.tokens.append(Token(TokenType.LBRACE, "{", line, col)) - elif char == "}": - self.advance() - self.tokens.append(Token(TokenType.RBRACE, "}", line, col)) - elif char == ";": - self.advance() - self.tokens.append(Token(TokenType.SEMICOLON, ";", line, col)) - elif char == ",": - self.advance() - self.tokens.append(Token(TokenType.COMMA, ",", line, col)) - else: - self.error(f"Unexpected character: {char}") - - self.tokens.append(Token(TokenType.EOF, "", self.line, self.col)) - return self.tokens - - -# AST Node classes -@dataclass -class ASTNode: - pass - - -@dataclass -class Program(ASTNode): - declarations: List["Declaration"] - - -@dataclass -class Declaration(ASTNode): - pass - - -@dataclass -class FunctionDecl(Declaration): - name: str - params: List[str] - body: "CompoundStmt" - - -@dataclass -class VarDecl(Declaration): - name: str - init: Optional["Expression"] = None - - -@dataclass -class Statement(ASTNode): - pass - - -@dataclass -class CompoundStmt(Statement): - statements: List[Statement] - - -@dataclass -class ExprStmt(Statement): - expr: Optional["Expression"] - - -@dataclass -class IfStmt(Statement): - condition: "Expression" - then_stmt: Statement - else_stmt: Optional[Statement] = None - - -@dataclass -class WhileStmt(Statement): - condition: "Expression" - body: Statement - - -@dataclass -class ReturnStmt(Statement): - expr: Optional["Expression"] - - -@dataclass -class Expression(ASTNode): - pass - - -@dataclass -class BinaryOp(Expression): - op: str - left: Expression - right: Expression - - -@dataclass -class UnaryOp(Expression): - op: str - operand: Expression - - -@dataclass -class AssignExpr(Expression): - name: str - value: Expression - - -@dataclass -class VarExpr(Expression): - name: str - - -@dataclass -class NumberExpr(Expression): - value: int - - -@dataclass -class CallExpr(Expression): - name: str - args: List[Expression] - - -class Parser: - def __init__(self, tokens: List[Token]): - self.tokens = tokens - self.pos = 0 - - def error(self, msg: str): - token = self.current() - raise SyntaxError(f"Parser error at line {token.line}, col {token.col}: {msg}") - - def current(self) -> Token: - return self.tokens[self.pos] if self.pos < len(self.tokens) else self.tokens[-1] - - def peek(self, offset: int = 0) -> Token: - pos = self.pos + offset - return self.tokens[pos] if pos < len(self.tokens) else self.tokens[-1] - - def advance(self) -> Token: - token = self.current() - if self.pos < len(self.tokens) - 1: - self.pos += 1 - return token - - def expect(self, token_type: TokenType) -> Token: - token = self.current() - if token.type != token_type: - self.error(f"Expected {token_type.value}, got {token.type.value}") - return self.advance() - - def parse(self) -> Program: - declarations = [] - while self.current().type != TokenType.EOF: - declarations.append(self.parse_declaration()) - return Program(declarations) - - def parse_declaration(self) -> Declaration: - self.expect(TokenType.INT) - name = self.expect(TokenType.IDENTIFIER).value - - if self.current().type == TokenType.LPAREN: - # Function declaration - self.advance() - params = [] - - if self.current().type != TokenType.RPAREN: - self.expect(TokenType.INT) - params.append(self.expect(TokenType.IDENTIFIER).value) - - while self.current().type == TokenType.COMMA: - self.advance() - self.expect(TokenType.INT) - params.append(self.expect(TokenType.IDENTIFIER).value) - - self.expect(TokenType.RPAREN) - body = self.parse_compound_stmt() - return FunctionDecl(name, params, body) - else: - # Variable declaration - init = None - if self.current().type == TokenType.ASSIGN: - self.advance() - init = self.parse_expression() - self.expect(TokenType.SEMICOLON) - return VarDecl(name, init) - - def parse_compound_stmt(self) -> CompoundStmt: - self.expect(TokenType.LBRACE) - statements = [] - - while self.current().type != TokenType.RBRACE: - statements.append(self.parse_statement()) - - self.expect(TokenType.RBRACE) - return CompoundStmt(statements) - - def parse_statement(self) -> Statement: - token = self.current() - - if token.type == TokenType.LBRACE: - return self.parse_compound_stmt() - elif token.type == TokenType.IF: - return self.parse_if_stmt() - elif token.type == TokenType.WHILE: - return self.parse_while_stmt() - elif token.type == TokenType.RETURN: - return self.parse_return_stmt() - elif token.type == TokenType.INT: - # Local variable declaration - self.advance() - name = self.expect(TokenType.IDENTIFIER).value - init = None - if self.current().type == TokenType.ASSIGN: - self.advance() - init = self.parse_expression() - self.expect(TokenType.SEMICOLON) - return ExprStmt(AssignExpr(name, init) if init else None) - else: - expr = ( - self.parse_expression() - if self.current().type != TokenType.SEMICOLON - else None - ) - self.expect(TokenType.SEMICOLON) - return ExprStmt(expr) - - def parse_if_stmt(self) -> IfStmt: - self.expect(TokenType.IF) - self.expect(TokenType.LPAREN) - condition = self.parse_expression() - self.expect(TokenType.RPAREN) - then_stmt = self.parse_statement() - - else_stmt = None - if self.current().type == TokenType.ELSE: - self.advance() - else_stmt = self.parse_statement() - - return IfStmt(condition, then_stmt, else_stmt) - - def parse_while_stmt(self) -> WhileStmt: - self.expect(TokenType.WHILE) - self.expect(TokenType.LPAREN) - condition = self.parse_expression() - self.expect(TokenType.RPAREN) - body = self.parse_statement() - return WhileStmt(condition, body) - - def parse_return_stmt(self) -> ReturnStmt: - self.expect(TokenType.RETURN) - expr = None - if self.current().type != TokenType.SEMICOLON: - expr = self.parse_expression() - self.expect(TokenType.SEMICOLON) - return ReturnStmt(expr) - - def parse_expression(self) -> Expression: - return self.parse_assignment() - - def parse_assignment(self) -> Expression: - expr = self.parse_comparison() - - if self.current().type == TokenType.ASSIGN: - if not isinstance(expr, VarExpr): - self.error("Invalid assignment target") - self.advance() - value = self.parse_assignment() - return AssignExpr(expr.name, value) - - return expr - - def parse_comparison(self) -> Expression: - expr = self.parse_additive() - - while self.current().type in [ - TokenType.EQ, - TokenType.NE, - TokenType.LT, - TokenType.GT, - TokenType.LE, - TokenType.GE, - ]: - op = self.advance().value - right = self.parse_additive() - expr = BinaryOp(op, expr, right) - - return expr - - def parse_additive(self) -> Expression: - expr = self.parse_multiplicative() - - while self.current().type in [TokenType.PLUS, TokenType.MINUS]: - op = self.advance().value - right = self.parse_multiplicative() - expr = BinaryOp(op, expr, right) - - return expr - - def parse_multiplicative(self) -> Expression: - expr = self.parse_unary() - - while self.current().type in [TokenType.STAR, TokenType.SLASH]: - op = self.advance().value - right = self.parse_unary() - expr = BinaryOp(op, expr, right) - - return expr - - def parse_unary(self) -> Expression: - if self.current().type in [TokenType.PLUS, TokenType.MINUS]: - op = self.advance().value - operand = self.parse_unary() - return UnaryOp(op, operand) - - return self.parse_primary() - - def parse_primary(self) -> Expression: - token = self.current() - - if token.type == TokenType.NUMBER: - self.advance() - return NumberExpr(int(token.value)) - - elif token.type == TokenType.IDENTIFIER: - name = self.advance().value - - if self.current().type == TokenType.LPAREN: - # Function call - self.advance() - args = [] - - if self.current().type != TokenType.RPAREN: - args.append(self.parse_expression()) - while self.current().type == TokenType.COMMA: - self.advance() - args.append(self.parse_expression()) - - self.expect(TokenType.RPAREN) - return CallExpr(name, args) - else: - return VarExpr(name) - - elif token.type == TokenType.LPAREN: - self.advance() - expr = self.parse_expression() - self.expect(TokenType.RPAREN) - return expr - - else: - self.error(f"Unexpected token: {token.type.value}") - - -class CodeGenerator: - def __init__(self): - self.output = [] - self.label_counter = 0 - self.string_counter = 0 - self.functions = {} - self.current_function = None - self.local_vars = {} - self.global_vars = {} - self.register_pool = [f"rg{i:x}" for i in range(16)] - self.used_registers = set() - - def new_label(self, prefix: str = "L") -> str: - label = f"{prefix}{self.label_counter}" - self.label_counter += 1 - return label - - def allocate_register(self) -> str: - for reg in self.register_pool: - if reg not in self.used_registers: - self.used_registers.add(reg) - return reg - raise RuntimeError("Out of registers") - - def free_register(self, reg: str): - self.used_registers.discard(reg) - - def emit(self, code: str): - self.output.append(code) - - def generate(self, program: Program) -> str: - # Emit data section - self.emit("// Global variables") - for decl in program.declarations: - if isinstance(decl, VarDecl): - self.global_vars[decl.name] = f"var_{decl.name}" - if decl.init: - if isinstance(decl.init, NumberExpr): - self.emit(f"dw var_{decl.name}: {decl.init.value}") - else: - self.emit(f"dw var_{decl.name}: 0") - else: - self.emit(f"dw var_{decl.name}: 0") - - self.emit("") - self.emit("// Entry point") - self.emit("dw stack_bottom: 0x10000") - self.emit("") - self.emit("init:") - self.emit(" ldw stack_bottom, spr") - self.emit(" mov spr, bpr") - - self.emit(" push zero") - self.emit(" call main") - self.emit(" pop rg0") - self.emit(" hlt") - self.emit("") - - # Emit functions - for decl in program.declarations: - if isinstance(decl, FunctionDecl): - self.generate_function(decl) - - return "\n".join(self.output) - - def generate_function(self, func: FunctionDecl): - self.current_function = func.name - self.functions[func.name] = func - self.local_vars = {} - - # Map parameters to stack offsets - # Parameters start at bpr+8 (after return addr at bpr+4) - for i, param in enumerate(func.params): - self.local_vars[param] = 8 + (i * 4) - - self.emit(f"{func.name}:") - self.emit(" push bpr") - self.emit(" mov spr, bpr") - self.emit("") - - # Generate function body - self.generate_compound_stmt(func.body) - - # Default return if no explicit return - self.emit("// default return") - self.emit(f"{func.name}_end:") - self.emit(" mov bpr, spr") - self.emit(" pop bpr") - self.emit(" return") - self.emit("") - - def generate_compound_stmt(self, stmt: CompoundStmt): - for s in stmt.statements: - self.generate_statement(s) - - def generate_statement(self, stmt: Statement): - if isinstance(stmt, CompoundStmt): - self.generate_compound_stmt(stmt) - elif isinstance(stmt, ExprStmt): - if stmt.expr: - reg = self.generate_expression(stmt.expr) - self.free_register(reg) - elif isinstance(stmt, IfStmt): - self.generate_if_stmt(stmt) - elif isinstance(stmt, WhileStmt): - self.generate_while_stmt(stmt) - elif isinstance(stmt, ReturnStmt): - self.generate_return_stmt(stmt) - - def generate_if_stmt(self, stmt: IfStmt): - else_label = self.new_label("else") - end_label = self.new_label("endif") - - # Evaluate condition - cond_reg = self.generate_expression(stmt.condition) - self.emit(f" cmp {cond_reg}, zero") - self.free_register(cond_reg) - - if stmt.else_stmt: - self.emit(f" jeq {else_label}") - else: - self.emit(f" jeq {end_label}") - - # Then branch - self.generate_statement(stmt.then_stmt) - - if stmt.else_stmt: - self.emit(f" jmp {end_label}") - self.emit(f"{else_label}:") - self.generate_statement(stmt.else_stmt) - - self.emit(f"{end_label}:") - - def generate_while_stmt(self, stmt: WhileStmt): - start_label = self.new_label("while_start") - end_label = self.new_label("while_end") - - self.emit(f"{start_label}:") - - # Evaluate condition - cond_reg = self.generate_expression(stmt.condition) - self.emit(f" cmp {cond_reg}, zero") - self.free_register(cond_reg) - self.emit(f" jeq {end_label}") - - # Loop body - self.generate_statement(stmt.body) - self.emit(f" jmp {start_label}") - - self.emit(f"{end_label}:") - - def generate_return_stmt(self, stmt: ReturnStmt): - if stmt.expr: - reg = self.generate_expression(stmt.expr) - # Store return value at spr+8 according to calling convention - self.emit(f" stw {reg}, spr, 8") - self.free_register(reg) - self.emit(f" jmp {self.current_function}_end") - - def generate_expression(self, expr: Expression) -> str: - if isinstance(expr, NumberExpr): - reg = self.allocate_register() - if expr.value <= 0xFFFF and expr.value >= 0: - self.emit(f" lli {expr.value}, {reg}") - if expr.value > 0xFF: - self.emit(f" lui {expr.value >> 16}, {reg}") - else: - self.emit(f" lli {expr.value & 0xFFFF}, {reg}") - self.emit(f" lui {(expr.value >> 16) & 0xFFFF}, {reg}") - return reg - - elif isinstance(expr, VarExpr): - reg = self.allocate_register() - if expr.name in self.local_vars: - offset = self.local_vars[expr.name] - self.emit(f" ldw bpr, {reg}, {offset}") - elif expr.name in self.global_vars: - label = self.global_vars[expr.name] - self.emit(f" ldw {label}, {reg}") - else: - raise RuntimeError(f"Undefined variable: {expr.name}") - return reg - - elif isinstance(expr, AssignExpr): - value_reg = self.generate_expression(expr.value) - - if expr.name in self.local_vars: - offset = self.local_vars[expr.name] - self.emit(f" stw {value_reg}, bpr, {offset}") - elif expr.name in self.global_vars: - label = self.global_vars[expr.name] - self.emit(f" stw {value_reg}, {label}") - else: - # New local variable - allocate after params and return value space - # Start local variables at offset -4 from bpr (growing downward) - offset = -(len([v for v in self.local_vars.values() if v < 0]) + 1) * 4 - self.local_vars[expr.name] = offset - self.emit(f" stw {value_reg}, bpr, {offset}") - - return value_reg - - elif isinstance(expr, BinaryOp): - return self.generate_binary_op(expr) - - elif isinstance(expr, UnaryOp): - operand_reg = self.generate_expression(expr.operand) - result_reg = self.allocate_register() - - if expr.op == "-": - self.emit(f" lwi 0, {result_reg}") - self.emit(f" sub {result_reg}, {operand_reg}, {result_reg}") - else: # + - self.emit(f" mov {operand_reg}, {result_reg}") - - self.free_register(operand_reg) - return result_reg - - elif isinstance(expr, CallExpr): - # First, make space for return value (must be pushed BEFORE arguments) - temp_reg = self.allocate_register() - - # Then push arguments in reverse order - arg_regs = [] - for arg in reversed(expr.args): - reg = self.generate_expression(arg) - self.emit(f" push {reg}") - arg_regs.append(reg) - - # Call function - self.emit(f" call {expr.name}") - - # Get return value (it's now on top of stack) - self.emit(f" pop {temp_reg}") - - # Clean up remaining args - for i in range(len(arg_regs) - 1): - self.emit(f" pop zero") - - # Free the arg registers - for reg in arg_regs: - self.free_register(reg) - - return temp_reg - - else: - raise RuntimeError(f"Unknown expression type: {type(expr)}") - - def generate_binary_op(self, expr: BinaryOp) -> str: - # For operations that might contain function calls, we need to be careful - # about register allocation. Evaluate left, save it, evaluate right. - left_reg = self.generate_expression(expr.left) - - # If right side contains a function call, we need to save left_reg - # For now, always save to be safe - saved_reg = self.allocate_register() - self.emit(f" mov {left_reg}, {saved_reg}") - self.free_register(left_reg) - - right_reg = self.generate_expression(expr.right) - result_reg = self.allocate_register() - - if expr.op == "+": - self.emit(f" add {left_reg}, {right_reg}, {result_reg}") - elif expr.op == "-": - self.emit(f" sub {left_reg}, {right_reg}, {result_reg}") - elif expr.op == "*": - # Simple multiplication using loop - temp_label = self.new_label("mult") - end_label = self.new_label("mult_end") - self.emit(f" lli 0, {result_reg}") - self.emit(f"{temp_label}:") - self.emit(f" cmp {right_reg}, zero") - self.emit(f" jeq {end_label}") - self.emit(f" add {result_reg}, {left_reg}, {result_reg}") - self.emit(f" dec {right_reg}") - self.emit(f" jmp {temp_label}") - self.emit(f"{end_label}:") - elif expr.op == "/": - # Simple division using loop - temp_label = self.new_label("div") - end_label = self.new_label("div_end") - self.emit(f" lli 0, {result_reg}") - self.emit(f"{temp_label}:") - self.emit(f" cmp {left_reg}, {right_reg}") - self.emit(f" jlt {end_label}") - self.emit(f" sub {left_reg}, {right_reg}, {left_reg}") - self.emit(f" inc {result_reg}") - self.emit(f" jmp {temp_label}") - self.emit(f"{end_label}:") - elif expr.op in ["==", "!=", "<", ">", "<=", ">="]: - self.emit(f" cmp {left_reg}, {right_reg}") - - # Result is 1 if condition true, 0 otherwise - self.emit(f" lli 0, {result_reg}") - true_label = self.new_label("cmp_true") - end_label = self.new_label("cmp_end") - - if expr.op == "==": - self.emit(f" jeq {true_label}") - elif expr.op == "!=": - self.emit(f" jne {true_label}") - elif expr.op == "<": - self.emit(f" jlt {true_label}") - elif expr.op == ">": - self.emit(f" jgt {true_label}") - elif expr.op == "<=": - self.emit(f" jle {true_label}") - elif expr.op == ">=": - self.emit(f" jge {true_label}") - - self.emit(f" jmp {end_label}") - self.emit(f"{true_label}:") - self.emit(f" lli 1, {result_reg}") - self.emit(f"{end_label}:") - - self.free_register(left_reg) - self.free_register(right_reg) - return result_reg - - -def compile_c_to_asm(source: str) -> str: - """Compile C source code to DSA assembly.""" - lexer = Lexer(source) - tokens = lexer.tokenize() - - parser = Parser(tokens) - ast = parser.parse() - - codegen = CodeGenerator() - assembly = codegen.generate(ast) - - return assembly - - -def main(): - if len(sys.argv) < 2: - print("Usage: python compiler.py [output.dsa]") - sys.exit(1) - - input_file = sys.argv[1] - output_file = sys.argv[2] if len(sys.argv) > 2 else input_file.replace(".c", ".dsa") - - with open(input_file, "r") as f: - source = f.read() - - try: - assembly = compile_c_to_asm(source) - - with open(output_file, "w") as f: - f.write(assembly) - - print(f"Successfully compiled {input_file} to {output_file}") - except (SyntaxError, RuntimeError) as e: - print(f"Compilation error: {e}") - sys.exit(1) - - -if __name__ == "__main__": - main() - # # Example usage - # if len(sys.argv) > 1: - # example_c = sys.argv[1] - - # else: - # example_c = """ - # int factorial(int n) { - # if (n <= 1) { - # return 1; - # } - # return n * factorial(n - 1); - # } - - # int main() { - # int result; - # result = factorial(5); - # return result; - # } - # """ - - # print("Example C program:") - # print(example_c) - # print("\n" + "="*60 + "\n") - # print("Generated DSA assembly:") - # print(compile_c_to_asm(example_c)) diff --git a/c_compiler/example.c b/c_compiler/example.c deleted file mode 100644 index 1182420..0000000 --- a/c_compiler/example.c +++ /dev/null @@ -1,12 +0,0 @@ -int factorial(int n) { - if (n <= 1) { - return 1; - } - return n * factorial(n - 1); -} - -int main() { - int res = factorial(3); - printnum(res); - return 0; -} diff --git a/c_compiler/example.dsc b/c_compiler/example.dsc deleted file mode 100644 index 232535d..0000000 --- a/c_compiler/example.dsc +++ /dev/null @@ -1,25 +0,0 @@ -include print: "lib/io/print.dsa" - -int factorial(int n) { - if (n <= 1) { - return 1; - } - return n * factorial(n - 1); -} - -int add_(int a, int b) { - return a + b; -} - -int greater(int a, int b) { - if (a + a > b + b) { - return a; - } else { - return b + a; - } -} - -int main() { - printnum(-5); - return 0; -} diff --git a/c_compiler/output.dsa b/c_compiler/output.dsa deleted file mode 100644 index 54e84c0..0000000 --- a/c_compiler/output.dsa +++ /dev/null @@ -1,5 +0,0 @@ -// Imports -include maths: "./lib/maths/core.dsa" - -// Reserved Memory - diff --git a/c_compiler/src/assembly.rs b/c_compiler/src/assembly.rs deleted file mode 100644 index 4b46a4f..0000000 --- a/c_compiler/src/assembly.rs +++ /dev/null @@ -1,106 +0,0 @@ -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -#[non_exhaustive] -pub enum Register { - // general purpose registers - Rg0, - Rg1, - Rg2, - Rg3, - Rg4, - Rg5, - Rg6, - Rg7, - Rg8, - Rg9, - Rga, - Rgb, - Rgc, - Rgd, - Rge, - Rgf, - - // special purpose registers - Acc, - Spr, - Bpr, - Ret, - Idr, - Mmr, - Zero, - NoReg, - - // system registers - can't be written to by instructions. - Mar, - Mdr, - Sts, - Cir, - Pcx, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(u8)] -#[non_exhaustive] -/// A list of all current instructions in the DSA Assembly language. -pub enum Instruction { - // No-op - Nop = 0x0, - - // Data transfer instructions - Mov(Register, Register) = 0x1, - Movs(Register, Register) = 0x2, - - Ldb(Register, Register, Option) = 0x3, - Ldbs(Register, Register, Option) = 0x4, - Ldh(Register, Register, Option) = 0x5, - Ldhs(Register, Register, Option) = 0x6, - Ldw(Register, Register, Option) = 0x7, - - Stb(Register, Register, Option) = 0x8, - Sth(Register, Register, Option) = 0x9, - Stw(Register, Register, Option) = 0xA, - - Lli(u16, Register) = 0xB, - Lui(u16, Register) = 0xC, - - // Jump Instructions - Jump(u16, Register) = 0xD, - JumpEq(u16, Register) = 0xE, - JumpNeq(u16, Register) = 0xF, - JumpGt(u16, Register) = 0x10, - JumpGe(u16, Register) = 0x11, - JumpLt(u16, Register) = 0x12, - JumpLe(u16, Register) = 0x13, - - // Comparison - Compare(Register, Register) = 0x14, - - // // Arithmetic - // Add(args::RTypeArgs) = 0x19, - // Sub(args::RTypeArgs) = 0x1A, - // Increment(args::RTypeArgs) = 0x15, - // Decrement(args::RTypeArgs) = 0x16, - // ShiftLeft(args::RTypeArgs) = 0x17, - // ShiftRight(args::RTypeArgs) = 0x18, - - // // Logical - // And(args::RTypeArgs) = 0x1B, - // Or(args::RTypeArgs) = 0x1C, - // Not(args::RTypeArgs) = 0x1D, - // Xor(args::RTypeArgs) = 0x1E, - // Nand(args::RTypeArgs) = 0x1F, - // Nor(args::RTypeArgs) = 0x20, - // Xnor(args::RTypeArgs) = 0x21, - - // // Misc - // Interrupt(Interrupt) = 0x22, - // IntReturn = 0x23, - // Halt = 0x24, - - // // Immediate Arithmetic - // AddImmediate(args::ITypeArgs) = 0x25, - // SubImmediate(args::ITypeArgs) = 0x26, - - // Fake Instructions - Data(u32) = 0x3E, - Segment(u32) = 0x3F, -} diff --git a/c_compiler/src/codegen.rs b/c_compiler/src/codegen.rs deleted file mode 100644 index ee3a598..0000000 --- a/c_compiler/src/codegen.rs +++ /dev/null @@ -1,599 +0,0 @@ -use std::collections::HashMap; -use std::hash::Hash; -use std::sync::LazyLock; -use std::sync::atomic::AtomicU32; -use std::time::SystemTime; - -use chrono::{DateTime, Local}; - -use crate::registers::RegisterAllocator; -use crate::{block, cmd, comment, dsa}; - -use crate::parser::{ - BinaryOperator, ConstExpr, Declaration, Expression, Parameter, Program, Statement, - UnaryOperator, -}; - -pub struct CodeGenerator { - ast: Program, - imports: HashMap, - globals: Vec, - functions: Vec, - symbols: Vec, - allocator: RegisterAllocator, -} - -static GLOBAL_METHODS: LazyLock> = LazyLock::new(|| { - HashMap::from([("print", "print::print"), ("printnum", "print::print_num")]) -}); - -fn import(name: &str, path: &str) -> String { - format!("include {name}: \"{}\"", path) -} - -impl CodeGenerator { - const RET: &'static str = "\tjmp _ret"; - - pub fn new(ast: Program) -> Self { - CodeGenerator { - ast, - imports: HashMap::new(), - globals: Vec::new(), - functions: Vec::new(), - symbols: Vec::new(), - allocator: RegisterAllocator::new(), - } - } - - pub fn include(&mut self, name: &str, path: &str) { - self.imports.insert(name.to_string(), path.to_string()); - } - - pub fn generate(&mut self) -> Result { - // always include the print library for debugging! - self.include("print", "./lib/io/print.dsa"); - - for block in self.ast.clone().declarations { - match block { - Declaration::Variable { name, .. } => self.symbols.push(name), - Declaration::Function { name, .. } => self.symbols.push(name), - Declaration::Import { name, .. } => self.symbols.push(name), - } - } - - for block in self.ast.clone().declarations { - self.generate_block(block.clone())?; - } - - self.generate_layout() - } - - fn generate_layout(&mut self) -> Result { - let datetime: DateTime = SystemTime::now().into(); - Ok(dsa![ - "", - comment!("GENERATED BY DSA-C COMPILER"), - comment!(format!( - "Generated at {}", - datetime.format("%Y-%m-%d %H:%M:%S") - )), - "", - // imports - comment!("Imports"), - self.imports - .iter() - .map(|(k, v)| import(k, v)) - .collect::>() - .join("\n"), - "", - // reserved memory - comment!("Globals & Reserved Memory"), - self.globals.join("\n"), - "", - // entry point - comment!("Entry Point"), - "dw stack: 0x10000", - "db message: \"Process Exited with code:\"", - block! [ "_init" - dsa![ldw stack, bpr], - dsa![mov bpr, spr], - dsa![push zero], - dsa![call main], - dsa![call print::print_newline], - dsa![lwi message, rg0], - dsa![push rg0], - dsa![call print::print], - dsa![pop zero], - dsa![call print::print_hex_word], - dsa![pop zero], - dsa![hlt] - ], - "", - comment!("Function return boilerplate"), - block! [ "_ret" - dsa![mov bpr, spr], - dsa![pop bpr], - dsa![return] - ], - // block! [ "main" - // dsa![push bpr], - // dsa![mov spr, bpr], - // dsa![lwi 67, rg1], - // dsa![stw rg1, spr, 8], - // dsa![mov bpr, spr], - // dsa![pop bpr], - // dsa![return] - // ], - "", - self.functions.join("\n"), - ]) - } - - fn generate_global(&mut self, name: &str, init: Option) { - self.globals.push(format!( - "dw {}: {}", - name, - init.unwrap_or(ConstExpr::Number(0)) - )) - } - - fn generate_block(&mut self, block: Declaration) -> Result<(), String> { - match block { - Declaration::Variable { name, init } => self.generate_global(&name, init), - Declaration::Function { - name, - return_type, - params, - body, - } => { - let func = self.generate_function(&name, ¶ms, &body).join("\n"); - - self.functions.push(format!("{func}\n")); - } - Declaration::Import { name, path } => { - self.imports.insert(name, path); - } - }; - - Ok(()) - } - - // Example: Generate code for a function - fn generate_function( - &mut self, - name: &str, - params: &[Parameter], - body: &[Statement], - ) -> Vec { - let mut code = Vec::new(); - - // Reset allocator for new function - self.allocator.reset(); - - // Function prologue - code.push(format!("{}:", name)); - code.push("\tpush bpr".to_string()); - code.push("\tmov spr, bpr".to_string()); - code.push(String::new()); - - // Allocate parameters to registers or stack locations - for (i, param) in params.iter().enumerate() { - let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8 - // Track that this parameter is at a stack location - let (reg, load_code) = self.allocator.alloc_var(¶m.name).unwrap(); - code.extend(load_code); - code.push(format!("\tldw bpr, {}, {}", reg, offset)); - } - - // Generate code for function body - for stmt in body { - let stmt_code = self.generate_statement(stmt).unwrap(); - code.extend(stmt_code); - } - - // automatically return at function end - if let Some(x) = code.last() - && x == Self::RET - { - } else { - code.push(Self::RET.to_string()); - } - - code - } - - // Example: Generate code for a statement - fn generate_statement(&mut self, stmt: &Statement) -> Result, String> { - let mut code = Vec::new(); - - match stmt { - Statement::Assign { - name, - declare_type, - value, - } => { - if let Some(expr) = value { - // Evaluate expression - let (result_reg, expr_code) = self.generate_expression(expr)?; - code.extend(expr_code); - - // Store result in variable - let store_code = self.allocator.store_var(name, &result_reg); - code.extend(store_code); - - // Free temporary register - self.allocator.free_temp(&result_reg); - } else { - // Just declaring variable without initialization - self.allocator.alloc_var(name)?; - } - } - - Statement::Return { expr } => { - if let Some(e) = expr { - let (result_reg, expr_code) = self.generate_expression(e)?; - code.extend(expr_code); - code.push(format!("\tstw {}, bpr, 8", result_reg)); - code.push(format!("\tjmp _ret")); - self.allocator.free_temp(&result_reg); - } - } - - Statement::If { - condition, - then_stmt, - else_stmt, - } => { - // Generate condition - let (cond_reg, cond_code) = self.generate_expression(condition)?; - code.extend(cond_code); - - // Compare with zero - code.push(format!("\tcmp {}, zero", cond_reg)); - self.allocator.free_temp(&cond_reg); - - // Generate unique labels - let then_label = format!("_then_{}", self.get_unique_label()); - let else_label = format!("_else_{}", self.get_unique_label()); - let end_label = format!("_end_{}", self.get_unique_label()); - - // Jump to else if condition is false (equal to zero) - code.push(format!("\tjeq {}", else_label)); - - // Then block - code.push(format!("{}:", then_label)); - for s in then_stmt { - code.extend(self.generate_statement(s)?); - } - - if then_stmt.len() == 0 { - code.push("\tnop".to_string()); - } - - code.push(format!("\tjmp {}", end_label)); - - // Else block - code.push(format!("{}:", else_label)); - for s in else_stmt { - code.extend(self.generate_statement(s)?); - } - - if else_stmt.len() == 0 { - code.push("\tnop".to_string()); - } - - code.push(format!("{}:", end_label)); - } - - Statement::While { condition, body } => { - let loop_start = format!("_while_start_{}", self.get_unique_label()); - let loop_end = format!("_while_end_{}", self.get_unique_label()); - - code.push(format!("{}:", loop_start)); - - // Generate condition - let (cond_reg, cond_code) = self.generate_expression(condition)?; - code.extend(cond_code); - - code.push(format!("\tcmp {}, zero", cond_reg)); - self.allocator.free_temp(&cond_reg); - - code.push(format!("\tjeq {}", loop_end)); - - // Loop body - for s in body { - code.extend(self.generate_statement(s)?); - } - - code.push(format!("\tjmp {}", loop_start)); - code.push(format!("{}:", loop_end)); - } - - Statement::Expression { expr } => { - let (result_reg, expr_code) = self.generate_expression(expr)?; - code.extend(expr_code); - self.allocator.free_temp(&result_reg); - } - - Statement::Block(statements) => { - for s in statements { - code.extend(self.generate_statement(s)?); - } - } - } - - Ok(code) - } - - // Example: Generate code for an expression - // Returns (register containing result, assembly code) - fn generate_expression( - &mut self, - expr: &Expression, - ) -> Result<(String, Vec), String> { - let mut code = Vec::new(); - - match expr { - Expression::Number { value } => { - let (reg, alloc_code) = self.allocator.alloc_temp()?; - code.extend(alloc_code); - - // Load immediate value - code.push(format!("\tlli {}, {}", value & 0xFFFF, reg)); - if *value > 0xFFFF || *value < 0 { - code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg)); - } - - Ok((reg, code)) - } - - Expression::Variable { name, .. } => { - let (reg, load_code) = self.allocator.load_var(name)?; - code.extend(load_code); - Ok((reg, code)) - } - - Expression::Binary { op, left, right } => { - // Evaluate left operand - let (left_reg, left_code) = self.generate_expression(left)?; - code.extend(left_code); - - // Evaluate right operand - let (right_reg, right_code) = self.generate_expression(right)?; - code.extend(right_code); - - // Allocate result register - let (result_reg, result_alloc) = self.allocator.alloc_temp()?; - code.extend(result_alloc); - - // Generate operation - match op { - BinaryOperator::Add => { - code.push(format!( - "\tadd {}, {}, {}", - left_reg, right_reg, result_reg - )); - } - BinaryOperator::Sub => { - code.push(format!( - "\tsub {}, {}, {}", - left_reg, right_reg, result_reg - )); - } - BinaryOperator::Mul => { - self.include("maths", "./lib/maths/core.dsa"); - // Call multiply function - code.push(format!("\tpush {}", right_reg)); - code.push(format!("\tpush {}", left_reg)); - code.push("\tcall maths::multiply".to_string()); - code.push(format!("\tpop {}", result_reg)); - code.push("\tpop zero".to_string()); - } - // Comparison operators - return 1 (true) or 0 (false) - BinaryOperator::Eq => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - BinaryOperator::Ne => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - BinaryOperator::Lt => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - BinaryOperator::Le => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - BinaryOperator::Gt => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - BinaryOperator::Ge => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - _ => return Err(format!("Unsupported binary operator: {:?}", op)), - } - - // Free operand registers (allocator will protect variables) - self.allocator.free_temp(&left_reg); - self.allocator.free_temp(&right_reg); - - Ok((result_reg, code)) - } - - Expression::Call { name, args } => { - // Save caller-saved registers and track which ones we saved - let saved_regs = self.allocator.get_caller_saved_registers(); - for reg in &saved_regs { - code.push(format!("\tpush {}", reg)); - } - - // Evaluate and push arguments in reverse order - let mut arg_regs = Vec::new(); - for arg in args.iter().rev() { - let (arg_reg, arg_code) = self.generate_expression(arg)?; - code.extend(arg_code); - code.push(format!("\tpush {}", arg_reg)); - arg_regs.push(arg_reg); - } - - if GLOBAL_METHODS.contains_key(name.as_str()) { - code.push(format!("\tcall {}", GLOBAL_METHODS[name.as_str()])); - } else if self.symbols.contains(name) { - // Call local function - code.push(format!("\tcall {}", name)); - } else { - return Err(format!("undefined function {name}")); - } - - // Result is in rg0, allocate a register and move it - let (result_reg, result_alloc) = self.allocator.alloc_temp()?; - - code.extend(result_alloc); - code.push(format!("\tpop {}", result_reg)); - - // Clean up arguments - if args.len() > 1 { - for _ in 0..(args.len() - 1) { - code.push("\tpop zero".to_string()); - } - } - - // Restore caller-saved registers in reverse order (LIFO) - for reg in saved_regs.iter().rev() { - code.push(format!("\tpop {}", reg)); - } - - // Free argument registers - for reg in arg_regs { - self.allocator.free_temp(®); - } - - Ok((result_reg, code)) - } - - Expression::Unary { op, operand } => { - let (operand_reg, operand_code) = self.generate_expression(operand)?; - code.extend(operand_code); - - let (result_reg, result_alloc) = self.allocator.alloc_temp()?; - code.extend(result_alloc); - - match op { - UnaryOperator::Minus => { - // Negate: result = 0 - operand - code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg)); - } - UnaryOperator::Plus => { - // Just move - code.push(format!("\tmov {}, {}", operand_reg, result_reg)); - } - } - - self.allocator.free_temp(&operand_reg); - Ok((result_reg, code)) - } - - Expression::Empty => Ok(("zero".to_string(), code)), - } - } - - // Helper for generating unique labels - fn get_unique_label(&mut self) -> String { - // You'd implement a counter here - static COUNTER: AtomicU32 = AtomicU32::new(0); - - let val = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - (val + 1).to_string() - } -} - -/// Build a single string from any number of arguments. -/// Each argument must implement `Display` or be convertible to a string. -#[macro_export] -macro_rules! dsa { - ($($arg:expr),* $(,)?) => {{ - // Start with an empty String – we’ll grow it as we go. - use std::fmt::Write; - let mut s = ::std::string::String::new(); - $( - // `write!` is cheaper than `format!` for each element - // because it re‑uses the same buffer. - - write!(s, "{}\n", $arg).expect("write to String failed"); - )* - s - }}; -} - -// ──────────────────────── dsa! ──────────────────────── -// A tiny helper that just turns its token‑stream into a string. -// The trailing comma is kept – it’s part of the syntax you want. -#[macro_export] -macro_rules! cmd { - ($($tokens:tt)*) => {{ - // We’ll just stringify the tokens and return a String. - format!("{}", concat!(stringify!($tokens), "\n")) - }}; -} - -// ──────────────────────── block! ──────────────────────── -// Usage: -// -// let asm = block![ "name" -// dsa![mov rg0, rg1], -// dsa![add rg1, rg1] -// ]; -// -// `asm` is a `&'static str` containing: -// -// name: -// mov rg0, rg1 -// add rg1, rg1 -// -#[macro_export] -macro_rules! block { - // The first token must be a string literal – that’s the label. - ($label:literal $(dsa![$($ins:tt)*]),* ) => {{ - // Build a single string at compile time. - const CODE: &str = concat!( - $label, ":\n", - // Each `dsa!` call yields a string like `"mov rg0, rg1"`. - // We add a newline after each one to get the desired layout. - $(concat!("\t", stringify!($($ins)*), "\n")),* - ); - CODE - }}; -} - -#[macro_export] -macro_rules! comment { - ($text:expr) => {{ format!("// {}", $text) }}; -} diff --git a/c_compiler/src/lexer.rs b/c_compiler/src/lexer.rs deleted file mode 100644 index 60cf402..0000000 --- a/c_compiler/src/lexer.rs +++ /dev/null @@ -1,335 +0,0 @@ -// ============================================================================ -// Token Types -// ============================================================================ - -#[derive(Debug, Clone, PartialEq)] -pub enum TokenType { - // Keywords - Int, - If, - Else, - While, - Return, - Include, - - // Identifiers and literals - Identifier(String), - Number(i32), - String(String), - Char(char), - - // Operators - Plus, - Minus, - Star, - Slash, - Assign, - Eq, - Ne, - Lt, - Gt, - Le, - Ge, - - // Delimiters - LParen, - RParen, - LBrace, - RBrace, - Semicolon, - Comma, - Colon, - Namespace, - - Eof, -} - -pub enum Type { - Int32, - Int16, - Int8, - Uint32, - Uint16, - Uint8, - Char, -} - -#[derive(Debug, Clone)] -pub struct Token { - pub token_type: TokenType, - pub line: usize, - pub col: usize, -} - -impl Token { - pub fn new(token_type: TokenType, line: usize, col: usize) -> Self { - Self { - token_type, - line, - col, - } - } -} - -// ============================================================================ -// Lexer -// ============================================================================ - -pub struct Lexer { - source: Vec, - pos: usize, - line: usize, - col: usize, -} - -impl Lexer { - pub fn new(source: &str) -> Self { - Self { - source: source.chars().collect(), - pos: 0, - line: 1, - col: 1, - } - } - - fn error(&self, msg: &str) -> String { - format!( - "Lexer error at line {}, col {}: {}", - self.line, self.col, msg - ) - } - - fn peek(&self, offset: usize) -> Option { - self.source.get(self.pos + offset).copied() - } - - fn advance(&mut self) -> Option { - if self.pos >= self.source.len() { - return None; - } - let ch = self.source[self.pos]; - self.pos += 1; - if ch == '\n' { - self.line += 1; - self.col = 1; - } else { - self.col += 1; - } - Some(ch) - } - - fn skip_whitespace(&mut self) { - while let Some(ch) = self.peek(0) { - if ch.is_whitespace() { - self.advance(); - } else { - break; - } - } - } - - fn skip_comment(&mut self) { - if self.peek(0) == Some('/') && self.peek(1) == Some('/') { - while let Some(ch) = self.peek(0) { - if ch == '\n' { - break; - } - self.advance(); - } - } - } - - fn read_number(&mut self) -> i32 { - let mut num_str = String::new(); - while let Some(ch) = self.peek(0) { - if ch.is_ascii_digit() { - num_str.push(ch); - self.advance(); - } else { - break; - } - } - num_str.parse().unwrap_or(0) - } - - fn read_identifier(&mut self) -> String { - let mut ident = String::new(); - while let Some(ch) = self.peek(0) { - if ch.is_alphanumeric() || ch == '_' { - ident.push(ch); - self.advance(); - } else { - break; - } - } - ident - } - - fn read_string(&mut self) -> Result { - let mut string = String::new(); - self.advance(); // Consume the opening quote - - while let Some(ch) = self.peek(0) { - if ch == '"' { - self.advance(); // Consume the closing quote - return Ok(string); - } else if ch == '\\' { - self.advance(); // Consume the backslash - if let Some(escaped_char) = self.peek(0) { - string.push(escaped_char); - self.advance(); - } - } else { - string.push(ch); - self.advance(); - } - } - - Err(String::from("Unexpected EOF")) - } - - fn read_char(&mut self) -> Result { - self.advance(); // Consume the opening quote - - if let Some(ch) = self.peek(0) { - self.advance(); - if self.peek(0) == Some('\'') { - self.advance(); - return Ok(ch); - } else { - Err(String::from("expected closing quote")) - } - } else { - Err(String::from("expected character")) - } - } - - pub fn tokenize(&mut self) -> Result, String> { - let mut tokens = Vec::new(); - - loop { - self.skip_whitespace(); - self.skip_comment(); - - if self.pos >= self.source.len() { - break; - } - - let line = self.line; - let col = self.col; - let ch = self.peek(0).unwrap(); - - let token_type = if ch.is_ascii_digit() { - let num = self.read_number(); - TokenType::Number(num) - } else if ch == '"' { - let string = self.read_string()?; - TokenType::String(string) - } else if ch == '\'' { - let char = self.read_char()?; - TokenType::Char(char) - } else if ch.is_alphabetic() || ch == '_' { - let ident = self.read_identifier(); - match ident.as_str() { - "int" => TokenType::Int, - "if" => TokenType::If, - "else" => TokenType::Else, - "while" => TokenType::While, - "return" => TokenType::Return, - "include" => TokenType::Include, - _ => TokenType::Identifier(ident), - } - } else { - match ch { - ':' if self.peek(1) == Some(':') => { - self.advance(); - self.advance(); - TokenType::Namespace - } - ':' => { - self.advance(); - TokenType::Colon - } - '=' if self.peek(1) == Some('=') => { - self.advance(); - self.advance(); - TokenType::Eq - } - '!' if self.peek(1) == Some('=') => { - self.advance(); - self.advance(); - TokenType::Ne - } - '<' if self.peek(1) == Some('=') => { - self.advance(); - self.advance(); - TokenType::Le - } - '>' if self.peek(1) == Some('=') => { - self.advance(); - self.advance(); - TokenType::Ge - } - '+' => { - self.advance(); - TokenType::Plus - } - '-' => { - self.advance(); - TokenType::Minus - } - '*' => { - self.advance(); - TokenType::Star - } - '/' => { - self.advance(); - TokenType::Slash - } - '=' => { - self.advance(); - TokenType::Assign - } - '<' => { - self.advance(); - TokenType::Lt - } - '>' => { - self.advance(); - TokenType::Gt - } - '(' => { - self.advance(); - TokenType::LParen - } - ')' => { - self.advance(); - TokenType::RParen - } - '{' => { - self.advance(); - TokenType::LBrace - } - '}' => { - self.advance(); - TokenType::RBrace - } - ';' => { - self.advance(); - TokenType::Semicolon - } - ',' => { - self.advance(); - TokenType::Comma - } - _ => return Err(self.error(&format!("Unexpected character: {}", ch))), - } - }; - - tokens.push(Token::new(token_type, line, col)); - } - - tokens.push(Token::new(TokenType::Eof, self.line, self.col)); - Ok(tokens) - } -} diff --git a/c_compiler/src/main.rs b/c_compiler/src/main.rs deleted file mode 100644 index 06cbfed..0000000 --- a/c_compiler/src/main.rs +++ /dev/null @@ -1,74 +0,0 @@ -use std::fmt; - -use crate::{codegen::CodeGenerator, lexer::Lexer, parser::Parser}; - -// mod assembly; -pub mod codegen; -pub mod lexer; -pub mod parser; -mod registers; - -// ============================================================================ -// Main & Tests -// ============================================================================ - -fn main() { - // read from input file: syntax "c_compiler [output.dsa]" - let args: Vec = std::env::args().collect(); - if args.len() < 2 { - eprintln!("Usage: c_compiler [output.dsa]"); - return; - } - - let input_file = &args[1]; - let output_file = if args.len() > 2 { - &args[2] - } else { - "output.dsa" - }; - - // read input - let input = std::fs::read_to_string(input_file).expect("Failed to read input file"); - - // Lexing - let mut lexer = Lexer::new(&input); - let tokens = match lexer.tokenize() { - Ok(tokens) => tokens, - Err(e) => { - eprintln!("Lexing error: {}", e); - return; - } - }; - - println!("Tokens:"); - for token in &tokens { - println!(" {:?}", token.token_type); - } - println!(); - - // Parsing - let mut parser = Parser::new(tokens); - let ast = match parser.parse() { - Ok(ast) => ast, - Err(e) => { - eprintln!("Parsing error: {}", e); - return; - } - }; - - println!("AST:"); - println!("{:#?}", ast); - - // Code Gen - let mut generator = CodeGenerator::new(ast); - let result = match generator.generate() { - Ok(code) => code, - Err(e) => { - eprintln!("Parsing error: {}", e); - return; - } - }; - - std::fs::write(output_file, &result).expect("Failed to write output"); - println!("Result written to {}", output_file); -} diff --git a/c_compiler/src/parser.rs b/c_compiler/src/parser.rs deleted file mode 100644 index 86f2b00..0000000 --- a/c_compiler/src/parser.rs +++ /dev/null @@ -1,610 +0,0 @@ -// ============================================================================ -// AST Node Types -// ============================================================================ - -use std::fmt; - -use crate::lexer::{Token, TokenType}; - -#[derive(Debug, Clone)] -pub struct Program { - pub declarations: Vec, -} - -#[derive(Debug, Clone)] -pub enum Declaration { - Function { - name: String, - return_type: Type, - params: Vec, - body: Block, - }, - Variable { - name: String, - init: Option, - }, - Import { - name: String, - path: String, - }, -} - -#[derive(Debug, Clone)] -pub struct Parameter { - pub name: String, - pub param_type: Type, -} - -#[derive(Debug, Clone)] -pub enum Type { - Int, - Long, - Float, - Double, - Char, - Void, - Ptr(Box), - Array(Box, usize), - Struct(String), -} - -pub type Block = Vec; - -#[derive(Debug, Clone)] -pub enum Statement { - Block(Block), - Assign { - // left side - name: String, - declare_type: Option, - - // right side - value: Option>, - }, - Expression { - expr: Expression, - }, - If { - condition: Expression, - then_stmt: Block, - else_stmt: Block, - }, - While { - condition: Expression, - body: Vec, - }, - Return { - expr: Option, - }, -} - -#[derive(Debug, Clone)] -pub enum ConstExpr { - Number(i32), - String(String), -} - -impl fmt::Display for ConstExpr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - ConstExpr::Number(n) => write!(f, "{}", n), - ConstExpr::String(s) => write!(f, "\"{}\"", s), - } - } -} - -#[derive(Debug, Clone)] -pub enum Expression { - Empty, - Binary { - op: BinaryOperator, - left: Box, - right: Box, - }, - Unary { - op: UnaryOperator, - operand: Box, - }, - Variable { - name: String, - expr_type: Option, - }, - Number { - value: i32, - }, - Call { - name: String, - args: Vec, - }, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum BinaryOperator { - Add, - Sub, - Mul, - Div, - Eq, - Ne, - Lt, - Gt, - Le, - Ge, -} - -impl fmt::Display for BinaryOperator { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - BinaryOperator::Add => write!(f, "+"), - BinaryOperator::Sub => write!(f, "-"), - BinaryOperator::Mul => write!(f, "*"), - BinaryOperator::Div => write!(f, "/"), - BinaryOperator::Eq => write!(f, "=="), - BinaryOperator::Ne => write!(f, "!="), - BinaryOperator::Lt => write!(f, "<"), - BinaryOperator::Gt => write!(f, ">"), - BinaryOperator::Le => write!(f, "<="), - BinaryOperator::Ge => write!(f, ">="), - } - } -} - -#[derive(Debug, Clone, PartialEq)] -pub enum UnaryOperator { - Plus, - Minus, -} - -impl fmt::Display for UnaryOperator { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - UnaryOperator::Plus => write!(f, "+"), - UnaryOperator::Minus => write!(f, "-"), - } - } -} - -// ============================================================================ -// Parser -// ============================================================================ - -pub struct Parser { - tokens: Vec, - pos: usize, -} - -impl Parser { - pub fn new(tokens: Vec) -> Self { - Self { tokens, pos: 0 } - } - - fn error(&self, msg: &str) -> String { - let token = self.current(); - format!( - "Parser error at line {}, col {}: {}", - token.line, token.col, msg - ) - } - - fn current(&self) -> &Token { - self.tokens - .get(self.pos) - .unwrap_or_else(|| self.tokens.last().unwrap()) - } - - fn peek(&self, offset: usize) -> &Token { - self.tokens - .get(self.pos + offset) - .unwrap_or_else(|| self.tokens.last().unwrap()) - } - - fn advance(&mut self) -> &Token { - if self.pos < self.tokens.len() - 1 { - self.pos += 1; - } - self.current() - } - - fn expect(&mut self, expected: TokenType) -> Result { - let token = self.current().clone(); - if std::mem::discriminant(&token.token_type) != std::mem::discriminant(&expected) - { - return Err(self.error(&format!( - "Expected {:?}, got {:?}", - expected, token.token_type - ))); - } - self.advance(); - Ok(token) - } - - pub fn parse(&mut self) -> Result { - let mut declarations = Vec::new(); - - while !matches!(self.current().token_type, TokenType::Eof) { - declarations.push(self.parse_declaration()?); - } - - Ok(Program { declarations }) - } - - fn parse_declaration(&mut self) -> Result { - // check for an import - if let TokenType::Include = self.current().token_type { - self.advance(); - - let name = - if let TokenType::Identifier(id) = self.current().clone().token_type { - Some(id) - } else { - None - } - .ok_or(String::from("Expected identifier"))?; - - self.advance(); - self.expect(TokenType::Colon)?; - - let path = if let TokenType::String(id) = self.current().clone().token_type { - Some(id) - } else { - None - } - .ok_or(String::from("Expected string literal"))?; - - self.advance(); - return Ok(Declaration::Import { name, path }); - } - - self.expect(TokenType::Int)?; - - let name = match &self.current().token_type { - TokenType::Identifier(s) => s.clone(), - _ => return Err(self.error("Expected identifier")), - }; - self.advance(); - - match &self.current().token_type { - TokenType::LParen => { - // Function declaration - self.advance(); - let mut params = Vec::::new(); - - if !matches!(self.current().token_type, TokenType::RParen) { - self.expect(TokenType::Int)?; - - match &self.current().token_type { - TokenType::Identifier(s) => { - params.push(Parameter { - name: s.clone(), - param_type: Type::Int, - }); - self.advance(); - } - _ => return Err(self.error("Expected parameter name")), - } - - while matches!(self.current().token_type, TokenType::Comma) { - self.advance(); - self.expect(TokenType::Int)?; - - match &self.current().token_type { - TokenType::Identifier(s) => { - params.push(Parameter { - name: s.clone(), - param_type: Type::Int, - }); - self.advance(); - } - _ => return Err(self.error("Expected parameter name")), - } - } - } - - self.expect(TokenType::RParen)?; - let body = self.parse_block()?; - - Ok(Declaration::Function { - name, - params, - body, - return_type: Type::Int, - }) - } - _ => { - // Variable declaration - let init = if matches!(self.current().token_type, TokenType::Assign) { - self.advance(); - - if let TokenType::Number(n) = self.current().token_type { - self.advance(); - Some(ConstExpr::Number(n)) - } else { - return Err(self - .error("Expected constant in global variable declaration")); - } - } else { - None - }; - - self.expect(TokenType::Semicolon)?; - Ok(Declaration::Variable { name, init }) - } - } - } - - fn parse_block(&mut self) -> Result { - self.expect(TokenType::LBrace)?; - let mut statements = Vec::new(); - - while !matches!(self.current().token_type, TokenType::RBrace) { - statements.push(self.parse_statement()?); - } - - self.expect(TokenType::RBrace)?; - Ok(statements) - } - - fn parse_statement(&mut self) -> Result { - match &self.current().token_type { - TokenType::LBrace => Ok(Statement::Block(self.parse_block()?)), - TokenType::If => self.parse_if_stmt(), - TokenType::While => self.parse_while_stmt(), - TokenType::Return => self.parse_return_stmt(), - TokenType::Identifier(name) => { - let name = name.clone(); - - // peek ahead for open paren (func call expr) - if matches!(self.peek(1).token_type, TokenType::LParen) { - let expr = self.parse_expression()?; // a function call expr - self.expect(TokenType::Semicolon)?; - return Ok(Statement::Expression { expr }); - } - - self.advance(); // advance past identifier - - // assignment expression - if matches!(self.current().token_type, TokenType::Assign) { - self.advance(); - let expr = self.parse_expression()?; - - self.expect(TokenType::Semicolon)?; - Ok(Statement::Assign { - name, - value: Some(Box::new(expr)), - declare_type: None, - }) - } - // var expression - else { - self.expect(TokenType::Semicolon)?; - Ok(Statement::Expression { - expr: Expression::Variable { - name, - expr_type: None, - }, - }) - } - } - TokenType::Int => { - // Local variable declaration - self.advance(); - let name = match &self.current().token_type { - TokenType::Identifier(s) => s.clone(), - _ => return Err(self.error("Expected variable name")), - }; - self.advance(); - - let init = if matches!(self.current().token_type, TokenType::Assign) { - self.advance(); - Some(self.parse_expression()?) - } else { - None - }; - - self.expect(TokenType::Semicolon)?; - - // Convert to assignment expression statement - let expr = if let Some(init_expr) = init { - Statement::Assign { - name, - value: Some(Box::new(init_expr)), - declare_type: Some(Type::Int), - } - } else { - Statement::Assign { - name, - value: None, - declare_type: Some(Type::Int), - } - }; - - Ok(expr) - } - _ => { - let expr = if matches!(self.current().token_type, TokenType::Semicolon) { - Expression::Empty - } else { - self.parse_expression()? - }; - - self.expect(TokenType::Semicolon)?; - Ok(Statement::Expression { expr }) - } - } - } - - fn parse_if_stmt(&mut self) -> Result { - self.expect(TokenType::If)?; - self.expect(TokenType::LParen)?; - let condition = self.parse_expression()?; - self.expect(TokenType::RParen)?; - let then_stmt = self.parse_block()?; - - let else_stmt = if matches!(self.current().token_type, TokenType::Else) { - self.advance(); - self.parse_block()? - } else { - Vec::new() - }; - - Ok(Statement::If { - condition, - then_stmt, - else_stmt, - }) - } - - fn parse_while_stmt(&mut self) -> Result { - self.expect(TokenType::While)?; - self.expect(TokenType::LParen)?; - let condition = self.parse_expression()?; - self.expect(TokenType::RParen)?; - let body = self.parse_block()?; - - Ok(Statement::While { condition, body }) - } - - fn parse_return_stmt(&mut self) -> Result { - self.expect(TokenType::Return)?; - - let expr = if matches!(self.current().token_type, TokenType::Semicolon) { - None - } else { - Some(self.parse_expression()?) - }; - - self.expect(TokenType::Semicolon)?; - Ok(Statement::Return { expr }) - } - - fn parse_expression(&mut self) -> Result { - self.parse_comparison() - } - - fn parse_comparison(&mut self) -> Result { - let mut expr = self.parse_additive()?; - - while let Some(op) = match &self.current().token_type { - TokenType::Eq => Some(BinaryOperator::Eq), - TokenType::Ne => Some(BinaryOperator::Ne), - TokenType::Lt => Some(BinaryOperator::Lt), - TokenType::Gt => Some(BinaryOperator::Gt), - TokenType::Le => Some(BinaryOperator::Le), - TokenType::Ge => Some(BinaryOperator::Ge), - _ => None, - } { - self.advance(); - let right = Box::new(self.parse_additive()?); - expr = Expression::Binary { - op, - left: Box::new(expr), - right, - }; - } - - Ok(expr) - } - - fn parse_additive(&mut self) -> Result { - let mut expr = self.parse_multiplicative()?; - - while let Some(op) = match &self.current().token_type { - TokenType::Plus => Some(BinaryOperator::Add), - TokenType::Minus => Some(BinaryOperator::Sub), - _ => None, - } { - self.advance(); - let right = Box::new(self.parse_multiplicative()?); - expr = Expression::Binary { - op, - left: Box::new(expr), - right, - }; - } - - Ok(expr) - } - - fn parse_multiplicative(&mut self) -> Result { - let mut expr = self.parse_unary()?; - - while let Some(op) = match &self.current().token_type { - TokenType::Star => Some(BinaryOperator::Mul), - TokenType::Slash => Some(BinaryOperator::Div), - _ => None, - } { - self.advance(); - let right = Box::new(self.parse_unary()?); - expr = Expression::Binary { - op, - left: Box::new(expr), - right, - }; - } - - Ok(expr) - } - - fn parse_unary(&mut self) -> Result { - let op = match &self.current().token_type { - TokenType::Plus => Some(UnaryOperator::Plus), - TokenType::Minus => Some(UnaryOperator::Minus), - _ => None, - }; - - if let Some(op) = op { - self.advance(); - let operand = Box::new(self.parse_unary()?); - return Ok(Expression::Unary { op, operand }); - } - - self.parse_primary() - } - - fn parse_primary(&mut self) -> Result { - match &self.current().token_type.clone() { - TokenType::Number(n) => { - let value = *n; - self.advance(); - Ok(Expression::Number { value }) - } - TokenType::Identifier(name) => { - let name = name.clone(); - self.advance(); - - if matches!(self.current().token_type, TokenType::LParen) { - // Function call - self.advance(); - let mut args = Vec::new(); - - if !matches!(self.current().token_type, TokenType::RParen) { - args.push(self.parse_expression()?); - - while matches!(self.current().token_type, TokenType::Comma) { - self.advance(); - args.push(self.parse_expression()?); - } - } - - self.expect(TokenType::RParen)?; - Ok(Expression::Call { name, args }) - } else { - Ok(Expression::Variable { - name, - expr_type: None, - }) - } - } - TokenType::LParen => { - self.advance(); - let expr = self.parse_expression()?; - self.expect(TokenType::RParen)?; - Ok(expr) - } - _ => Err(self.error(&format!( - "Unexpected token: {:?}", - self.current().token_type - ))), - } - } -} diff --git a/c_compiler/src/registers.rs b/c_compiler/src/registers.rs deleted file mode 100644 index 1d042e9..0000000 --- a/c_compiler/src/registers.rs +++ /dev/null @@ -1,344 +0,0 @@ -use std::collections::HashMap; - -/// Register allocator for DSA assembly generation -/// Manages general-purpose registers (rg0-rgf) and handles stack spilling -pub struct RegisterAllocator { - /// Available general-purpose registers - available_registers: Vec, - - /// Maps variable names to their current location (register or stack offset) - variable_locations: HashMap, - - /// Maps registers to the variables they currently hold - register_contents: HashMap, - - /// Current stack offset for local variables (relative to bpr) - /// Starts at -4 (going downward from base pointer) - stack_offset: i32, - - /// Track which registers are currently in use - in_use: HashMap, -} - -#[derive(Debug, Clone)] -pub enum Location { - Register(String), - Stack(i32), // offset from bpr -} - -impl RegisterAllocator { - pub fn new() -> Self { - // Initialize with available GP registers (rg0-rgf = 16 registers) - let registers = vec![ - "rg0", "rg1", "rg2", "rg3", "rg4", "rg5", "rg6", "rg7", "rg8", "rg9", "rga", - "rgb", "rgc", "rgd", "rge", "rgf", - ] - .into_iter() - .map(String::from) - .collect(); - - RegisterAllocator { - available_registers: registers, - variable_locations: HashMap::new(), - register_contents: HashMap::new(), - stack_offset: -4, // Start at -4 (first local below saved bpr) - in_use: HashMap::new(), - } - } - - /// Allocate a temporary register for expression evaluation - /// Returns the register name and optionally assembly code to save it - pub fn alloc_temp(&mut self) -> Result<(String, Vec), String> { - let mut code = Vec::new(); - - // Try to find an unused register - for reg in &self.available_registers { - if !self.in_use.get(reg).unwrap_or(&false) { - self.in_use.insert(reg.clone(), true); - return Ok((reg.clone(), code)); - } - } - - // All registers in use - need to spill one - // Choose the first register with a variable we can spill - // Find a register to spill - let reg_to_spill = self - .available_registers - .iter() - .find(|reg| self.register_contents.contains_key(*reg)) - .cloned(); - - if let Some(reg) = reg_to_spill { - // Spill this variable to stack - let spill_code = self.spill_register(®)?; - code.extend(spill_code); - - self.in_use.insert(reg.clone(), true); - return Ok((reg, code)); - } - - Err("No registers available and nothing to spill".to_string()) - } - - /// Free a temporary register after use - /// NOTE: This will NOT free registers that contain variables! - /// Variables persist throughout their scope and must not be freed - pub fn free_temp(&mut self, reg: &str) { - // Check if this register contains a variable - if self.register_contents.contains_key(reg) { - // This register holds a variable - don't free it! - // Variables are only freed when they go out of scope via free_var() - return; - } - - // This is a true temporary - safe to free - self.in_use.insert(reg.to_string(), false); - } - - /// Allocate a register for a named variable - /// Returns the register and any necessary assembly code - pub fn alloc_var(&mut self, var_name: &str) -> Result<(String, Vec), String> { - // Check if variable already has a location - if let Some(location) = self.variable_locations.get(var_name).cloned() { - match location { - Location::Register(reg) => { - return Ok((reg.clone(), Vec::new())); - } - Location::Stack(offset) => { - // Variable is on stack, load it into a register - let (reg, mut code) = self.alloc_temp()?; - code.push(format!("\tldw bpr, {}, {}", reg, offset)); - - // Update location to register - self.variable_locations - .insert(var_name.to_string(), Location::Register(reg.clone())); - self.register_contents - .insert(reg.clone(), var_name.to_string()); - - return Ok((reg, code)); - } - } - } - - // Variable doesn't have a location yet, allocate a new register - let (reg, code) = self.alloc_temp()?; - self.variable_locations - .insert(var_name.to_string(), Location::Register(reg.clone())); - self.register_contents - .insert(reg.clone(), var_name.to_string()); - - Ok((reg, code)) - } - - /// Get the current location of a variable - pub fn get_var_location(&self, var_name: &str) -> Option<&Location> { - self.variable_locations.get(var_name) - } - - /// Load a variable into a register (allocating if necessary) - /// Returns the register and assembly code to load it - pub fn load_var(&mut self, var_name: &str) -> Result<(String, Vec), String> { - self.alloc_var(var_name) - } - - /// Store a value from a register into a variable - /// Updates tracking and returns any necessary assembly code - pub fn store_var(&mut self, var_name: &str, source_reg: &str) -> Vec { - let mut code = Vec::new(); - - // Check if variable already has a location - if let Some(location) = self.variable_locations.get(var_name) { - match location { - Location::Register(dest_reg) => { - if dest_reg != source_reg { - code.push(format!("\tmov {}, {}", source_reg, dest_reg)); - } - } - Location::Stack(offset) => { - code.push(format!("\tstw {}, bpr, {}", source_reg, offset)); - } - } - } else { - // Variable doesn't exist yet - try to allocate a register - if let Some(free_reg) = self.find_free_register() { - if &free_reg != source_reg { - code.push(format!("\tmov {}, {}", source_reg, free_reg)); - } - self.variable_locations - .insert(var_name.to_string(), Location::Register(free_reg.clone())); - self.register_contents - .insert(free_reg.clone(), var_name.to_string()); - self.in_use.insert(free_reg, true); - } else { - // No free registers - allocate on stack - code.push(format!("\tstw {}, bpr, {}", source_reg, self.stack_offset)); - self.variable_locations - .insert(var_name.to_string(), Location::Stack(self.stack_offset)); - self.stack_offset -= 4; // Move to next stack slot - } - } - - code - } - - /// Spill a register to the stack - /// Returns assembly code to perform the spill - fn spill_register(&mut self, reg: &str) -> Result, String> { - let mut code = Vec::new(); - - if let Some(var_name) = self.register_contents.get(reg).cloned() { - // Store register content to stack - code.push(format!("\tstw {}, bpr, {}", reg, self.stack_offset)); - - // Update variable location - self.variable_locations - .insert(var_name.clone(), Location::Stack(self.stack_offset)); - - // Remove from register tracking - self.register_contents.remove(reg); - - // Move to next stack slot - self.stack_offset -= 4; - } - - Ok(code) - } - - /// Find a free register (not currently in use) - fn find_free_register(&self) -> Option { - for reg in &self.available_registers { - if !self.in_use.get(reg).unwrap_or(&false) { - return Some(reg.clone()); - } - } - None - } - - /// Spill all registers to stack (useful before function calls) - pub fn spill_all(&mut self) -> Vec { - let mut code = Vec::new(); - - let regs_to_spill: Vec = self.register_contents.keys().cloned().collect(); - - for reg in regs_to_spill { - if let Ok(spill_code) = self.spill_register(®) { - code.extend(spill_code); - } - } - - code - } - - /// Get the total stack space needed for local variables - pub fn get_stack_size(&self) -> i32 { - -self.stack_offset // Convert negative offset to positive size - } - - /// Reset allocator for a new function - pub fn reset(&mut self) { - self.variable_locations.clear(); - self.register_contents.clear(); - self.stack_offset = -4; - self.in_use.clear(); - } - - /// Mark a variable as dead (no longer needed) - /// Frees its register if it's in one - pub fn free_var(&mut self, var_name: &str) { - if let Some(Location::Register(reg)) = self.variable_locations.get(var_name) { - let reg = reg.clone(); - self.register_contents.remove(®); - self.in_use.insert(reg, false); - } - self.variable_locations.remove(var_name); - } - - /// Get list of registers that contain variables and are in use - /// These need to be saved before function calls - pub fn get_caller_saved_registers(&self) -> Vec { - self.register_contents - .iter() - .filter(|(reg, _)| *self.in_use.get(*reg).unwrap_or(&false)) - .map(|(reg, _)| reg.clone()) - .collect() - } - - /// Save caller-saved registers before a function call - /// Returns assembly code to save them - pub fn save_caller_saved(&mut self) -> Vec { - let mut code = Vec::new(); - - // For simplicity, save all currently used registers - // In a more sophisticated compiler, you'd only save registers that are live - for (reg, var_name) in self.register_contents.clone() { - if *self.in_use.get(®).unwrap_or(&false) { - code.push(format!("\tpush {}", reg)); - } - } - - code - } - - /// Restore caller-saved registers after a function call - /// Returns assembly code to restore them - pub fn restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec { - let mut code = Vec::new(); - - // Restore in reverse order (LIFO) - for reg in saved_regs.iter().rev() { - code.push(format!("\tpop {}", reg)); - } - - code - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_basic_allocation() { - let mut allocator = RegisterAllocator::new(); - - let (reg1, code1) = allocator.alloc_temp().unwrap(); - assert_eq!(code1.len(), 0); // No spill needed - assert_eq!(reg1, "rg0"); - - let (reg2, code2) = allocator.alloc_temp().unwrap(); - assert_eq!(code2.len(), 0); - assert_eq!(reg2, "rg1"); - - allocator.free_temp(®1); - - let (reg3, code3) = allocator.alloc_temp().unwrap(); - assert_eq!(code3.len(), 0); - assert_eq!(reg3, "rg0"); // Reuses freed register - } - - #[test] - fn test_variable_allocation() { - let mut allocator = RegisterAllocator::new(); - - let (reg, _) = allocator.alloc_var("x").unwrap(); - assert_eq!(reg, "rg0"); - - // Requesting same variable again should return same register - let (reg2, _) = allocator.alloc_var("x").unwrap(); - assert_eq!(reg2, "rg0"); - } - - #[test] - fn test_stack_allocation() { - let mut allocator = RegisterAllocator::new(); - - // Allocate all 16 registers - for i in 0..16 { - allocator.alloc_var(&format!("var{}", i)).unwrap(); - } - - // Next allocation should spill to stack - let (reg, code) = allocator.alloc_var("var16").unwrap(); - assert!(code.len() > 0); // Should have spill code - } -} diff --git a/compiler/src/codegen.rs b/compiler/src/codegen.rs deleted file mode 100644 index 6933e8a..0000000 --- a/compiler/src/codegen.rs +++ /dev/null @@ -1,756 +0,0 @@ -use std::collections::HashMap; -use std::hash::Hash; -use std::sync::LazyLock; -use std::sync::atomic::AtomicU32; -use std::time::SystemTime; - -use chrono::{DateTime, Local}; - -use crate::registers::{Location, RegisterAllocator}; -use crate::{block, cmd, comment, dsa}; - -use crate::parser::{ - BinaryOperator, CompilerError, ConstExpr, Declaration, Dependency, Expression, - Program, Statement, UnaryOperator, Variable, -}; - -pub struct CodeGenerator { - ast: Program, - imports: HashMap, - globals: Vec, - functions: Vec, - symbols: Vec, - allocator: RegisterAllocator, -} - -static GLOBAL_METHODS: LazyLock> = LazyLock::new(|| { - HashMap::from([ - // ("print", "print::print"), - // ("println", "print::println"), - // ("printnum", "print::print_num"), - // ("print_space", "print::print_whitespace"), - // ("print_newline", "print::print_newline"), - // ("print_char", "print::print_byte"), - // ("print_word", "print::print_word"), - // ("print_hex", "print::print_hex_word"), - ]) -}); - -fn import(name: &str, path: &str) -> String { - format!("include {name}: \"{}\"", path) -} - -impl CodeGenerator { - const RET: &'static str = "\tjmp _ret"; - - pub fn new(ast: Program) -> Self { - CodeGenerator { - ast, - imports: HashMap::new(), - globals: Vec::new(), - functions: Vec::new(), - symbols: Vec::new(), - allocator: RegisterAllocator::new(), - } - } - - pub fn include(&mut self, name: &str, path: &str) { - self.imports.insert(name.to_string(), path.to_string()); - } - - fn is_global(&self, name: &str) -> bool { - // Check if this variable is in the globals list - self.globals - .iter() - .any(|g| g.contains(&format!("dw {}:", name))) - } - - pub fn generate(&mut self) -> Result { - // always include the print library for debugging! - self.include("print", "./lib/io/print.dsa"); - - for block in self.ast.clone().declarations { - match block { - Declaration::Variable { - var: Variable { name, .. }, - .. - } => self.symbols.push(name), - Declaration::Function { name, .. } => self.symbols.push(name), - Declaration::Dependency(Dependency { name, .. }) => { - self.symbols.push(name) - } - } - } - - for block in self.ast.clone().declarations { - self.generate_block(block.clone())?; - } - - self.generate_layout() - } - - fn generate_layout(&mut self) -> Result { - let datetime: DateTime = SystemTime::now().into(); - Ok(dsa![ - "", - comment!("GENERATED BY DSC COMPILER"), - comment!(format!( - "Generated at {}", - datetime.format("%Y-%m-%d %H:%M:%S") - )), - "", - // imports - comment!("Imports"), - self.imports - .iter() - .map(|(k, v)| import(k, v)) - .collect::>() - .join("\n"), - "", - // reserved memory - comment!("Globals & Reserved Memory"), - self.globals.join("\n"), - "", - // entry point - comment!("Entry Point"), - "dw stack: 0x10000", - "db message: \"Process Exited with code:\"", - block! [ "_init" - dsa![ldw stack, bpr], - dsa![mov bpr, spr], - dsa![push zero], - dsa![call main], - dsa![call print::print_newline], - dsa![lwi message, rg0], - dsa![push rg0], - dsa![call print::print], - dsa![pop zero], - dsa![call print::print_hex_word], - dsa![pop zero], - dsa![hlt] - ], - "", - comment!("Return"), - block! [ "_ret" - dsa![mov bpr, spr], - dsa![pop bpr], - dsa![return] - ], - comment!("Compiled Code Starts..."), - // block! [ "main" - // dsa![push bpr], - // dsa![mov spr, bpr], - // dsa![lwi 67, rg1], - // dsa![stw rg1, spr, 8], - // dsa![mov bpr, spr], - // dsa![pop bpr], - // dsa![return] - // ], - self.functions.join("\n"), - ]) - } - - fn generate_global(&mut self, name: &str, init: Option) { - self.globals.push(format!( - "dw {}: {}", - name, - init.unwrap_or(ConstExpr::Number(0)) - )) - } - - fn generate_block(&mut self, block: Declaration) -> Result<(), CompilerError> { - match block { - Declaration::Variable { var, init, .. } => { - self.generate_global(&var.name, init) - } - Declaration::Function { - name, - return_type, - params, - body, - } => { - let func = self.generate_function(&name, ¶ms, &body).join("\n"); - - self.functions.push(format!("{func}\n")); - } - Declaration::Dependency(Dependency { name, path }) => { - self.imports.insert(name, path); - } - }; - - Ok(()) - } - - // Example: Generate code for a function - fn generate_function( - &mut self, - name: &str, - params: &[Variable], - body: &[Statement], - ) -> Vec { - let mut code = Vec::new(); - - // Reset allocator for new function - self.allocator.reset(); - - // Function prologue - code.push(format!("{}:", name)); - code.push("\tpush bpr".to_string()); - code.push("\tmov spr, bpr".to_string()); - code.push(String::new()); - - // Allocate parameters to registers or stack locations - for (i, param) in params.iter().enumerate() { - let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8 - // Track that this parameter is at a stack location - let (reg, load_code) = self.allocator.alloc_var(¶m.name).unwrap(); - code.extend(load_code); - code.push(format!("\tldw bpr, {}, {}", reg, offset)); - } - - // Generate code for function body - for stmt in body { - let stmt_code = self.generate_statement(stmt).unwrap(); - code.extend(stmt_code); - } - - // automatically return at function end - if let Some(x) = code.last() - && x == Self::RET - { - } else { - code.push(Self::RET.to_string()); - } - - code - } - - // Example: Generate code for a statement - fn generate_statement( - &mut self, - stmt: &Statement, - ) -> Result, CompilerError> { - let mut code = Vec::new(); - - match stmt { - Statement::Declaration { var, value } => { - if let Some(expr) = value { - // Evaluate expression - let (result_reg, expr_code) = self.generate_expression(expr, true)?; - code.extend(expr_code); - - // Store result in variable - let store_code = self.allocator.store_var(&var.name, &result_reg); - code.extend(store_code); - - // Free temporary register - self.allocator.free_temp(&result_reg); - } else { - // Just declaring variable without initialization - self.allocator.alloc_var(&var.name)?; - } - } - - Statement::Break => unimplemented!(), - Statement::Continue => unimplemented!(), - - Statement::PtrWrite { ptr, value } => { - let (result_reg, expr_code) = self.generate_expression(value, true)?; - code.extend(expr_code); - - let (ptr_reg, ptr_code) = self.generate_expression(ptr, true)?; - code.extend(ptr_code); - - code.push(format!("\tstw {}, {}", result_reg, ptr_reg)); - - self.allocator.free_temp(&result_reg); - self.allocator.free_temp(&ptr_reg); - } - - Statement::Assign { varname, value } => { - // Evaluate expression - let (result_reg, expr_code) = self.generate_expression(value, true)?; - code.extend(expr_code); - - // Check if this is a global variable - if self.is_global(varname) { - // Store to global label - code.push(format!("\tstw {}, {}", result_reg, varname)); - } else { - // Store result in local variable - let store_code = self.allocator.store_var(varname, &result_reg); - code.extend(store_code); - } - - // Free temporary register - self.allocator.free_temp(&result_reg); - } - - Statement::Return(expr) => { - if let Some(e) = expr { - let (result_reg, expr_code) = self.generate_expression(e, true)?; - code.extend(expr_code); - code.push(format!("\tstw {}, bpr, 8", result_reg)); - code.push(format!("\tjmp _ret")); - self.allocator.free_temp(&result_reg); - } - } - - Statement::If { - condition, - then_stmt, - else_stmt, - } => { - // Generate condition - let (cond_reg, cond_code) = self.generate_expression(condition, true)?; - code.extend(cond_code); - - // Compare with zero - code.push(format!("\tcmp {}, zero", cond_reg)); - self.allocator.free_temp(&cond_reg); - - // Generate unique labels - let then_label = format!("_then_{}", self.get_unique_label()); - let else_label = format!("_else_{}", self.get_unique_label()); - let end_label = format!("_end_{}", self.get_unique_label()); - - // Jump to else if condition is false (equal to zero) - code.push(format!("\tjeq {}", else_label)); - - // Then block - code.push(format!("{}:", then_label)); - for s in then_stmt { - code.extend(self.generate_statement(s)?); - } - - if then_stmt.len() == 0 { - code.push("\tnop".to_string()); - } - - code.push(format!("\tjmp {}", end_label)); - - // Else block - code.push(format!("{}:", else_label)); - for s in else_stmt { - code.extend(self.generate_statement(s)?); - } - - if else_stmt.len() == 0 { - code.push("\tnop".to_string()); - } - - code.push(format!("{}:", end_label)); - } - - Statement::While { condition, body } => { - let loop_start = format!("_while_start_{}", self.get_unique_label()); - let loop_end = format!("_while_end_{}", self.get_unique_label()); - - code.push(format!("{}:", loop_start)); - - // Generate condition - let (cond_reg, cond_code) = self.generate_expression(condition, true)?; - code.extend(cond_code); - - code.push(format!("\tcmp {}, zero", cond_reg)); - self.allocator.free_temp(&cond_reg); - - code.push(format!("\tjeq {}", loop_end)); - - // Loop body - for s in body { - code.extend(self.generate_statement(s)?); - } - - code.push(format!("\tjmp {}", loop_start)); - code.push(format!("{}:", loop_end)); - } - - Statement::Loop(body) => { - let loop_start = format!("_loop_start_{}", self.get_unique_label()); - - code.push(format!("{}:", loop_start)); - - for s in body { - code.extend(self.generate_statement(s)?); - } - - code.push(format!("\tjmp {}", loop_start)); - } - - Statement::Expression { expr } => { - let (result_reg, expr_code) = self.generate_expression(expr, false)?; - code.extend(expr_code); - self.allocator.free_temp(&result_reg); - } - - Statement::Block(statements) => { - for s in statements { - code.extend(self.generate_statement(s)?); - } - } - } - - Ok(code) - } - - // Example: Generate code for an expression - // Returns (register containing result, assembly code) - fn generate_expression( - &mut self, - expr: &Expression, - use_result: bool, - ) -> Result<(String, Vec), CompilerError> { - let mut code = Vec::new(); - - // optimisation to prevent generating dead code! - if expr.is_pure() && !use_result { - return Ok((String::new(), code)); - } - - match expr { - Expression::StringLiteral(value) => { - let (reg, alloc_code) = self.allocator.alloc_temp()?; - code.extend(alloc_code); - - // write string into memory - let uuid = self.get_unique_label(); - code.push(format!("\tdb str_{uuid}: \"{value}\"")); - - // Load pointer to string - code.push(format!("\tlwi str_{uuid}, {reg}")); - - Ok((reg, code)) - } - - Expression::CharLiteral(value) => { - let (reg, alloc_code) = self.allocator.alloc_temp()?; - code.extend(alloc_code); - - // Load immediate value - code.push(format!("\tlli {}, {} // '{value}'", *value as u8, reg)); - - Ok((reg, code)) - } - - Expression::Number(value) => { - let (reg, alloc_code) = self.allocator.alloc_temp()?; - code.extend(alloc_code); - - // Load immediate value - code.push(format!("\tlli {}, {}", value & 0xFFFF, reg)); - if *value > 0xFFFF || *value < 0 { - code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg)); - } - - Ok((reg, code)) - } - - Expression::Variable { name, .. } => { - if self.is_global(&name.name) { - // Allocate a temporary register for the global - let (reg, alloc_code) = self.allocator.alloc_temp()?; - code.extend(alloc_code); - - // Load from global label - code.push(format!("\tldw {}, {}", name.name, reg)); - - Ok((reg, code)) - } else { - // Local variable - use existing allocator logic - let (reg, load_code) = self.allocator.load_var(&name.name)?; - code.extend(load_code); - Ok((reg, code)) - } - } - - Expression::Binary { op, left, right } => { - // Evaluate left operand - let (left_reg, left_code) = self.generate_expression(left, true)?; - code.extend(left_code); - - // Evaluate right operand - let (right_reg, right_code) = self.generate_expression(right, true)?; - code.extend(right_code); - - // Allocate result register - let (result_reg, result_alloc) = self.allocator.alloc_temp()?; - code.extend(result_alloc); - - // Generate operation - match op { - BinaryOperator::Add => { - code.push(format!( - "\tadd {}, {}, {}", - left_reg, right_reg, result_reg - )); - } - BinaryOperator::Sub => { - code.push(format!( - "\tsub {}, {}, {}", - left_reg, right_reg, result_reg - )); - } - BinaryOperator::Mul => { - self.include("maths", "./lib/maths/core.dsa"); - // Call multiply function - code.push(format!("\tpush {}", right_reg)); - code.push(format!("\tpush {}", left_reg)); - code.push("\tcall maths::multiply".to_string()); - code.push(format!("\tpop {}", result_reg)); - code.push("\tpop zero".to_string()); - } - // Comparison operators - return 1 (true) or 0 (false) - BinaryOperator::Eq => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - BinaryOperator::Ne => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - BinaryOperator::Lt => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - BinaryOperator::Le => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - BinaryOperator::Gt => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - BinaryOperator::Ge => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1 - code.push(format!("\tlli 1, {}", result_reg)); - code.push(format!("{}:", end_label)); - } - _ => unimplemented!(), - } - - // Free operand registers (allocator will protect variables) - self.allocator.free_temp(&left_reg); - self.allocator.free_temp(&right_reg); - - Ok((result_reg, code)) - } - - Expression::Call { name, args } => { - // first evaluate all the args we're going to need - let mut arg_regs = Vec::new(); - for arg in args.iter().rev() { - let (arg_reg, arg_code) = self.generate_expression(arg, true)?; - code.extend(arg_code); - arg_regs.push(arg_reg); - } - - // Save caller-saved registers and track which ones we saved - // old method, inefficient. - // let saved_regs = self.allocator.get_caller_saved_registers(); - // for reg in &saved_regs { - // code.push(format!("\tpush {}", reg)); - // } - - // Save caller-saved registers and track which ones we saved - let saved_regs = self.allocator.get_caller_saved_registers(); - for reg in &saved_regs { - // spill variables to stack - code.extend(self.allocator.spill_register(reg).unwrap()); - } - - // Evaluate and push arguments in reverse order - for (i, arg_reg) in arg_regs.iter().enumerate() { - code.push(format!( - "\tpush {} // push arg {}", - arg_reg, - args.len() - 1 - i - )); - } - - // if GLOBAL_METHODS.contains_key(name.name.as_str()) { - // code.push(format!("\tcall {}", - // GLOBAL_METHODS[name.name.as_str()])); } else - if self.symbols.contains(&name.name) { - // Call local function - code.push(format!("\tcall {}", name)); - } else if let Some(ns) = name.namespace.clone() - && self.imports.contains_key(&ns) - { - code.push(format!("\tcall {}", name)); - } else { - return Err(CompilerError::Undefined(name.clone())); - } - - let result_reg: String; - - if use_result { - let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?; - result_reg = temp_result_reg; - - code.extend(result_alloc); - code.push(format!("\tpop {}", result_reg)); - - // Clean up arguments - if args.len() > 1 { - for _ in 0..(args.len() - 1) { - code.push("\tpop zero".to_string()); - } - } - } else { - result_reg = "zero".to_string(); - - // Clean up arguments - if args.len() > 0 { - for _ in 0..(args.len()) { - code.push("\tpop zero".to_string()); - } - } - } - - // Restore caller-saved registers in reverse order (LIFO) - // for reg in saved_regs.iter().rev() { - // code.push(format!("\tpop {}", reg)); - // } - - // Free argument registers - for reg in arg_regs { - self.allocator.free_temp(®); - } - - Ok((result_reg, code)) - } - - Expression::Unary { op, operand } => { - let (operand_reg, operand_code) = - self.generate_expression(operand, true)?; - code.extend(operand_code); - - let (result_reg, result_alloc) = self.allocator.alloc_temp()?; - code.extend(result_alloc); - - match op { - UnaryOperator::Minus => { - // Negate: result = 0 - operand - code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg)); - } - UnaryOperator::Plus => { - // Just move - code.push(format!("\tmov {}, {}", operand_reg, result_reg)); - } - UnaryOperator::Dereference => { - code.push(format!("\tldw {}, {}", operand_reg, result_reg)); - } - UnaryOperator::Reference => { - code.extend(self.allocator.spill_register(&operand_reg)?); - code.push(format!( - "\tsubi bpr {} {}", - -(4 + self.allocator.get_stack_offset()), - result_reg - )) - } - } - - self.allocator.free_temp(&operand_reg); - Ok((result_reg, code)) - } - - Expression::Empty => Ok(("zero".to_string(), code)), - } - } - - // Helper for generating unique labels - fn get_unique_label(&mut self) -> String { - // You'd implement a counter here - static COUNTER: AtomicU32 = AtomicU32::new(0); - - let val = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - (val + 1).to_string() - } -} - -/// Build a single string from any number of arguments. -/// Each argument must implement `Display` or be convertible to a string. -#[macro_export] -macro_rules! dsa { - ($($arg:expr),* $(,)?) => {{ - // Start with an empty String – we’ll grow it as we go. - use std::fmt::Write; - let mut s = ::std::string::String::new(); - $( - // `write!` is cheaper than `format!` for each element - // because it re‑uses the same buffer. - - write!(s, "{}\n", $arg).expect("write to String failed"); - )* - s - }}; -} - -// ──────────────────────── dsa! ──────────────────────── -// A tiny helper that just turns its token‑stream into a string. -// The trailing comma is kept – it’s part of the syntax you want. -#[macro_export] -macro_rules! cmd { - ($($tokens:tt)*) => {{ - // We’ll just stringify the tokens and return a String. - format!("{}", concat!(stringify!($tokens), "\n")) - }}; -} - -// ──────────────────────── block! ──────────────────────── -// Usage: -// -// let asm = block![ "name" -// dsa![mov rg0, rg1], -// dsa![add rg1, rg1] -// ]; -// -// `asm` is a `&'static str` containing: -// -// name: -// mov rg0, rg1 -// add rg1, rg1 -// -#[macro_export] -macro_rules! block { - // The first token must be a string literal – that’s the label. - ($label:literal $(dsa![$($ins:tt)*]),* ) => {{ - // Build a single string at compile time. - const CODE: &str = concat!( - $label, ":\n", - // Each `dsa!` call yields a string like `"mov rg0, rg1"`. - // We add a newline after each one to get the desired layout. - $(concat!("\t", stringify!($($ins)*), "\n")),* - ); - CODE - }}; -} - -#[macro_export] -macro_rules! comment { - ($text:expr) => {{ format!("// {}", $text) }}; -} diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs deleted file mode 100644 index 5245bee..0000000 --- a/compiler/src/lexer.rs +++ /dev/null @@ -1,627 +0,0 @@ -use std::iter::Peekable; -use std::str::Chars; - -#[derive(Debug, PartialEq, Clone)] -pub enum Token { - // Keywords - Fn, - Let, - If, - Else, - Loop, - While, - Break, - Return, - Continue, - Include, - Static, - Const, - - // Identifiers and literals - Identifier(Name), - String(String), - Integer(u64), - Char(char), - - // Symbols - LeftParen, // ( - RightParen, // ) - LeftBrace, // { - RightBrace, // } - Semicolon, // ; - Colon, // : - Comma, // , - - // Operators - Plus, // + - Minus, // - - Star, // * - Amphersand, // & - Slash, // / - Assign, // = - EqualEqual, // == - Bang, // ! - BangEqual, // != - Less, // < - LessEqual, // <= - Greater, // > - GreaterEqual, // >= - RightArrow, // -> - - // Special - Eof, -} - -#[derive(Debug, PartialEq, Clone)] -pub struct Name { - pub name: String, - pub namespace: Option, -} - -use std::fmt; - -impl fmt::Display for Name { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Some(ref ns) = self.namespace { - write!(f, "{}::{}", ns, self.name) - } else { - write!(f, "{}", self.name) - } - } -} - -impl Token { - pub fn tt(&self) -> &str { - match self { - Token::Const => "Const", - Token::Static => "Static", - Token::Include => "Include", - Token::Fn => "Fn", - Token::If => "If", - Token::Let => "Let", - Token::Else => "Else", - Token::Loop => "Loop", - Token::While => "While", - Token::Break => "Break", - Token::Return => "Return", - Token::Continue => "Continue", - Token::Identifier(_) => "Identifier", - Token::String(_) => "String", - Token::Integer(_) => "UnsignedInt", - Token::Char(_) => "Char", - Token::LeftParen => "LeftParen", - Token::RightParen => "RightParen", - Token::LeftBrace => "LeftBrace", - Token::RightBrace => "RightBrace", - Token::Semicolon => "Semicolon", - Token::Colon => "Colon", - Token::Comma => "Comma", - Token::RightArrow => "RightArrow", - Token::Plus => "Plus", - Token::Minus => "Minus", - Token::Star => "Star", - Token::Amphersand => "Amphersand", - Token::Slash => "Slash", - Token::Assign => "Assign", - Token::EqualEqual => "EqualEqual", - Token::Bang => "Bang", - Token::BangEqual => "BangEqual", - Token::Less => "Less", - Token::LessEqual => "LessEqual", - Token::Greater => "Greater", - Token::GreaterEqual => "GreaterEqual", - Token::Eof => "Eof", - } - } -} - -#[derive(Debug)] -pub struct Lexer<'a> { - chars: Peekable>, - current: Option, - line: usize, -} - -impl<'a> Lexer<'a> { - pub fn new(input: &'a str) -> Self { - let mut chars = input.chars().peekable(); - let current = chars.next(); - - Lexer { - chars, - current, - line: 1, - } - } - - fn advance(&mut self) -> Option { - self.current = self.chars.next(); - self.current - } - - fn peek(&mut self) -> Option<&char> { - self.chars.peek() - } - - fn skip_whitespace(&mut self) { - while let Some(c) = self.current { - if !c.is_whitespace() { - break; - } - if c == '\n' { - self.line += 1; - } - self.advance(); - } - } - - fn skip_line_comment(&mut self) { - // Skip the two slashes - self.advance(); // first / - self.advance(); // second / - - // Skip until newline or EOF - while let Some(c) = self.current { - if c == '\n' { - self.line += 1; - self.advance(); - break; - } - self.advance(); - } - } - - fn skip_block_comment(&mut self) -> Result<(), String> { - // Skip the /* - self.advance(); // / - self.advance(); // * - - let start_line = self.line; - - // Look for */ - while let Some(c) = self.current { - if c == '\n' { - self.line += 1; - } - - if c == '*' { - if let Some(&next) = self.peek() { - if next == '/' { - self.advance(); // * - self.advance(); // / - return Ok(()); - } - } - } - - self.advance(); - } - - Err(format!( - "Unterminated block comment starting at line {}", - start_line - )) - } - - fn skip_whitespace_and_comments(&mut self) { - loop { - self.skip_whitespace(); - - // Check for comments - if let Some('/') = self.current { - if let Some(&next) = self.peek() { - match next { - '/' => { - self.skip_line_comment(); - continue; - } - '*' => { - if let Err(e) = self.skip_block_comment() { - eprintln!("Lexer error: {}", e); - } - continue; - } - _ => break, - } - } - } - - break; - } - } - - fn read_identifier(&mut self) -> String { - let mut ident = String::new(); - - // Include the current character if it's valid - if let Some(c) = self.current { - if c.is_alphabetic() || c == '_' { - ident.push(c); - } - } - - // Read remaining characters - while let Some(&c) = self.peek() { - if c.is_alphanumeric() || c == '_' { - self.advance(); - ident.push(c); - } else { - break; - } - } - - ident - } - - fn keyword_or_identifier(&mut self) -> Token { - let first_ident = self.read_identifier(); - - // Check if it's a keyword first (keywords can't have namespaces) - let keyword = match first_ident.as_str() { - "fn" => Some(Token::Fn), - "if" => Some(Token::If), - "else" => Some(Token::Else), - "while" => Some(Token::While), - "loop" => Some(Token::Loop), - "break" => Some(Token::Break), - "return" => Some(Token::Return), - "continue" => Some(Token::Continue), - "include" => Some(Token::Include), - "let" => Some(Token::Let), - "const" => Some(Token::Const), - "static" => Some(Token::Static), - _ => None, - }; - - if let Some(kw) = keyword { - return kw; - } - - // Not a keyword - check for namespace separator (::) - // We need to peek TWO characters ahead without consuming anything - if let Some(&':') = self.peek() { - // We see one colon, but we need to check if there's another one after it - // We can't peek two ahead directly, so we need a different approach - - // Save the current position by using a temporary peekable iterator - // Actually, we can't do that easily. Instead, let's just check: - // If we see ':', temporarily advance and check the next char - - // Create a temporary check - let mut temp_chars = self.chars.clone(); - let first_peek = temp_chars.next(); // This is the ':' we already saw - let second_peek = temp_chars.peek(); - - if let Some(&':') = second_peek { - // It's :: - consume both colons - self.advance(); // consume first : - self.advance(); // consume second : - - // Read the second identifier (the actual name) - let second_ident = self.read_identifier(); - - // Return namespaced identifier - return Token::Identifier(Name { - namespace: Some(first_ident), - name: second_ident, - }); - } - // else: It's a single colon (type annotation) - DON'T consume it - // Just fall through and return the identifier - } - - // No namespace separator - just a regular identifier - Token::Identifier(Name { - namespace: None, - name: first_ident, - }) - } - - fn read_number(&mut self) -> Result { - let current = self.current.unwrap(); - - // Check for hex (0x) or binary (0b) prefix - if current == '0' { - if let Some(&next_char) = self.peek() { - match next_char { - 'x' | 'X' => { - self.advance(); // consume '0' - self.advance(); // consume 'x' - return self.read_hex_number(); - } - 'b' | 'B' => { - self.advance(); // consume '0' - self.advance(); // consume 'b' - return self.read_binary_number(); - } - _ => {} - } - } - } - - // Read decimal number - self.read_decimal_number() - } - - fn read_decimal_number(&mut self) -> Result { - let mut num_str = String::new(); - - if let Some(c) = self.current { - num_str.push(c); - } - - while let Some(&c) = self.peek() { - if c.is_ascii_digit() { - self.advance(); - num_str.push(c); - } else { - break; - } - } - - num_str - .parse::() - .map_err(|_| format!("Invalid decimal number: {}", num_str)) - } - - fn read_hex_number(&mut self) -> Result { - let mut num_str = String::new(); - - // Read current character if it's a hex digit - if let Some(c) = self.current { - if c.is_ascii_hexdigit() { - num_str.push(c); - } - } - - while let Some(&c) = self.peek() { - if c.is_ascii_hexdigit() { - self.advance(); - num_str.push(c); - } else { - break; - } - } - - if num_str.is_empty() { - return Err("Invalid hexadecimal number: no digits after 0x".to_string()); - } - - u64::from_str_radix(&num_str, 16) - .map_err(|_| format!("Invalid hexadecimal number: {}", num_str)) - } - - fn read_binary_number(&mut self) -> Result { - let mut num_str = String::new(); - - // Read current character if it's a binary digit - if let Some(c) = self.current { - if c == '0' || c == '1' { - num_str.push(c); - } - } - - while let Some(&c) = self.peek() { - if c == '0' || c == '1' { - self.advance(); - num_str.push(c); - } else { - break; - } - } - - if num_str.is_empty() { - return Err("Invalid binary number: no digits after 0b".to_string()); - } - - u64::from_str_radix(&num_str, 2) - .map_err(|_| format!("Invalid binary number: {}", num_str)) - } - - fn read_string(&mut self) -> Result { - self.advance(); // Skip the opening quote - let mut s = String::new(); - - while let Some(c) = self.current { - if c == '"' { - return Ok(s); - } - - // Handle escape sequences - if c == '\\' { - self.advance(); - if let Some(escaped) = self.current { - let escaped_char = match escaped { - 'n' => '\n', - 't' => '\t', - 'r' => '\r', - '\\' => '\\', - '"' => '"', - _ => escaped, // For now, just use the character as-is - }; - s.push(escaped_char); - } else { - return Err("Unexpected end of string after escape".to_string()); - } - } else { - s.push(c); - } - - self.advance(); - } - - Err("Unterminated string literal".to_string()) - } - - fn match_next(&mut self, expected: char) -> bool { - match self.peek() { - Some(&c) if c == expected => { - self.advance(); - true - } - _ => false, - } - } - - fn scan_single_char_token(&mut self, c: char) -> Option { - match c { - '(' => Some(Token::LeftParen), - ')' => Some(Token::RightParen), - '{' => Some(Token::LeftBrace), - '}' => Some(Token::RightBrace), - ';' => Some(Token::Semicolon), - ',' => Some(Token::Comma), - '&' => Some(Token::Amphersand), - '+' => Some(Token::Plus), - '*' => Some(Token::Star), - _ => None, - } - } - - fn scan_operator(&mut self, c: char) -> Option { - match c { - '-' => Some(if self.match_next('>') { - Token::RightArrow - } else { - Token::Minus - }), - '!' => Some(if self.match_next('=') { - Token::BangEqual - } else { - Token::Bang - }), - '=' => Some(if self.match_next('=') { - Token::EqualEqual - } else { - Token::Assign - }), - '<' => Some(if self.match_next('=') { - Token::LessEqual - } else { - Token::Less - }), - '>' => Some(if self.match_next('=') { - Token::GreaterEqual - } else { - Token::Greater - }), - ':' => { - // Single colon (for type annotations) - // Note: :: is handled in keyword_or_identifier for namespaces - Some(Token::Colon) - } - '/' => { - // Check if it's a comment or division - if let Some(&next) = self.peek() { - if next == '/' || next == '*' { - // It's a comment, don't consume it here - // Let skip_whitespace_and_comments handle it - None - } else { - Some(Token::Slash) - } - } else { - Some(Token::Slash) - } - } - _ => None, - } - } - - pub fn next_token(&mut self) -> Token { - self.skip_whitespace_and_comments(); - - let Some(c) = self.current else { - return Token::Eof; - }; - - // Try single-character tokens first - if let Some(token) = self.scan_single_char_token(c) { - self.advance(); - return token; - } - - // Try operators (may be multi-character) - if let Some(token) = self.scan_operator(c) { - self.advance(); - return token; - } - - // String literals - if c == '"' { - let token = match self.read_string() { - Ok(s) => Token::String(s), - Err(e) => { - eprintln!("Lexer error on line {}: {}", self.line, e); - // Skip to next quote or end - while let Some(ch) = self.current { - if ch == '"' || ch == '\n' { - break; - } - self.advance(); - } - Token::String(String::new()) - } - }; - self.advance(); - return token; - } - - // Identifiers and keywords (including namespaced identifiers) - if c.is_alphabetic() || c == '_' { - let token = self.keyword_or_identifier(); - self.advance(); - return token; - } - - // Numbers (decimal, hex, binary) - if c.is_ascii_digit() { - let token = match self.read_number() { - Ok(num) => Token::Integer(num), - Err(e) => { - eprintln!("Lexer error on line {}: {}", self.line, e); - // Skip invalid number - while let Some(&ch) = self.peek() { - if !ch.is_alphanumeric() { - break; - } - self.advance(); - } - Token::Integer(0) - } - }; - self.advance(); - return token; - } - - // Unknown character - skip it - eprintln!( - "Lexer warning on line {}: Skipping unknown character '{}'", - self.line, c - ); - self.advance(); - self.next_token() - } -} - -impl<'a> Iterator for Lexer<'a> { - type Item = Token; - - fn next(&mut self) -> Option { - match self.next_token() { - Token::Eof => None, - token => Some(token), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_basic() { - // Placeholder test - assert!(true); - } -}