diff --git a/Cargo.lock b/Cargo.lock index 51d0e25..4881937 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -539,6 +539,10 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +[[package]] +name = "c_compiler" +version = "0.2.0" + [[package]] name = "calloop" version = "0.13.0" diff --git a/Cargo.toml b/Cargo.toml index 9ae7c73..b317ec7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ cargo-features = ["codegen-backend"] [workspace] -members = ["emulator", "common", "assembler", "dsa_editor", "compiler"] +members = ["emulator", "common", "assembler", "dsa_editor", "compiler", "c_compiler"] resolver = "3" [workspace.package] diff --git a/assembler/src/assembler/mod.rs b/assembler/src/assembler/mod.rs index 26fd801..966febb 100644 --- a/assembler/src/assembler/mod.rs +++ b/assembler/src/assembler/mod.rs @@ -138,6 +138,11 @@ fn assemble(src: &Path) -> Result, AssembleError> { create_sections(&mut nodes)?; resolve_symbols(&mut nodes)?; + println!("Generating assembly output..."); + for n in &nodes { + println!("{n}"); + } + let instructions = codegen(nodes)?; Ok(instructions) } diff --git a/c_compiler/Cargo.toml b/c_compiler/Cargo.toml new file mode 100644 index 0000000..787ae6a --- /dev/null +++ b/c_compiler/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "c_compiler" +version.workspace = true +edition.workspace = true +authors.workspace = true + +[dependencies] diff --git a/c_compiler/code.c b/c_compiler/code.c new file mode 100644 index 0000000..cc43221 --- /dev/null +++ b/c_compiler/code.c @@ -0,0 +1,19 @@ +int factorial(int n) { + if (n <= 1) { + return 1; + } + return n * factorial(n - 1); +} + +int add(int a, int b) { return a + b; } + +int main() { + int x; + x = 5; + int x = 5; + int result; + int result = 5; + result = x + factorial(5); + print(result); + return 0; +} diff --git a/c_compiler/compiler.py b/c_compiler/compiler.py new file mode 100644 index 0000000..627278c --- /dev/null +++ b/c_compiler/compiler.py @@ -0,0 +1,926 @@ +#!/usr/bin/env python3 +""" +Simple C to DSA Assembly Compiler +Supports a subset of C including: +- int variables and functions +- Arithmetic operations (+, -, *, /) +- Comparisons (==, !=, <, >, <=, >=) +- If/else statements +- While loops +- Function calls +- Return statements +""" + +import re +import sys +from typing import List, Dict, Optional, Tuple +from dataclasses import dataclass +from enum import Enum +from pprint import pprint +import json + + +class TokenType(Enum): + # Keywords + INT = "int" + IF = "if" + ELSE = "else" + WHILE = "while" + RETURN = "return" + + # Identifiers and literals + IDENTIFIER = "IDENTIFIER" + NUMBER = "NUMBER" + + # Operators + PLUS = "+" + MINUS = "-" + STAR = "*" + SLASH = "/" + ASSIGN = "=" + EQ = "==" + NE = "!=" + LT = "<" + GT = ">" + LE = "<=" + GE = ">=" + + # Delimiters + LPAREN = "(" + RPAREN = ")" + LBRACE = "{" + RBRACE = "}" + SEMICOLON = ";" + COMMA = "," + + EOF = "EOF" + + +@dataclass +class Token: + type: TokenType + value: str + line: int + col: int + + +class Lexer: + def __init__(self, source: str): + self.source = source + self.pos = 0 + self.line = 1 + self.col = 1 + self.tokens = [] + + def error(self, msg: str): + raise SyntaxError(f"Lexer error at line {self.line}, col {self.col}: {msg}") + + def peek(self, offset: int = 0) -> Optional[str]: + pos = self.pos + offset + return self.source[pos] if pos < len(self.source) else None + + def advance(self) -> Optional[str]: + if self.pos >= len(self.source): + return None + char = self.source[self.pos] + self.pos += 1 + if char == "\n": + self.line += 1 + self.col = 1 + else: + self.col += 1 + return char + + def skip_whitespace(self): + while self.peek() and self.peek() in " \t\n\r": + self.advance() + + def skip_comment(self): + if self.peek() == "/" and self.peek(1) == "/": + while self.peek() and self.peek() != "\n": + self.advance() + self.advance() # skip newline + + def read_number(self) -> str: + num = "" + while self.peek() and self.peek().isdigit(): + num += self.advance() + return num + + def read_identifier(self) -> str: + ident = "" + while self.peek() and (self.peek().isalnum() or self.peek() == "_"): + ident += self.advance() + return ident + + def tokenize(self) -> List[Token]: + keywords = { + "int": TokenType.INT, + "if": TokenType.IF, + "else": TokenType.ELSE, + "while": TokenType.WHILE, + "return": TokenType.RETURN, + } + + while self.pos < len(self.source): + self.skip_whitespace() + self.skip_comment() + + if self.pos >= len(self.source): + break + + line, col = self.line, self.col + char = self.peek() + + # Numbers + if char.isdigit(): + num = self.read_number() + self.tokens.append(Token(TokenType.NUMBER, num, line, col)) + + # Identifiers and keywords + elif char.isalpha() or char == "_": + ident = self.read_identifier() + token_type = keywords.get(ident, TokenType.IDENTIFIER) + self.tokens.append(Token(token_type, ident, line, col)) + + # Two-character operators + elif char == "=" and self.peek(1) == "=": + self.advance() + self.advance() + self.tokens.append(Token(TokenType.EQ, "==", line, col)) + elif char == "!" and self.peek(1) == "=": + self.advance() + self.advance() + self.tokens.append(Token(TokenType.NE, "!=", line, col)) + elif char == "<" and self.peek(1) == "=": + self.advance() + self.advance() + self.tokens.append(Token(TokenType.LE, "<=", line, col)) + elif char == ">" and self.peek(1) == "=": + self.advance() + self.advance() + self.tokens.append(Token(TokenType.GE, ">=", line, col)) + + # Single-character operators + elif char == "+": + self.advance() + self.tokens.append(Token(TokenType.PLUS, "+", line, col)) + elif char == "-": + self.advance() + self.tokens.append(Token(TokenType.MINUS, "-", line, col)) + elif char == "*": + self.advance() + self.tokens.append(Token(TokenType.STAR, "*", line, col)) + elif char == "/": + self.advance() + self.tokens.append(Token(TokenType.SLASH, "/", line, col)) + elif char == "=": + self.advance() + self.tokens.append(Token(TokenType.ASSIGN, "=", line, col)) + elif char == "<": + self.advance() + self.tokens.append(Token(TokenType.LT, "<", line, col)) + elif char == ">": + self.advance() + self.tokens.append(Token(TokenType.GT, ">", line, col)) + elif char == "(": + self.advance() + self.tokens.append(Token(TokenType.LPAREN, "(", line, col)) + elif char == ")": + self.advance() + self.tokens.append(Token(TokenType.RPAREN, ")", line, col)) + elif char == "{": + self.advance() + self.tokens.append(Token(TokenType.LBRACE, "{", line, col)) + elif char == "}": + self.advance() + self.tokens.append(Token(TokenType.RBRACE, "}", line, col)) + elif char == ";": + self.advance() + self.tokens.append(Token(TokenType.SEMICOLON, ";", line, col)) + elif char == ",": + self.advance() + self.tokens.append(Token(TokenType.COMMA, ",", line, col)) + else: + self.error(f"Unexpected character: {char}") + + self.tokens.append(Token(TokenType.EOF, "", self.line, self.col)) + return self.tokens + + +# AST Node classes +@dataclass +class ASTNode: + pass + + +@dataclass +class Program(ASTNode): + declarations: List["Declaration"] + + +@dataclass +class Declaration(ASTNode): + pass + + +@dataclass +class FunctionDecl(Declaration): + name: str + params: List[str] + body: "CompoundStmt" + + +@dataclass +class VarDecl(Declaration): + name: str + init: Optional["Expression"] = None + + +@dataclass +class Statement(ASTNode): + pass + + +@dataclass +class CompoundStmt(Statement): + statements: List[Statement] + + +@dataclass +class ExprStmt(Statement): + expr: Optional["Expression"] + + +@dataclass +class IfStmt(Statement): + condition: "Expression" + then_stmt: Statement + else_stmt: Optional[Statement] = None + + +@dataclass +class WhileStmt(Statement): + condition: "Expression" + body: Statement + + +@dataclass +class ReturnStmt(Statement): + expr: Optional["Expression"] + + +@dataclass +class Expression(ASTNode): + pass + + +@dataclass +class BinaryOp(Expression): + op: str + left: Expression + right: Expression + + +@dataclass +class UnaryOp(Expression): + op: str + operand: Expression + + +@dataclass +class AssignExpr(Expression): + name: str + value: Expression + + +@dataclass +class VarExpr(Expression): + name: str + + +@dataclass +class NumberExpr(Expression): + value: int + + +@dataclass +class CallExpr(Expression): + name: str + args: List[Expression] + + +class Parser: + def __init__(self, tokens: List[Token]): + self.tokens = tokens + self.pos = 0 + + def error(self, msg: str): + token = self.current() + raise SyntaxError(f"Parser error at line {token.line}, col {token.col}: {msg}") + + def current(self) -> Token: + return self.tokens[self.pos] if self.pos < len(self.tokens) else self.tokens[-1] + + def peek(self, offset: int = 0) -> Token: + pos = self.pos + offset + return self.tokens[pos] if pos < len(self.tokens) else self.tokens[-1] + + def advance(self) -> Token: + token = self.current() + if self.pos < len(self.tokens) - 1: + self.pos += 1 + return token + + def expect(self, token_type: TokenType) -> Token: + token = self.current() + if token.type != token_type: + self.error(f"Expected {token_type.value}, got {token.type.value}") + return self.advance() + + def parse(self) -> Program: + declarations = [] + while self.current().type != TokenType.EOF: + declarations.append(self.parse_declaration()) + return Program(declarations) + + def parse_declaration(self) -> Declaration: + self.expect(TokenType.INT) + name = self.expect(TokenType.IDENTIFIER).value + + if self.current().type == TokenType.LPAREN: + # Function declaration + self.advance() + params = [] + + if self.current().type != TokenType.RPAREN: + self.expect(TokenType.INT) + params.append(self.expect(TokenType.IDENTIFIER).value) + + while self.current().type == TokenType.COMMA: + self.advance() + self.expect(TokenType.INT) + params.append(self.expect(TokenType.IDENTIFIER).value) + + self.expect(TokenType.RPAREN) + body = self.parse_compound_stmt() + return FunctionDecl(name, params, body) + else: + # Variable declaration + init = None + if self.current().type == TokenType.ASSIGN: + self.advance() + init = self.parse_expression() + self.expect(TokenType.SEMICOLON) + return VarDecl(name, init) + + def parse_compound_stmt(self) -> CompoundStmt: + self.expect(TokenType.LBRACE) + statements = [] + + while self.current().type != TokenType.RBRACE: + statements.append(self.parse_statement()) + + self.expect(TokenType.RBRACE) + return CompoundStmt(statements) + + def parse_statement(self) -> Statement: + token = self.current() + + if token.type == TokenType.LBRACE: + return self.parse_compound_stmt() + elif token.type == TokenType.IF: + return self.parse_if_stmt() + elif token.type == TokenType.WHILE: + return self.parse_while_stmt() + elif token.type == TokenType.RETURN: + return self.parse_return_stmt() + elif token.type == TokenType.INT: + # Local variable declaration + self.advance() + name = self.expect(TokenType.IDENTIFIER).value + init = None + if self.current().type == TokenType.ASSIGN: + self.advance() + init = self.parse_expression() + self.expect(TokenType.SEMICOLON) + return ExprStmt(AssignExpr(name, init) if init else None) + else: + expr = ( + self.parse_expression() + if self.current().type != TokenType.SEMICOLON + else None + ) + self.expect(TokenType.SEMICOLON) + return ExprStmt(expr) + + def parse_if_stmt(self) -> IfStmt: + self.expect(TokenType.IF) + self.expect(TokenType.LPAREN) + condition = self.parse_expression() + self.expect(TokenType.RPAREN) + then_stmt = self.parse_statement() + + else_stmt = None + if self.current().type == TokenType.ELSE: + self.advance() + else_stmt = self.parse_statement() + + return IfStmt(condition, then_stmt, else_stmt) + + def parse_while_stmt(self) -> WhileStmt: + self.expect(TokenType.WHILE) + self.expect(TokenType.LPAREN) + condition = self.parse_expression() + self.expect(TokenType.RPAREN) + body = self.parse_statement() + return WhileStmt(condition, body) + + def parse_return_stmt(self) -> ReturnStmt: + self.expect(TokenType.RETURN) + expr = None + if self.current().type != TokenType.SEMICOLON: + expr = self.parse_expression() + self.expect(TokenType.SEMICOLON) + return ReturnStmt(expr) + + def parse_expression(self) -> Expression: + return self.parse_assignment() + + def parse_assignment(self) -> Expression: + expr = self.parse_comparison() + + if self.current().type == TokenType.ASSIGN: + if not isinstance(expr, VarExpr): + self.error("Invalid assignment target") + self.advance() + value = self.parse_assignment() + return AssignExpr(expr.name, value) + + return expr + + def parse_comparison(self) -> Expression: + expr = self.parse_additive() + + while self.current().type in [ + TokenType.EQ, + TokenType.NE, + TokenType.LT, + TokenType.GT, + TokenType.LE, + TokenType.GE, + ]: + op = self.advance().value + right = self.parse_additive() + expr = BinaryOp(op, expr, right) + + return expr + + def parse_additive(self) -> Expression: + expr = self.parse_multiplicative() + + while self.current().type in [TokenType.PLUS, TokenType.MINUS]: + op = self.advance().value + right = self.parse_multiplicative() + expr = BinaryOp(op, expr, right) + + return expr + + def parse_multiplicative(self) -> Expression: + expr = self.parse_unary() + + while self.current().type in [TokenType.STAR, TokenType.SLASH]: + op = self.advance().value + right = self.parse_unary() + expr = BinaryOp(op, expr, right) + + return expr + + def parse_unary(self) -> Expression: + if self.current().type in [TokenType.PLUS, TokenType.MINUS]: + op = self.advance().value + operand = self.parse_unary() + return UnaryOp(op, operand) + + return self.parse_primary() + + def parse_primary(self) -> Expression: + token = self.current() + + if token.type == TokenType.NUMBER: + self.advance() + return NumberExpr(int(token.value)) + + elif token.type == TokenType.IDENTIFIER: + name = self.advance().value + + if self.current().type == TokenType.LPAREN: + # Function call + self.advance() + args = [] + + if self.current().type != TokenType.RPAREN: + args.append(self.parse_expression()) + while self.current().type == TokenType.COMMA: + self.advance() + args.append(self.parse_expression()) + + self.expect(TokenType.RPAREN) + return CallExpr(name, args) + else: + return VarExpr(name) + + elif token.type == TokenType.LPAREN: + self.advance() + expr = self.parse_expression() + self.expect(TokenType.RPAREN) + return expr + + else: + self.error(f"Unexpected token: {token.type.value}") + + +class CodeGenerator: + def __init__(self): + self.output = [] + self.label_counter = 0 + self.string_counter = 0 + self.functions = {} + self.current_function = None + self.local_vars = {} + self.global_vars = {} + self.register_pool = [f"rg{i:x}" for i in range(16)] + self.used_registers = set() + + def new_label(self, prefix: str = "L") -> str: + label = f"{prefix}{self.label_counter}" + self.label_counter += 1 + return label + + def allocate_register(self) -> str: + for reg in self.register_pool: + if reg not in self.used_registers: + self.used_registers.add(reg) + return reg + raise RuntimeError("Out of registers") + + def free_register(self, reg: str): + self.used_registers.discard(reg) + + def emit(self, code: str): + self.output.append(code) + + def generate(self, program: Program) -> str: + # Emit data section + self.emit("// Global variables") + for decl in program.declarations: + if isinstance(decl, VarDecl): + self.global_vars[decl.name] = f"var_{decl.name}" + if decl.init: + if isinstance(decl.init, NumberExpr): + self.emit(f"dw var_{decl.name}: {decl.init.value}") + else: + self.emit(f"dw var_{decl.name}: 0") + else: + self.emit(f"dw var_{decl.name}: 0") + + self.emit("") + self.emit("// Entry point") + self.emit("dw stack_bottom: 0x10000") + self.emit("") + self.emit("init:") + self.emit(" ldw stack_bottom, spr") + self.emit(" mov spr, bpr") + + self.emit(" push zero") + self.emit(" call main") + self.emit(" pop rg0") + self.emit(" hlt") + self.emit("") + + # Emit functions + for decl in program.declarations: + if isinstance(decl, FunctionDecl): + self.generate_function(decl) + + return "\n".join(self.output) + + def generate_function(self, func: FunctionDecl): + self.current_function = func.name + self.functions[func.name] = func + self.local_vars = {} + + # Map parameters to stack offsets + # Parameters start at bpr+8 (after return addr at bpr+4) + for i, param in enumerate(func.params): + self.local_vars[param] = 8 + (i * 4) + + self.emit(f"{func.name}:") + self.emit(" push bpr") + self.emit(" mov spr, bpr") + self.emit("") + + # Generate function body + self.generate_compound_stmt(func.body) + + # Default return if no explicit return + self.emit("// default return") + self.emit(f"{func.name}_end:") + self.emit(" mov bpr, spr") + self.emit(" pop bpr") + self.emit(" return") + self.emit("") + + def generate_compound_stmt(self, stmt: CompoundStmt): + for s in stmt.statements: + self.generate_statement(s) + + def generate_statement(self, stmt: Statement): + if isinstance(stmt, CompoundStmt): + self.generate_compound_stmt(stmt) + elif isinstance(stmt, ExprStmt): + if stmt.expr: + reg = self.generate_expression(stmt.expr) + self.free_register(reg) + elif isinstance(stmt, IfStmt): + self.generate_if_stmt(stmt) + elif isinstance(stmt, WhileStmt): + self.generate_while_stmt(stmt) + elif isinstance(stmt, ReturnStmt): + self.generate_return_stmt(stmt) + + def generate_if_stmt(self, stmt: IfStmt): + else_label = self.new_label("else") + end_label = self.new_label("endif") + + # Evaluate condition + cond_reg = self.generate_expression(stmt.condition) + self.emit(f" cmp {cond_reg}, zero") + self.free_register(cond_reg) + + if stmt.else_stmt: + self.emit(f" jeq {else_label}") + else: + self.emit(f" jeq {end_label}") + + # Then branch + self.generate_statement(stmt.then_stmt) + + if stmt.else_stmt: + self.emit(f" jmp {end_label}") + self.emit(f"{else_label}:") + self.generate_statement(stmt.else_stmt) + + self.emit(f"{end_label}:") + + def generate_while_stmt(self, stmt: WhileStmt): + start_label = self.new_label("while_start") + end_label = self.new_label("while_end") + + self.emit(f"{start_label}:") + + # Evaluate condition + cond_reg = self.generate_expression(stmt.condition) + self.emit(f" cmp {cond_reg}, zero") + self.free_register(cond_reg) + self.emit(f" jeq {end_label}") + + # Loop body + self.generate_statement(stmt.body) + self.emit(f" jmp {start_label}") + + self.emit(f"{end_label}:") + + def generate_return_stmt(self, stmt: ReturnStmt): + if stmt.expr: + reg = self.generate_expression(stmt.expr) + # Store return value at spr+8 according to calling convention + self.emit(f" stw {reg}, spr, 8") + self.free_register(reg) + self.emit(f" jmp {self.current_function}_end") + + def generate_expression(self, expr: Expression) -> str: + if isinstance(expr, NumberExpr): + reg = self.allocate_register() + if expr.value <= 0xFFFF and expr.value >= 0: + self.emit(f" lli {expr.value}, {reg}") + if expr.value > 0xFF: + self.emit(f" lui {expr.value >> 16}, {reg}") + else: + self.emit(f" lli {expr.value & 0xFFFF}, {reg}") + self.emit(f" lui {(expr.value >> 16) & 0xFFFF}, {reg}") + return reg + + elif isinstance(expr, VarExpr): + reg = self.allocate_register() + if expr.name in self.local_vars: + offset = self.local_vars[expr.name] + self.emit(f" ldw bpr, {reg}, {offset}") + elif expr.name in self.global_vars: + label = self.global_vars[expr.name] + self.emit(f" ldw {label}, {reg}") + else: + raise RuntimeError(f"Undefined variable: {expr.name}") + return reg + + elif isinstance(expr, AssignExpr): + value_reg = self.generate_expression(expr.value) + + if expr.name in self.local_vars: + offset = self.local_vars[expr.name] + self.emit(f" stw {value_reg}, bpr, {offset}") + elif expr.name in self.global_vars: + label = self.global_vars[expr.name] + self.emit(f" stw {value_reg}, {label}") + else: + # New local variable - allocate after params and return value space + # Start local variables at offset -4 from bpr (growing downward) + offset = -(len([v for v in self.local_vars.values() if v < 0]) + 1) * 4 + self.local_vars[expr.name] = offset + self.emit(f" stw {value_reg}, bpr, {offset}") + + return value_reg + + elif isinstance(expr, BinaryOp): + return self.generate_binary_op(expr) + + elif isinstance(expr, UnaryOp): + operand_reg = self.generate_expression(expr.operand) + result_reg = self.allocate_register() + + if expr.op == "-": + self.emit(f" lwi 0, {result_reg}") + self.emit(f" sub {result_reg}, {operand_reg}, {result_reg}") + else: # + + self.emit(f" mov {operand_reg}, {result_reg}") + + self.free_register(operand_reg) + return result_reg + + elif isinstance(expr, CallExpr): + # First, make space for return value (must be pushed BEFORE arguments) + temp_reg = self.allocate_register() + + # Then push arguments in reverse order + arg_regs = [] + for arg in reversed(expr.args): + reg = self.generate_expression(arg) + self.emit(f" push {reg}") + arg_regs.append(reg) + + # Call function + self.emit(f" call {expr.name}") + + # Get return value (it's now on top of stack) + self.emit(f" pop {temp_reg}") + + # Clean up remaining args + for i in range(len(arg_regs) - 1): + self.emit(f" pop zero") + + # Free the arg registers + for reg in arg_regs: + self.free_register(reg) + + return temp_reg + + else: + raise RuntimeError(f"Unknown expression type: {type(expr)}") + + def generate_binary_op(self, expr: BinaryOp) -> str: + # For operations that might contain function calls, we need to be careful + # about register allocation. Evaluate left, save it, evaluate right. + left_reg = self.generate_expression(expr.left) + + # If right side contains a function call, we need to save left_reg + # For now, always save to be safe + saved_reg = self.allocate_register() + self.emit(f" mov {left_reg}, {saved_reg}") + self.free_register(left_reg) + + right_reg = self.generate_expression(expr.right) + result_reg = self.allocate_register() + + if expr.op == "+": + self.emit(f" add {left_reg}, {right_reg}, {result_reg}") + elif expr.op == "-": + self.emit(f" sub {left_reg}, {right_reg}, {result_reg}") + elif expr.op == "*": + # Simple multiplication using loop + temp_label = self.new_label("mult") + end_label = self.new_label("mult_end") + self.emit(f" lli 0, {result_reg}") + self.emit(f"{temp_label}:") + self.emit(f" cmp {right_reg}, zero") + self.emit(f" jeq {end_label}") + self.emit(f" add {result_reg}, {left_reg}, {result_reg}") + self.emit(f" dec {right_reg}") + self.emit(f" jmp {temp_label}") + self.emit(f"{end_label}:") + elif expr.op == "/": + # Simple division using loop + temp_label = self.new_label("div") + end_label = self.new_label("div_end") + self.emit(f" lli 0, {result_reg}") + self.emit(f"{temp_label}:") + self.emit(f" cmp {left_reg}, {right_reg}") + self.emit(f" jlt {end_label}") + self.emit(f" sub {left_reg}, {right_reg}, {left_reg}") + self.emit(f" inc {result_reg}") + self.emit(f" jmp {temp_label}") + self.emit(f"{end_label}:") + elif expr.op in ["==", "!=", "<", ">", "<=", ">="]: + self.emit(f" cmp {left_reg}, {right_reg}") + + # Result is 1 if condition true, 0 otherwise + self.emit(f" lli 0, {result_reg}") + true_label = self.new_label("cmp_true") + end_label = self.new_label("cmp_end") + + if expr.op == "==": + self.emit(f" jeq {true_label}") + elif expr.op == "!=": + self.emit(f" jne {true_label}") + elif expr.op == "<": + self.emit(f" jlt {true_label}") + elif expr.op == ">": + self.emit(f" jgt {true_label}") + elif expr.op == "<=": + self.emit(f" jle {true_label}") + elif expr.op == ">=": + self.emit(f" jge {true_label}") + + self.emit(f" jmp {end_label}") + self.emit(f"{true_label}:") + self.emit(f" lli 1, {result_reg}") + self.emit(f"{end_label}:") + + self.free_register(left_reg) + self.free_register(right_reg) + return result_reg + + +def compile_c_to_asm(source: str) -> str: + """Compile C source code to DSA assembly.""" + lexer = Lexer(source) + tokens = lexer.tokenize() + + parser = Parser(tokens) + ast = parser.parse() + + codegen = CodeGenerator() + assembly = codegen.generate(ast) + + return assembly + + +def main(): + if len(sys.argv) < 2: + print("Usage: python compiler.py [output.dsa]") + sys.exit(1) + + input_file = sys.argv[1] + output_file = sys.argv[2] if len(sys.argv) > 2 else input_file.replace(".c", ".dsa") + + with open(input_file, "r") as f: + source = f.read() + + try: + assembly = compile_c_to_asm(source) + + with open(output_file, "w") as f: + f.write(assembly) + + print(f"Successfully compiled {input_file} to {output_file}") + except (SyntaxError, RuntimeError) as e: + print(f"Compilation error: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() + # # Example usage + # if len(sys.argv) > 1: + # example_c = sys.argv[1] + + # else: + # example_c = """ + # int factorial(int n) { + # if (n <= 1) { + # return 1; + # } + # return n * factorial(n - 1); + # } + + # int main() { + # int result; + # result = factorial(5); + # return result; + # } + # """ + + # print("Example C program:") + # print(example_c) + # print("\n" + "="*60 + "\n") + # print("Generated DSA assembly:") + # print(compile_c_to_asm(example_c)) diff --git a/c_compiler/src/codegen.rs b/c_compiler/src/codegen.rs new file mode 100644 index 0000000..06e030d --- /dev/null +++ b/c_compiler/src/codegen.rs @@ -0,0 +1,13 @@ +use crate::parser::Program; + +pub struct CodeGenerator; + +impl CodeGenerator { + pub fn new(ast: Program) -> Self { + CodeGenerator {} + } + + pub fn run(&mut self) -> Result { + Ok(String::new()) + } +} diff --git a/c_compiler/src/lexer.rs b/c_compiler/src/lexer.rs new file mode 100644 index 0000000..f6ccc74 --- /dev/null +++ b/c_compiler/src/lexer.rs @@ -0,0 +1,265 @@ +// ============================================================================ +// Token Types +// ============================================================================ + +#[derive(Debug, Clone, PartialEq)] +pub enum TokenType { + // Keywords + Int, + If, + Else, + While, + Return, + + // Identifiers and literals + Identifier(String), + Number(i32), + + // Operators + Plus, + Minus, + Star, + Slash, + Assign, + Eq, + Ne, + Lt, + Gt, + Le, + Ge, + + // Delimiters + LParen, + RParen, + LBrace, + RBrace, + Semicolon, + Comma, + + Eof, +} + +#[derive(Debug, Clone)] +pub struct Token { + pub token_type: TokenType, + pub line: usize, + pub col: usize, +} + +impl Token { + pub fn new(token_type: TokenType, line: usize, col: usize) -> Self { + Self { + token_type, + line, + col, + } + } +} + +// ============================================================================ +// Lexer +// ============================================================================ + +pub struct Lexer { + source: Vec, + pos: usize, + line: usize, + col: usize, +} + +impl Lexer { + pub fn new(source: &str) -> Self { + Self { + source: source.chars().collect(), + pos: 0, + line: 1, + col: 1, + } + } + + fn error(&self, msg: &str) -> String { + format!( + "Lexer error at line {}, col {}: {}", + self.line, self.col, msg + ) + } + + fn peek(&self, offset: usize) -> Option { + self.source.get(self.pos + offset).copied() + } + + fn advance(&mut self) -> Option { + if self.pos >= self.source.len() { + return None; + } + let ch = self.source[self.pos]; + self.pos += 1; + if ch == '\n' { + self.line += 1; + self.col = 1; + } else { + self.col += 1; + } + Some(ch) + } + + fn skip_whitespace(&mut self) { + while let Some(ch) = self.peek(0) { + if ch.is_whitespace() { + self.advance(); + } else { + break; + } + } + } + + fn skip_comment(&mut self) { + if self.peek(0) == Some('/') && self.peek(1) == Some('/') { + while let Some(ch) = self.peek(0) { + if ch == '\n' { + break; + } + self.advance(); + } + } + } + + fn read_number(&mut self) -> i32 { + let mut num_str = String::new(); + while let Some(ch) = self.peek(0) { + if ch.is_ascii_digit() { + num_str.push(ch); + self.advance(); + } else { + break; + } + } + num_str.parse().unwrap_or(0) + } + + fn read_identifier(&mut self) -> String { + let mut ident = String::new(); + while let Some(ch) = self.peek(0) { + if ch.is_alphanumeric() || ch == '_' { + ident.push(ch); + self.advance(); + } else { + break; + } + } + ident + } + + pub fn tokenize(&mut self) -> Result, String> { + let mut tokens = Vec::new(); + + loop { + self.skip_whitespace(); + self.skip_comment(); + + if self.pos >= self.source.len() { + break; + } + + let line = self.line; + let col = self.col; + let ch = self.peek(0).unwrap(); + + let token_type = if ch.is_ascii_digit() { + let num = self.read_number(); + TokenType::Number(num) + } else if ch.is_alphabetic() || ch == '_' { + let ident = self.read_identifier(); + match ident.as_str() { + "int" => TokenType::Int, + "if" => TokenType::If, + "else" => TokenType::Else, + "while" => TokenType::While, + "return" => TokenType::Return, + _ => TokenType::Identifier(ident), + } + } else { + match ch { + '=' if self.peek(1) == Some('=') => { + self.advance(); + self.advance(); + TokenType::Eq + } + '!' if self.peek(1) == Some('=') => { + self.advance(); + self.advance(); + TokenType::Ne + } + '<' if self.peek(1) == Some('=') => { + self.advance(); + self.advance(); + TokenType::Le + } + '>' if self.peek(1) == Some('=') => { + self.advance(); + self.advance(); + TokenType::Ge + } + '+' => { + self.advance(); + TokenType::Plus + } + '-' => { + self.advance(); + TokenType::Minus + } + '*' => { + self.advance(); + TokenType::Star + } + '/' => { + self.advance(); + TokenType::Slash + } + '=' => { + self.advance(); + TokenType::Assign + } + '<' => { + self.advance(); + TokenType::Lt + } + '>' => { + self.advance(); + TokenType::Gt + } + '(' => { + self.advance(); + TokenType::LParen + } + ')' => { + self.advance(); + TokenType::RParen + } + '{' => { + self.advance(); + TokenType::LBrace + } + '}' => { + self.advance(); + TokenType::RBrace + } + ';' => { + self.advance(); + TokenType::Semicolon + } + ',' => { + self.advance(); + TokenType::Comma + } + _ => return Err(self.error(&format!("Unexpected character: {}", ch))), + } + }; + + tokens.push(Token::new(token_type, line, col)); + } + + tokens.push(Token::new(TokenType::Eof, self.line, self.col)); + Ok(tokens) + } +} diff --git a/c_compiler/src/main.rs b/c_compiler/src/main.rs new file mode 100644 index 0000000..cd52c21 --- /dev/null +++ b/c_compiler/src/main.rs @@ -0,0 +1,72 @@ +use std::fmt; + +use crate::{codegen::CodeGenerator, lexer::Lexer, parser::Parser}; + +pub mod codegen; +pub mod lexer; +pub mod parser; + +// ============================================================================ +// Main & Tests +// ============================================================================ + +fn main() { + // read from input file: syntax "c_compiler [output.dsa]" + let args: Vec = std::env::args().collect(); + if args.len() < 2 { + eprintln!("Usage: c_compiler [output.dsa]"); + return; + } + + let input_file = &args[1]; + let output_file = if args.len() > 2 { + &args[2] + } else { + "output.dsa" + }; + + // read input + let input = std::fs::read_to_string(input_file).expect("Failed to read input file"); + + // Lexing + let mut lexer = Lexer::new(&input); + let tokens = match lexer.tokenize() { + Ok(tokens) => tokens, + Err(e) => { + eprintln!("Lexing error: {}", e); + return; + } + }; + + println!("Tokens:"); + for token in &tokens { + println!(" {:?}", token.token_type); + } + println!(); + + // Parsing + let mut parser = Parser::new(tokens); + let ast = match parser.parse() { + Ok(ast) => ast, + Err(e) => { + eprintln!("Parsing error: {}", e); + return; + } + }; + + println!("AST:"); + println!("{:#?}", ast); + + // Code Gen + let mut generator = CodeGenerator::new(ast); + let result = match generator.run() { + Ok(code) => code, + Err(e) => { + eprintln!("Parsing error: {}", e); + return; + } + }; + + println!("CODE:"); + println!("{:#?}", result); +} diff --git a/c_compiler/src/parser.rs b/c_compiler/src/parser.rs new file mode 100644 index 0000000..1ae587a --- /dev/null +++ b/c_compiler/src/parser.rs @@ -0,0 +1,558 @@ +// ============================================================================ +// AST Node Types +// ============================================================================ + +use std::fmt; + +use crate::lexer::{Token, TokenType}; + +#[derive(Debug, Clone)] +pub struct Program { + pub declarations: Vec, +} + +#[derive(Debug, Clone)] +pub enum Declaration { + Function { + name: String, + return_type: Type, + params: Vec, + body: Statement, + }, + Variable { + name: String, + init: Option, + }, +} + +#[derive(Debug, Clone)] +pub struct Parameter { + pub name: String, + pub param_type: Type, +} + +#[derive(Debug, Clone)] +pub enum Type { + Int, + Long, + Float, + Double, + Char, + Void, + Ptr(Box), + Array(Box, usize), + Struct(String), +} + +#[derive(Debug, Clone)] +pub enum Statement { + Compound { + statements: Vec, + }, + Assign { + // left side + name: String, + declare_type: Option, + + // right side + value: Option>, + }, + Expression { + expr: Expression, + }, + If { + condition: Expression, + then_stmt: Box, + else_stmt: Option>, + }, + While { + condition: Expression, + body: Box, + }, + Return { + expr: Option, + }, +} + +#[derive(Debug, Clone)] +pub enum Expression { + Empty, + Binary { + op: BinaryOperator, + left: Box, + right: Box, + }, + Unary { + op: UnaryOperator, + operand: Box, + }, + Variable { + name: String, + expr_type: Option, + }, + Number { + value: i32, + }, + Call { + name: String, + args: Vec, + }, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum BinaryOperator { + Add, + Sub, + Mul, + Div, + Eq, + Ne, + Lt, + Gt, + Le, + Ge, +} + +impl fmt::Display for BinaryOperator { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + BinaryOperator::Add => write!(f, "+"), + BinaryOperator::Sub => write!(f, "-"), + BinaryOperator::Mul => write!(f, "*"), + BinaryOperator::Div => write!(f, "/"), + BinaryOperator::Eq => write!(f, "=="), + BinaryOperator::Ne => write!(f, "!="), + BinaryOperator::Lt => write!(f, "<"), + BinaryOperator::Gt => write!(f, ">"), + BinaryOperator::Le => write!(f, "<="), + BinaryOperator::Ge => write!(f, ">="), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum UnaryOperator { + Plus, + Minus, +} + +impl fmt::Display for UnaryOperator { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + UnaryOperator::Plus => write!(f, "+"), + UnaryOperator::Minus => write!(f, "-"), + } + } +} + +// ============================================================================ +// Parser +// ============================================================================ + +pub struct Parser { + tokens: Vec, + pos: usize, +} + +impl Parser { + pub fn new(tokens: Vec) -> Self { + Self { tokens, pos: 0 } + } + + fn error(&self, msg: &str) -> String { + let token = self.current(); + format!( + "Parser error at line {}, col {}: {}", + token.line, token.col, msg + ) + } + + fn current(&self) -> &Token { + self.tokens + .get(self.pos) + .unwrap_or_else(|| self.tokens.last().unwrap()) + } + + fn peek(&self, offset: usize) -> &Token { + self.tokens + .get(self.pos + offset) + .unwrap_or_else(|| self.tokens.last().unwrap()) + } + + fn advance(&mut self) -> &Token { + if self.pos < self.tokens.len() - 1 { + self.pos += 1; + } + self.current() + } + + fn expect(&mut self, expected: TokenType) -> Result { + let token = self.current().clone(); + if std::mem::discriminant(&token.token_type) != std::mem::discriminant(&expected) + { + return Err(self.error(&format!( + "Expected {:?}, got {:?}", + expected, token.token_type + ))); + } + self.advance(); + Ok(token) + } + + pub fn parse(&mut self) -> Result { + let mut declarations = Vec::new(); + + while !matches!(self.current().token_type, TokenType::Eof) { + declarations.push(self.parse_declaration()?); + } + + Ok(Program { declarations }) + } + + fn parse_declaration(&mut self) -> Result { + self.expect(TokenType::Int)?; + + let name = match &self.current().token_type { + TokenType::Identifier(s) => s.clone(), + _ => return Err(self.error("Expected identifier")), + }; + self.advance(); + + match &self.current().token_type { + TokenType::LParen => { + // Function declaration + self.advance(); + let mut params = Vec::::new(); + + if !matches!(self.current().token_type, TokenType::RParen) { + self.expect(TokenType::Int)?; + + match &self.current().token_type { + TokenType::Identifier(s) => { + params.push(Parameter { + name: s.clone(), + param_type: Type::Int, + }); + self.advance(); + } + _ => return Err(self.error("Expected parameter name")), + } + + while matches!(self.current().token_type, TokenType::Comma) { + self.advance(); + self.expect(TokenType::Int)?; + + match &self.current().token_type { + TokenType::Identifier(s) => { + params.push(Parameter { + name: s.clone(), + param_type: Type::Int, + }); + self.advance(); + } + _ => return Err(self.error("Expected parameter name")), + } + } + } + + self.expect(TokenType::RParen)?; + let body = self.parse_compound_stmt()?; + + Ok(Declaration::Function { + name, + params, + body, + return_type: Type::Int, + }) + } + _ => { + // Variable declaration + let init = if matches!(self.current().token_type, TokenType::Assign) { + self.advance(); + Some(self.parse_expression()?) + } else { + None + }; + + self.expect(TokenType::Semicolon)?; + Ok(Declaration::Variable { name, init }) + } + } + } + + fn parse_compound_stmt(&mut self) -> Result { + self.expect(TokenType::LBrace)?; + let mut statements = Vec::new(); + + while !matches!(self.current().token_type, TokenType::RBrace) { + statements.push(self.parse_statement()?); + } + + self.expect(TokenType::RBrace)?; + Ok(Statement::Compound { statements }) + } + + fn parse_statement(&mut self) -> Result { + match &self.current().token_type { + TokenType::LBrace => Ok(self.parse_compound_stmt()?), + TokenType::If => self.parse_if_stmt(), + TokenType::While => self.parse_while_stmt(), + TokenType::Return => self.parse_return_stmt(), + TokenType::Identifier(name) => { + let name = name.clone(); + + // peek ahead for open paren (func call expr) + if matches!(self.peek(1).token_type, TokenType::LParen) { + let expr = self.parse_expression()?; // a function call expr + self.expect(TokenType::Semicolon)?; + return Ok(Statement::Expression { expr }); + } + + self.advance(); // advance past identifier + + // assignment expression + if matches!(self.current().token_type, TokenType::Assign) { + self.advance(); + let expr = self.parse_expression()?; + + self.expect(TokenType::Semicolon)?; + Ok(Statement::Assign { + name, + value: Some(Box::new(expr)), + declare_type: None, + }) + } + // var expression + else { + self.expect(TokenType::Semicolon)?; + Ok(Statement::Expression { + expr: Expression::Variable { + name, + expr_type: None, + }, + }) + } + } + TokenType::Int => { + // Local variable declaration + self.advance(); + let name = match &self.current().token_type { + TokenType::Identifier(s) => s.clone(), + _ => return Err(self.error("Expected variable name")), + }; + self.advance(); + + let init = if matches!(self.current().token_type, TokenType::Assign) { + self.advance(); + Some(self.parse_expression()?) + } else { + None + }; + + self.expect(TokenType::Semicolon)?; + + // Convert to assignment expression statement + let expr = if let Some(init_expr) = init { + Statement::Assign { + name, + value: Some(Box::new(init_expr)), + declare_type: Some(Type::Int), + } + } else { + Statement::Assign { + name, + value: None, + declare_type: Some(Type::Int), + } + }; + + Ok(expr) + } + _ => { + let expr = if matches!(self.current().token_type, TokenType::Semicolon) { + Expression::Empty + } else { + self.parse_expression()? + }; + + self.expect(TokenType::Semicolon)?; + Ok(Statement::Expression { expr }) + } + } + } + + fn parse_if_stmt(&mut self) -> Result { + self.expect(TokenType::If)?; + self.expect(TokenType::LParen)?; + let condition = self.parse_expression()?; + self.expect(TokenType::RParen)?; + let then_stmt = Box::new(self.parse_statement()?); + + let else_stmt = if matches!(self.current().token_type, TokenType::Else) { + self.advance(); + Some(Box::new(self.parse_statement()?)) + } else { + None + }; + + Ok(Statement::If { + condition, + then_stmt, + else_stmt, + }) + } + + fn parse_while_stmt(&mut self) -> Result { + self.expect(TokenType::While)?; + self.expect(TokenType::LParen)?; + let condition = self.parse_expression()?; + self.expect(TokenType::RParen)?; + let body = Box::new(self.parse_statement()?); + + Ok(Statement::While { condition, body }) + } + + fn parse_return_stmt(&mut self) -> Result { + self.expect(TokenType::Return)?; + + let expr = if matches!(self.current().token_type, TokenType::Semicolon) { + None + } else { + Some(self.parse_expression()?) + }; + + self.expect(TokenType::Semicolon)?; + Ok(Statement::Return { expr }) + } + + fn parse_expression(&mut self) -> Result { + self.parse_comparison() + } + + fn parse_comparison(&mut self) -> Result { + let mut expr = self.parse_additive()?; + + while let Some(op) = match &self.current().token_type { + TokenType::Eq => Some(BinaryOperator::Eq), + TokenType::Ne => Some(BinaryOperator::Ne), + TokenType::Lt => Some(BinaryOperator::Lt), + TokenType::Gt => Some(BinaryOperator::Gt), + TokenType::Le => Some(BinaryOperator::Le), + TokenType::Ge => Some(BinaryOperator::Ge), + _ => None, + } { + self.advance(); + let right = Box::new(self.parse_additive()?); + expr = Expression::Binary { + op, + left: Box::new(expr), + right, + }; + } + + Ok(expr) + } + + fn parse_additive(&mut self) -> Result { + let mut expr = self.parse_multiplicative()?; + + while let Some(op) = match &self.current().token_type { + TokenType::Plus => Some(BinaryOperator::Add), + TokenType::Minus => Some(BinaryOperator::Sub), + _ => None, + } { + self.advance(); + let right = Box::new(self.parse_multiplicative()?); + expr = Expression::Binary { + op, + left: Box::new(expr), + right, + }; + } + + Ok(expr) + } + + fn parse_multiplicative(&mut self) -> Result { + let mut expr = self.parse_unary()?; + + while let Some(op) = match &self.current().token_type { + TokenType::Star => Some(BinaryOperator::Mul), + TokenType::Slash => Some(BinaryOperator::Div), + _ => None, + } { + self.advance(); + let right = Box::new(self.parse_unary()?); + expr = Expression::Binary { + op, + left: Box::new(expr), + right, + }; + } + + Ok(expr) + } + + fn parse_unary(&mut self) -> Result { + let op = match &self.current().token_type { + TokenType::Plus => Some(UnaryOperator::Plus), + TokenType::Minus => Some(UnaryOperator::Minus), + _ => None, + }; + + if let Some(op) = op { + self.advance(); + let operand = Box::new(self.parse_unary()?); + return Ok(Expression::Unary { op, operand }); + } + + self.parse_primary() + } + + fn parse_primary(&mut self) -> Result { + match &self.current().token_type.clone() { + TokenType::Number(n) => { + let value = *n; + self.advance(); + Ok(Expression::Number { value }) + } + TokenType::Identifier(name) => { + let name = name.clone(); + self.advance(); + + if matches!(self.current().token_type, TokenType::LParen) { + // Function call + self.advance(); + let mut args = Vec::new(); + + if !matches!(self.current().token_type, TokenType::RParen) { + args.push(self.parse_expression()?); + + while matches!(self.current().token_type, TokenType::Comma) { + self.advance(); + args.push(self.parse_expression()?); + } + } + + self.expect(TokenType::RParen)?; + Ok(Expression::Call { name, args }) + } else { + Ok(Expression::Variable { + name, + expr_type: None, + }) + } + } + TokenType::LParen => { + self.advance(); + let expr = self.parse_expression()?; + self.expect(TokenType::RParen)?; + Ok(expr) + } + _ => Err(self.error(&format!( + "Unexpected token: {:?}", + self.current().token_type + ))), + } + } +}