deleted the c compiler

This commit is contained in:
2026-02-05 01:07:59 +00:00
parent a1099249e9
commit 8d130a870c
14 changed files with 0 additions and 4441 deletions
-8
View File
@@ -1,8 +0,0 @@
[package]
name = "c_compiler"
version.workspace = true
edition.workspace = true
authors.workspace = true
[dependencies]
chrono = "0.4.42"
-14
View File
@@ -1,14 +0,0 @@
// Global integer, initialised to 5 and read by main.
int var_x = 5;
// Recursive factorial: yields 1 when n <= 1, otherwise n * factorial(n - 1).
int factorial(int n) {
if (n <= 1) {
return 1;
}
return n * factorial(n - 1);
}
// Passes var_x + factorial(5) to print, then returns 0.
int main() {
int result = var_x + factorial(5);
print(result);
return 0;
}
-926
View File
@@ -1,926 +0,0 @@
#!/usr/bin/env python3
"""
Simple C to DSA Assembly Compiler
Supports a subset of C including:
- int variables and functions
- Arithmetic operations (+, -, *, /)
- Comparisons (==, !=, <, >, <=, >=)
- If/else statements
- While loops
- Function calls
- Return statements
"""
import re
import sys
from typing import List, Dict, Optional, Tuple
from dataclasses import dataclass
from enum import Enum
from pprint import pprint
import json
class TokenType(Enum):
    """Every category of token the lexer can produce.

    For keywords, operators and delimiters the member value is the exact
    source spelling; identifiers, numbers and end-of-input use a
    descriptive tag instead.
    """

    # --- keywords ---
    INT = "int"
    IF = "if"
    ELSE = "else"
    WHILE = "while"
    RETURN = "return"

    # --- identifiers and literals ---
    IDENTIFIER = "IDENTIFIER"
    NUMBER = "NUMBER"

    # --- arithmetic, assignment and comparison operators ---
    PLUS = "+"
    MINUS = "-"
    STAR = "*"
    SLASH = "/"
    ASSIGN = "="
    EQ = "=="
    NE = "!="
    LT = "<"
    GT = ">"
    LE = "<="
    GE = ">="

    # --- delimiters ---
    LPAREN = "("
    RPAREN = ")"
    LBRACE = "{"
    RBRACE = "}"
    SEMICOLON = ";"
    COMMA = ","

    # --- end-of-input sentinel ---
    EOF = "EOF"
@dataclass
class Token:
    """One lexical token plus the source position where it starts."""

    # Category of the token.
    type: TokenType
    # Text of the token as it appeared in the source ("" for EOF).
    value: str
    # Line and column of the token's first character; the lexer starts
    # counting both at 1.
    line: int
    col: int
class Lexer:
    """Turns C-subset source text into a flat list of `Token` objects.

    Whitespace and `//` line comments are discarded.  Line and column are
    tracked (both starting at 1) so tokens and error messages can point
    back into the source.
    """

    # Reserved words; every other identifier becomes TokenType.IDENTIFIER.
    _KEYWORDS = {
        "int": TokenType.INT,
        "if": TokenType.IF,
        "else": TokenType.ELSE,
        "while": TokenType.WHILE,
        "return": TokenType.RETURN,
    }

    # Two-character operators are tried before their one-character prefixes.
    _TWO_CHAR_TOKENS = {
        "==": TokenType.EQ,
        "!=": TokenType.NE,
        "<=": TokenType.LE,
        ">=": TokenType.GE,
    }

    _ONE_CHAR_TOKENS = {
        "+": TokenType.PLUS,
        "-": TokenType.MINUS,
        "*": TokenType.STAR,
        "/": TokenType.SLASH,
        "=": TokenType.ASSIGN,
        "<": TokenType.LT,
        ">": TokenType.GT,
        "(": TokenType.LPAREN,
        ")": TokenType.RPAREN,
        "{": TokenType.LBRACE,
        "}": TokenType.RBRACE,
        ";": TokenType.SEMICOLON,
        ",": TokenType.COMMA,
    }

    def __init__(self, source: str):
        self.source = source
        self.pos = 0
        self.line = 1
        self.col = 1
        self.tokens = []

    def error(self, msg: str):
        """Raise a SyntaxError tagged with the current line and column."""
        raise SyntaxError(f"Lexer error at line {self.line}, col {self.col}: {msg}")

    def peek(self, offset: int = 0) -> Optional[str]:
        """Return the character `offset` places ahead, or None past the end."""
        pos = self.pos + offset
        return self.source[pos] if pos < len(self.source) else None

    def advance(self) -> Optional[str]:
        """Consume and return one character, updating line/column tracking."""
        if self.pos >= len(self.source):
            return None
        char = self.source[self.pos]
        self.pos += 1
        if char == "\n":
            self.line += 1
            self.col = 1
        else:
            self.col += 1
        return char

    def skip_whitespace(self):
        """Consume a run of spaces, tabs, carriage returns and newlines."""
        while self.peek() and self.peek() in " \t\n\r":
            self.advance()

    def skip_comment(self):
        """Consume one `//` comment, including its terminating newline."""
        if self.peek() == "/" and self.peek(1) == "/":
            while self.peek() and self.peek() != "\n":
                self.advance()
            self.advance()  # skip newline

    def _skip_trivia(self):
        """Skip any mix of whitespace and comments before the next token.

        A single whitespace-then-comment pass is not enough: a comment may
        be followed by indentation, blank lines or another comment, all of
        which must be discarded before a token can start.
        """
        while True:
            before = self.pos
            self.skip_whitespace()
            self.skip_comment()
            if self.pos == before:
                return

    def read_number(self) -> str:
        """Consume and return a maximal run of digit characters."""
        num = ""
        while self.peek() and self.peek().isdigit():
            num += self.advance()
        return num

    def read_identifier(self) -> str:
        """Consume and return a maximal run of alphanumerics/underscores."""
        ident = ""
        while self.peek() and (self.peek().isalnum() or self.peek() == "_"):
            ident += self.advance()
        return ident

    def tokenize(self) -> List[Token]:
        """Lex the whole source and return the tokens, ending with EOF.

        Raises:
            SyntaxError: on a character that cannot start any token.
        """
        while True:
            self._skip_trivia()
            if self.pos >= len(self.source):
                break

            # Remember where the token starts before consuming anything.
            line, col = self.line, self.col
            char = self.peek()

            if char.isdigit():
                text = self.read_number()
                self.tokens.append(Token(TokenType.NUMBER, text, line, col))
                continue

            if char.isalpha() or char == "_":
                text = self.read_identifier()
                kind = self._KEYWORDS.get(text, TokenType.IDENTIFIER)
                self.tokens.append(Token(kind, text, line, col))
                continue

            pair = char + (self.peek(1) or "")
            if pair in self._TWO_CHAR_TOKENS:
                self.advance()
                self.advance()
                self.tokens.append(Token(self._TWO_CHAR_TOKENS[pair], pair, line, col))
            elif char in self._ONE_CHAR_TOKENS:
                self.advance()
                self.tokens.append(Token(self._ONE_CHAR_TOKENS[char], char, line, col))
            else:
                self.error(f"Unexpected character: {char}")

        self.tokens.append(Token(TokenType.EOF, "", self.line, self.col))
        return self.tokens
# ---------------------------------------------------------------------------
# AST node classes
#
# Plain dataclasses: the parser builds them, the code generator dispatches on
# their concrete type.  Forward references are written as strings.
# ---------------------------------------------------------------------------


@dataclass
class ASTNode:
    """Common root of every syntax-tree node."""


@dataclass
class Program(ASTNode):
    """A translation unit: the top-level declarations in source order."""

    declarations: List["Declaration"]


@dataclass
class Declaration(ASTNode):
    """Base class for top-level declarations."""


@dataclass
class FunctionDecl(Declaration):
    """A function definition; `params` holds the parameter names only."""

    name: str
    params: List[str]
    body: "CompoundStmt"


@dataclass
class VarDecl(Declaration):
    """A global variable, optionally with an initialiser expression."""

    name: str
    init: Optional["Expression"] = None


@dataclass
class Statement(ASTNode):
    """Base class for statements."""


@dataclass
class CompoundStmt(Statement):
    """A `{ ... }` block of statements."""

    statements: List[Statement]


@dataclass
class ExprStmt(Statement):
    """An expression used as a statement; `expr` is None for an empty one."""

    expr: Optional["Expression"]


@dataclass
class IfStmt(Statement):
    """`if (condition) then_stmt [else else_stmt]`."""

    condition: "Expression"
    then_stmt: Statement
    else_stmt: Optional[Statement] = None


@dataclass
class WhileStmt(Statement):
    """`while (condition) body`."""

    condition: "Expression"
    body: Statement


@dataclass
class ReturnStmt(Statement):
    """`return [expr];` - `expr` is None when no value is given."""

    expr: Optional["Expression"]


@dataclass
class Expression(ASTNode):
    """Base class for expressions."""


@dataclass
class BinaryOp(Expression):
    """A binary operation; `op` is the operator's source spelling."""

    op: str
    left: Expression
    right: Expression


@dataclass
class UnaryOp(Expression):
    """A prefix `+` or `-` applied to `operand`."""

    op: str
    operand: Expression


@dataclass
class AssignExpr(Expression):
    """Assignment of `value` to the variable called `name`."""

    name: str
    value: Expression


@dataclass
class VarExpr(Expression):
    """A read of the variable called `name`."""

    name: str


@dataclass
class NumberExpr(Expression):
    """An integer literal."""

    value: int


@dataclass
class CallExpr(Expression):
    """A call of function `name` with positional `args`."""

    name: str
    args: List[Expression]
class Parser:
    """Recursive-descent parser producing a `Program` from lexer tokens.

    Precedence, loosest to tightest: assignment (right-associative),
    comparison, additive, multiplicative, unary, primary.  All binary
    levels are left-associative.
    """

    _COMPARISON_OPS = (
        TokenType.EQ,
        TokenType.NE,
        TokenType.LT,
        TokenType.GT,
        TokenType.LE,
        TokenType.GE,
    )
    _ADDITIVE_OPS = (TokenType.PLUS, TokenType.MINUS)
    _MULTIPLICATIVE_OPS = (TokenType.STAR, TokenType.SLASH)

    def __init__(self, tokens: List[Token]):
        self.tokens = tokens
        self.pos = 0

    # ------------------------------------------------------------------
    # Token-stream helpers
    # ------------------------------------------------------------------

    def error(self, msg: str):
        """Raise a SyntaxError located at the current token."""
        where = self.current()
        raise SyntaxError(f"Parser error at line {where.line}, col {where.col}: {msg}")

    def current(self) -> Token:
        """Return the token under the cursor (the last token once past the end)."""
        return self.peek()

    def peek(self, offset: int = 0) -> Token:
        """Return the token `offset` places ahead, clamped to the last token."""
        index = self.pos + offset
        if index < len(self.tokens):
            return self.tokens[index]
        return self.tokens[-1]

    def advance(self) -> Token:
        """Return the current token and step forward, never past the final one."""
        consumed = self.current()
        last_index = len(self.tokens) - 1
        if self.pos < last_index:
            self.pos += 1
        return consumed

    def expect(self, token_type: TokenType) -> Token:
        """Consume a token of `token_type`, or raise SyntaxError."""
        found = self.current()
        if found.type != token_type:
            self.error(f"Expected {token_type.value}, got {found.type.value}")
        return self.advance()

    def _at(self, token_type: TokenType) -> bool:
        """True when the current token has the given type."""
        return self.current().type == token_type

    # ------------------------------------------------------------------
    # Declarations
    # ------------------------------------------------------------------

    def parse(self) -> Program:
        """Parse declarations until EOF."""
        decls = []
        while not self._at(TokenType.EOF):
            decls.append(self.parse_declaration())
        return Program(decls)

    def parse_declaration(self) -> Declaration:
        """Parse `int name ...` as either a function or a global variable."""
        self.expect(TokenType.INT)
        name = self.expect(TokenType.IDENTIFIER).value

        if not self._at(TokenType.LPAREN):
            # Global variable, optionally initialised.
            init = self._parse_optional_initializer()
            self.expect(TokenType.SEMICOLON)
            return VarDecl(name, init)

        # Function: parameter list followed by a braced body.
        self.advance()
        params = []
        if not self._at(TokenType.RPAREN):
            params.append(self._parse_parameter())
            while self._at(TokenType.COMMA):
                self.advance()
                params.append(self._parse_parameter())
        self.expect(TokenType.RPAREN)
        body = self.parse_compound_stmt()
        return FunctionDecl(name, params, body)

    def _parse_parameter(self) -> str:
        """Parse `int name` and return the name."""
        self.expect(TokenType.INT)
        return self.expect(TokenType.IDENTIFIER).value

    def _parse_optional_initializer(self) -> Optional[Expression]:
        """Parse `= expr` if present; return the expression or None."""
        if self._at(TokenType.ASSIGN):
            self.advance()
            return self.parse_expression()
        return None

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------

    def parse_compound_stmt(self) -> CompoundStmt:
        """Parse `{ statement* }`."""
        self.expect(TokenType.LBRACE)
        body = []
        while not self._at(TokenType.RBRACE):
            body.append(self.parse_statement())
        self.expect(TokenType.RBRACE)
        return CompoundStmt(body)

    def parse_statement(self) -> Statement:
        """Parse one statement, chosen by its first token."""
        kind = self.current().type
        if kind == TokenType.LBRACE:
            return self.parse_compound_stmt()
        if kind == TokenType.IF:
            return self.parse_if_stmt()
        if kind == TokenType.WHILE:
            return self.parse_while_stmt()
        if kind == TokenType.RETURN:
            return self.parse_return_stmt()

        if kind == TokenType.INT:
            # Local declaration: represented as an assignment when it has
            # an initialiser, and as an empty statement otherwise.
            self.advance()
            name = self.expect(TokenType.IDENTIFIER).value
            init = self._parse_optional_initializer()
            self.expect(TokenType.SEMICOLON)
            return ExprStmt(AssignExpr(name, init) if init else None)

        # Expression statement; a lone `;` yields ExprStmt(None).
        expr = None
        if not self._at(TokenType.SEMICOLON):
            expr = self.parse_expression()
        self.expect(TokenType.SEMICOLON)
        return ExprStmt(expr)

    def parse_if_stmt(self) -> IfStmt:
        """Parse `if (cond) stmt [else stmt]`."""
        self.expect(TokenType.IF)
        self.expect(TokenType.LPAREN)
        condition = self.parse_expression()
        self.expect(TokenType.RPAREN)
        then_stmt = self.parse_statement()
        else_stmt = None
        if self._at(TokenType.ELSE):
            self.advance()
            else_stmt = self.parse_statement()
        return IfStmt(condition, then_stmt, else_stmt)

    def parse_while_stmt(self) -> WhileStmt:
        """Parse `while (cond) stmt`."""
        self.expect(TokenType.WHILE)
        self.expect(TokenType.LPAREN)
        condition = self.parse_expression()
        self.expect(TokenType.RPAREN)
        return WhileStmt(condition, self.parse_statement())

    def parse_return_stmt(self) -> ReturnStmt:
        """Parse `return [expr];`."""
        self.expect(TokenType.RETURN)
        expr = None
        if not self._at(TokenType.SEMICOLON):
            expr = self.parse_expression()
        self.expect(TokenType.SEMICOLON)
        return ReturnStmt(expr)

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------

    def parse_expression(self) -> Expression:
        """Entry point for expressions (the assignment level)."""
        return self.parse_assignment()

    def parse_assignment(self) -> Expression:
        """Parse `name = expr` (right-associative) or fall through."""
        target = self.parse_comparison()
        if not self._at(TokenType.ASSIGN):
            return target
        if not isinstance(target, VarExpr):
            self.error("Invalid assignment target")
        self.advance()
        value = self.parse_assignment()
        return AssignExpr(target.name, value)

    def _parse_binary_level(self, parse_operand, op_types) -> Expression:
        """Parse a left-associative chain `operand (op operand)*`."""
        node = parse_operand()
        while self.current().type in op_types:
            op = self.advance().value
            node = BinaryOp(op, node, parse_operand())
        return node

    def parse_comparison(self) -> Expression:
        """Parse ==, !=, <, >, <=, >= chains."""
        return self._parse_binary_level(self.parse_additive, self._COMPARISON_OPS)

    def parse_additive(self) -> Expression:
        """Parse + and - chains."""
        return self._parse_binary_level(self.parse_multiplicative, self._ADDITIVE_OPS)

    def parse_multiplicative(self) -> Expression:
        """Parse * and / chains."""
        return self._parse_binary_level(self.parse_unary, self._MULTIPLICATIVE_OPS)

    def parse_unary(self) -> Expression:
        """Parse any number of prefix + / - signs, then a primary."""
        if self.current().type in self._ADDITIVE_OPS:
            op = self.advance().value
            return UnaryOp(op, self.parse_unary())
        return self.parse_primary()

    def parse_primary(self) -> Expression:
        """Parse a number, a variable, a call, or a parenthesised expression."""
        token = self.current()

        if token.type == TokenType.NUMBER:
            self.advance()
            return NumberExpr(int(token.value))

        if token.type == TokenType.LPAREN:
            self.advance()
            inner = self.parse_expression()
            self.expect(TokenType.RPAREN)
            return inner

        if token.type != TokenType.IDENTIFIER:
            self.error(f"Unexpected token: {token.type.value}")
            return None

        name = self.advance().value
        if not self._at(TokenType.LPAREN):
            return VarExpr(name)

        # Identifier followed by "(" is a call.
        self.advance()
        args = []
        if not self._at(TokenType.RPAREN):
            args.append(self.parse_expression())
            while self._at(TokenType.COMMA):
                self.advance()
                args.append(self.parse_expression())
        self.expect(TokenType.RPAREN)
        return CallExpr(name, args)
class CodeGenerator:
    """Walks the AST and emits DSA assembly as a list of text lines.

    Calling convention used by the emitted code (as established by the
    ``init`` stub and the function prologue below):

    * the caller pushes arguments right-to-left - or a single ``zero``
      placeholder when there are none - and then issues ``call``;
    * the callee saves ``bpr``, which puts the first argument at ``bpr+8``;
    * the return value is written over that ``bpr+8`` slot and popped by
      the caller once the call returns.

    NOTE(review): this assumes the stack grows downwards in 4-byte words
    and that ``push`` pre-decrements ``spr``.  That is what the ``bpr+8``
    parameter offset implies, but confirm against the DSA ISA.

    Registers are not preserved by callees, so any value that must survive
    a call is spilled to the stack by the caller (see `generate_binary_op`).
    """

    # Conditional jump taken when the comparison holds.
    _COMPARE_JUMPS = {
        "==": "jeq",
        "!=": "jne",
        "<": "jlt",
        ">": "jgt",
        "<=": "jle",
        ">=": "jge",
    }

    def __init__(self):
        self.output = []
        self.label_counter = 0
        self.string_counter = 0
        self.functions = {}
        self.current_function = None
        self.local_vars = {}
        self.global_vars = {}
        self.register_pool = [f"rg{i:x}" for i in range(16)]
        self.used_registers = set()

    # ------------------------------------------------------------------
    # Small utilities
    # ------------------------------------------------------------------

    def new_label(self, prefix: str = "L") -> str:
        """Return a fresh label `<prefix><n>`; `n` is shared by all prefixes."""
        label = f"{prefix}{self.label_counter}"
        self.label_counter += 1
        return label

    def allocate_register(self) -> str:
        """Reserve and return the lowest-numbered free general register.

        Raises:
            RuntimeError: when all sixteen registers are in use.
        """
        for reg in self.register_pool:
            if reg not in self.used_registers:
                self.used_registers.add(reg)
                return reg
        raise RuntimeError("Out of registers")

    def free_register(self, reg: str):
        """Return `reg` to the pool (a no-op if it was not reserved)."""
        self.used_registers.discard(reg)

    def emit(self, code: str):
        """Append one line of assembly to the output."""
        self.output.append(code)

    def _contains_call(self, node) -> bool:
        """True when the AST fragment `node` contains a function call."""
        if isinstance(node, CallExpr):
            return True
        if isinstance(node, list):
            return any(self._contains_call(item) for item in node)
        if isinstance(node, ASTNode):
            return any(self._contains_call(child) for child in vars(node).values())
        return False

    def _collect_new_locals(self, node, names: set):
        """Add to `names` every assigned name that is neither a parameter
        nor a global, i.e. every name that will become a stack local."""
        if isinstance(node, list):
            for item in node:
                self._collect_new_locals(item, names)
        elif isinstance(node, ASTNode):
            for child in vars(node).values():
                self._collect_new_locals(child, names)
            if (
                isinstance(node, AssignExpr)
                and node.name not in self.local_vars
                and node.name not in self.global_vars
            ):
                names.add(node.name)

    # ------------------------------------------------------------------
    # Program / function level
    # ------------------------------------------------------------------

    def generate(self, program: Program) -> str:
        """Emit the whole program and return it as newline-joined text."""
        self.emit("// Global variables")
        for decl in program.declarations:
            if isinstance(decl, VarDecl):
                self.global_vars[decl.name] = f"var_{decl.name}"
                # Only literal initialisers are supported; anything else
                # (including a missing initialiser) becomes 0.
                initial = decl.init.value if isinstance(decl.init, NumberExpr) else 0
                self.emit(f"dw var_{decl.name}: {initial}")

        self.emit("")
        self.emit("// Entry point")
        self.emit("dw stack_bottom: 0x10000")
        self.emit("")
        self.emit("init:")
        self.emit(" ldw stack_bottom, spr")
        self.emit(" mov spr, bpr")
        self.emit(" push zero")  # return-value slot for main
        self.emit(" call main")
        self.emit(" pop rg0")
        self.emit(" hlt")
        self.emit("")

        for decl in program.declarations:
            if isinstance(decl, FunctionDecl):
                self.generate_function(decl)

        return "\n".join(self.output)

    def generate_function(self, func: FunctionDecl):
        """Emit prologue, body and shared epilogue for one function."""
        self.current_function = func.name
        self.functions[func.name] = func
        self.used_registers.clear()

        # Parameters start at bpr+8 (after return addr at bpr+4).
        self.local_vars = {
            name: 8 + index * 4 for index, name in enumerate(func.params)
        }

        local_names = set()
        self._collect_new_locals(func.body, local_names)

        self.emit(f"{func.name}:")
        self.emit(" push bpr")
        self.emit(" mov spr, bpr")
        # Reserve one stack word per local so that later pushes (call
        # arguments, spills) cannot overwrite the slots at bpr-4, bpr-8, ...
        for _ in local_names:
            self.emit(" push zero")
        self.emit("")

        self.generate_compound_stmt(func.body)

        # Every return jumps here; falling off the end lands here too.
        self.emit("// default return")
        self.emit(f"{func.name}_end:")
        self.emit(" mov bpr, spr")  # also discards the reserved locals
        self.emit(" pop bpr")
        self.emit(" return")
        self.emit("")

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------

    def generate_compound_stmt(self, stmt: CompoundStmt):
        """Emit each statement of a block in order."""
        for s in stmt.statements:
            self.generate_statement(s)

    def generate_statement(self, stmt: Statement):
        """Dispatch on the statement's concrete type."""
        if isinstance(stmt, CompoundStmt):
            self.generate_compound_stmt(stmt)
        elif isinstance(stmt, ExprStmt):
            if stmt.expr:
                reg = self.generate_expression(stmt.expr)
                self.free_register(reg)
        elif isinstance(stmt, IfStmt):
            self.generate_if_stmt(stmt)
        elif isinstance(stmt, WhileStmt):
            self.generate_while_stmt(stmt)
        elif isinstance(stmt, ReturnStmt):
            self.generate_return_stmt(stmt)

    def generate_if_stmt(self, stmt: IfStmt):
        """Emit an if/else; a condition value of zero selects the else path."""
        else_label = self.new_label("else")
        end_label = self.new_label("endif")

        cond_reg = self.generate_expression(stmt.condition)
        self.emit(f" cmp {cond_reg}, zero")
        self.free_register(cond_reg)

        false_target = else_label if stmt.else_stmt else end_label
        self.emit(f" jeq {false_target}")

        self.generate_statement(stmt.then_stmt)

        if stmt.else_stmt:
            self.emit(f" jmp {end_label}")
            self.emit(f"{else_label}:")
            self.generate_statement(stmt.else_stmt)

        self.emit(f"{end_label}:")

    def generate_while_stmt(self, stmt: WhileStmt):
        """Emit a loop that re-tests the condition before every iteration."""
        start_label = self.new_label("while_start")
        end_label = self.new_label("while_end")

        self.emit(f"{start_label}:")
        cond_reg = self.generate_expression(stmt.condition)
        self.emit(f" cmp {cond_reg}, zero")
        self.free_register(cond_reg)
        self.emit(f" jeq {end_label}")

        self.generate_statement(stmt.body)
        self.emit(f" jmp {start_label}")
        self.emit(f"{end_label}:")

    def generate_return_stmt(self, stmt: ReturnStmt):
        """Store the return value (if any) and jump to the epilogue."""
        if stmt.expr:
            reg = self.generate_expression(stmt.expr)
            # The return slot is addressed from bpr, not spr: spr sits below
            # bpr whenever the function has reserved locals.
            self.emit(f" stw {reg}, bpr, 8")
            self.free_register(reg)
        self.emit(f" jmp {self.current_function}_end")

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------

    def generate_expression(self, expr: Expression) -> str:
        """Emit code for `expr`; return the register holding its value.

        The caller owns the returned register and must free it.

        Raises:
            RuntimeError: on an undefined variable, an unknown node type,
                or register exhaustion.
        """
        if isinstance(expr, NumberExpr):
            reg = self.allocate_register()
            self.emit(f" lli {expr.value & 0xFFFF}, {reg}")
            # The upper half is written for everything outside 0..0xFF,
            # exactly as before.
            if expr.value > 0xFF or expr.value < 0:
                self.emit(f" lui {(expr.value >> 16) & 0xFFFF}, {reg}")
            return reg

        if isinstance(expr, VarExpr):
            reg = self.allocate_register()
            if expr.name in self.local_vars:
                offset = self.local_vars[expr.name]
                self.emit(f" ldw bpr, {reg}, {offset}")
            elif expr.name in self.global_vars:
                label = self.global_vars[expr.name]
                self.emit(f" ldw {label}, {reg}")
            else:
                raise RuntimeError(f"Undefined variable: {expr.name}")
            return reg

        if isinstance(expr, AssignExpr):
            value_reg = self.generate_expression(expr.value)
            if expr.name in self.local_vars:
                offset = self.local_vars[expr.name]
                self.emit(f" stw {value_reg}, bpr, {offset}")
            elif expr.name in self.global_vars:
                label = self.global_vars[expr.name]
                self.emit(f" stw {value_reg}, {label}")
            else:
                # First assignment to a new local: take the next slot below
                # bpr (space was reserved in the function prologue).
                taken = len([v for v in self.local_vars.values() if v < 0])
                offset = -(taken + 1) * 4
                self.local_vars[expr.name] = offset
                self.emit(f" stw {value_reg}, bpr, {offset}")
            return value_reg

        if isinstance(expr, BinaryOp):
            return self.generate_binary_op(expr)

        if isinstance(expr, UnaryOp):
            operand_reg = self.generate_expression(expr.operand)
            result_reg = self.allocate_register()
            if expr.op == "-":
                # Negate as 0 - operand.
                self.emit(f" lwi 0, {result_reg}")
                self.emit(f" sub {result_reg}, {operand_reg}, {result_reg}")
            else:  # +
                self.emit(f" mov {operand_reg}, {result_reg}")
            self.free_register(operand_reg)
            return result_reg

        if isinstance(expr, CallExpr):
            return self._generate_call(expr)

        raise RuntimeError(f"Unknown expression type: {type(expr)}")

    def _generate_call(self, expr: CallExpr) -> str:
        """Emit a call and return the register holding its result."""
        if expr.args:
            # Arguments go on the stack right-to-left so that the first one
            # ends up at the callee's bpr+8.
            for arg in reversed(expr.args):
                arg_reg = self.generate_expression(arg)
                self.emit(f" push {arg_reg}")
                self.free_register(arg_reg)
        else:
            # No argument slot to reuse: push a placeholder for the return
            # value, as the init stub does for main.
            self.emit(" push zero")

        self.emit(f" call {expr.name}")

        # The return value overwrote the top stack slot.
        result_reg = self.allocate_register()
        self.emit(f" pop {result_reg}")
        # Discard the remaining argument slots.
        for _ in range(len(expr.args) - 1):
            self.emit(" pop zero")
        return result_reg

    def generate_binary_op(self, expr: BinaryOp) -> str:
        """Emit code for a binary operation and return the result register.

        The left operand stays reserved while the right operand is
        evaluated, so the two can never share a register.  When the right
        operand contains a call, the left value is additionally spilled to
        the stack because the callee is free to overwrite any register.
        """
        left_reg = self.generate_expression(expr.left)

        spill_left = self._contains_call(expr.right)
        if spill_left:
            self.emit(f" push {left_reg}")
        right_reg = self.generate_expression(expr.right)
        if spill_left:
            self.emit(f" pop {left_reg}")

        result_reg = self.allocate_register()

        if expr.op == "+":
            self.emit(f" add {left_reg}, {right_reg}, {result_reg}")
        elif expr.op == "-":
            self.emit(f" sub {left_reg}, {right_reg}, {result_reg}")
        elif expr.op == "*":
            # Repeated addition: add `left` to the result `right` times.
            # NOTE(review): does not terminate sensibly for a negative
            # right operand.
            temp_label = self.new_label("mult")
            end_label = self.new_label("mult_end")
            self.emit(f" lli 0, {result_reg}")
            self.emit(f"{temp_label}:")
            self.emit(f" cmp {right_reg}, zero")
            self.emit(f" jeq {end_label}")
            self.emit(f" add {result_reg}, {left_reg}, {result_reg}")
            self.emit(f" dec {right_reg}")
            self.emit(f" jmp {temp_label}")
            self.emit(f"{end_label}:")
        elif expr.op == "/":
            # Repeated subtraction: count how often `right` fits in `left`.
            # NOTE(review): loops forever when the divisor is zero.
            temp_label = self.new_label("div")
            end_label = self.new_label("div_end")
            self.emit(f" lli 0, {result_reg}")
            self.emit(f"{temp_label}:")
            self.emit(f" cmp {left_reg}, {right_reg}")
            self.emit(f" jlt {end_label}")
            self.emit(f" sub {left_reg}, {right_reg}, {left_reg}")
            self.emit(f" inc {result_reg}")
            self.emit(f" jmp {temp_label}")
            self.emit(f"{end_label}:")
        elif expr.op in self._COMPARE_JUMPS:
            self.emit(f" cmp {left_reg}, {right_reg}")
            # Result is 1 if condition true, 0 otherwise
            self.emit(f" lli 0, {result_reg}")
            true_label = self.new_label("cmp_true")
            end_label = self.new_label("cmp_end")
            self.emit(f" {self._COMPARE_JUMPS[expr.op]} {true_label}")
            self.emit(f" jmp {end_label}")
            self.emit(f"{true_label}:")
            self.emit(f" lli 1, {result_reg}")
            self.emit(f"{end_label}:")

        self.free_register(left_reg)
        self.free_register(right_reg)
        return result_reg
def compile_c_to_asm(source: str) -> str:
    """Run the full pipeline: lex, parse, then generate DSA assembly.

    Args:
        source: C-subset program text.

    Returns:
        The generated assembly as a single newline-joined string.

    Raises:
        SyntaxError: from the lexer or parser.
        RuntimeError: from the code generator.
    """
    token_stream = Lexer(source).tokenize()
    program = Parser(token_stream).parse()
    return CodeGenerator().generate(program)
def _default_output_path(input_file: str) -> str:
    """Return `input_file` with its final extension swapped for `.dsa`.

    Only the trailing extension is touched, so directory names containing
    ".c" are left alone and extension-less inputs still get a distinct
    output name.
    """
    import os

    root, _extension = os.path.splitext(input_file)
    return root + ".dsa"


def main():
    """Command-line entry point: `compiler.py <input.c> [output.dsa]`.

    Exits with status 1 on bad usage, on I/O failure, or when compilation
    raises SyntaxError/RuntimeError.
    """
    import os

    if len(sys.argv) < 2:
        print("Usage: python compiler.py <input.c> [output.dsa]")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else _default_output_path(input_file)

    # Never write the assembly over the source it was compiled from.
    if os.path.abspath(output_file) == os.path.abspath(input_file):
        print(f"Compilation error: output file would overwrite input file {input_file}")
        sys.exit(1)

    try:
        with open(input_file, "r") as f:
            source = f.read()
        assembly = compile_c_to_asm(source)
        with open(output_file, "w") as f:
            f.write(assembly)
        print(f"Successfully compiled {input_file} to {output_file}")
    except OSError as e:
        print(f"I/O error: {e}")
        sys.exit(1)
    except (SyntaxError, RuntimeError) as e:
        print(f"Compilation error: {e}")
        sys.exit(1)
if __name__ == "__main__":
main()
# # Example usage
# if len(sys.argv) > 1:
# example_c = sys.argv[1]
# else:
# example_c = """
# int factorial(int n) {
# if (n <= 1) {
# return 1;
# }
# return n * factorial(n - 1);
# }
# int main() {
# int result;
# result = factorial(5);
# return result;
# }
# """
# print("Example C program:")
# print(example_c)
# print("\n" + "="*60 + "\n")
# print("Generated DSA assembly:")
# print(compile_c_to_asm(example_c))
-12
View File
@@ -1,12 +0,0 @@
// Recursive factorial: yields 1 when n <= 1, otherwise n * factorial(n - 1).
int factorial(int n) {
if (n <= 1) {
return 1;
}
return n * factorial(n - 1);
}
// Computes factorial(3), passes it to printnum, then returns 0.
int main() {
int res = factorial(3);
printnum(res);
return 0;
}
-25
View File
@@ -1,25 +0,0 @@
include print: "lib/io/print.dsa"
// Recursive factorial: yields 1 when n <= 1, otherwise n * factorial(n - 1).
int factorial(int n) {
if (n <= 1) {
return 1;
}
return n * factorial(n - 1);
}
// Returns the sum of its two arguments.
int add_(int a, int b) {
return a + b;
}
// Returns a when a + a > b + b, otherwise b + a.
int greater(int a, int b) {
if (a + a > b + b) {
return a;
} else {
return b + a;
}
}
// Passes the negative literal -5 to printnum, then returns 0.
int main() {
printnum(-5);
return 0;
}
-5
View File
@@ -1,5 +0,0 @@
// Imports
include maths: "./lib/maths/core.dsa"
// Reserved Memory
-106
View File
@@ -1,106 +0,0 @@
/// Every register name known to the DSA model.
///
/// The variants fall into three groups: sixteen general-purpose registers
/// (`Rg0`..`Rgf`), special-purpose registers, and system registers that
/// instructions cannot write to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum Register {
    // ---- general purpose registers ----
    Rg0,
    Rg1,
    Rg2,
    Rg3,
    Rg4,
    Rg5,
    Rg6,
    Rg7,
    Rg8,
    Rg9,
    Rga,
    Rgb,
    Rgc,
    Rgd,
    Rge,
    Rgf,

    // ---- special purpose registers ----
    Acc,
    Spr,
    Bpr,
    Ret,
    Idr,
    Mmr,
    Zero,
    NoReg,

    // ---- system registers - can't be written to by instructions ----
    Mar,
    Mdr,
    Sts,
    Cir,
    Pcx,
}
/// A list of all current instructions in the DSA Assembly language.
///
/// Each variant carries its operands, and its explicit discriminant is the
/// value assigned to that instruction. The commented-out variants below
/// are kept as written in the original source; they are not part of the
/// enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
#[non_exhaustive]
pub enum Instruction {
    // ---- No-op ----
    Nop = 0x0,

    // ---- Data transfer instructions ----
    Mov(Register, Register) = 0x1,
    Movs(Register, Register) = 0x2,
    Ldb(Register, Register, Option<u32>) = 0x3,
    Ldbs(Register, Register, Option<u32>) = 0x4,
    Ldh(Register, Register, Option<u32>) = 0x5,
    Ldhs(Register, Register, Option<u32>) = 0x6,
    Ldw(Register, Register, Option<u32>) = 0x7,
    Stb(Register, Register, Option<u32>) = 0x8,
    Sth(Register, Register, Option<u32>) = 0x9,
    Stw(Register, Register, Option<u32>) = 0xA,
    Lli(u16, Register) = 0xB,
    Lui(u16, Register) = 0xC,

    // ---- Jump instructions ----
    Jump(u16, Register) = 0xD,
    JumpEq(u16, Register) = 0xE,
    JumpNeq(u16, Register) = 0xF,
    JumpGt(u16, Register) = 0x10,
    JumpGe(u16, Register) = 0x11,
    JumpLt(u16, Register) = 0x12,
    JumpLe(u16, Register) = 0x13,

    // ---- Comparison ----
    Compare(Register, Register) = 0x14,

    // // Arithmetic
    // Add(args::RTypeArgs) = 0x19,
    // Sub(args::RTypeArgs) = 0x1A,
    // Increment(args::RTypeArgs) = 0x15,
    // Decrement(args::RTypeArgs) = 0x16,
    // ShiftLeft(args::RTypeArgs) = 0x17,
    // ShiftRight(args::RTypeArgs) = 0x18,
    // // Logical
    // And(args::RTypeArgs) = 0x1B,
    // Or(args::RTypeArgs) = 0x1C,
    // Not(args::RTypeArgs) = 0x1D,
    // Xor(args::RTypeArgs) = 0x1E,
    // Nand(args::RTypeArgs) = 0x1F,
    // Nor(args::RTypeArgs) = 0x20,
    // Xnor(args::RTypeArgs) = 0x21,
    // // Misc
    // Interrupt(Interrupt) = 0x22,
    // IntReturn = 0x23,
    // Halt = 0x24,
    // // Immediate Arithmetic
    // AddImmediate(args::ITypeArgs) = 0x25,
    // SubImmediate(args::ITypeArgs) = 0x26,

    // ---- Fake instructions ----
    Data(u32) = 0x3E,
    Segment(u32) = 0x3F,
}
-599
View File
@@ -1,599 +0,0 @@
use std::collections::HashMap;
use std::hash::Hash;
use std::sync::LazyLock;
use std::sync::atomic::AtomicU32;
use std::time::SystemTime;
use chrono::{DateTime, Local};
use crate::registers::RegisterAllocator;
use crate::{block, cmd, comment, dsa};
use crate::parser::{
BinaryOperator, ConstExpr, Declaration, Expression, Parameter, Program, Statement,
UnaryOperator,
};
/// Turns a parsed [`Program`] into DSA assembly text.
pub struct CodeGenerator {
    /// The program being compiled.
    ast: Program,
    /// Import name -> file path, rendered as `include` lines.
    imports: HashMap<String, String>,
    /// One `dw` line per global variable.
    globals: Vec<String>,
    /// Generated assembly text, one entry per function.
    functions: Vec<String>,
    /// Names of all top-level declarations (variables, functions, imports).
    symbols: Vec<String>,
    /// Register allocation state; reset at the start of every function.
    allocator: RegisterAllocator,
}
/// Lookup table from a C-level builtin name to a `module::symbol` path.
///
/// NOTE(review): presumably consulted when lowering calls to `print` /
/// `printnum`; the use site is not visible in this part of the file.
static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    let mut methods = HashMap::new();
    methods.insert("print", "print::print");
    methods.insert("printnum", "print::print_num");
    methods
});
/// Renders one `include <name>: "<path>"` directive.
fn import(name: &str, path: &str) -> String {
    format!("include {name}: \"{path}\"")
}
impl CodeGenerator {
const RET: &'static str = "\tjmp _ret";
pub fn new(ast: Program) -> Self {
CodeGenerator {
ast,
imports: HashMap::new(),
globals: Vec::new(),
functions: Vec::new(),
symbols: Vec::new(),
allocator: RegisterAllocator::new(),
}
}
/// Registers an import under `name`, replacing any path previously
/// recorded for the same name.
pub fn include(&mut self, name: &str, path: &str) {
    let (key, value) = (name.to_owned(), path.to_owned());
    self.imports.insert(key, value);
}
/// Compiles the whole program and returns the assembly text.
///
/// Works in three passes: record the name of every top-level declaration,
/// generate code for each declaration, then assemble the final layout.
///
/// # Errors
///
/// Returns the first error reported while generating a declaration or the
/// layout.
pub fn generate(&mut self) -> Result<String, String> {
    // always include the print library for debugging!
    self.include("print", "./lib/io/print.dsa");

    // A single copy of the declaration list is enough: it releases the
    // borrow on `self.ast` so the `&mut self` helpers below can be called,
    // and the second pass consumes it by value.
    let declarations = self.ast.declarations.clone();

    self.symbols.extend(declarations.iter().map(|decl| match decl {
        Declaration::Variable { name, .. }
        | Declaration::Function { name, .. }
        | Declaration::Import { name, .. } => name.clone(),
    }));

    for decl in declarations {
        self.generate_block(decl)?;
    }

    self.generate_layout()
}
/// Assembles the final output from the pieces collected so far.
///
/// In order, the result contains: a header comment with the generation
/// timestamp (local time), the recorded imports as `include` lines, the
/// global `dw` lines, the `_init` entry block, the shared `_ret` epilogue
/// block that `RET` jumps to, and finally the generated functions.
///
/// The text is built with the crate's `dsa!`, `block!` and `comment!`
/// macros, which are defined elsewhere.
///
/// NOTE(review): `self.imports` is a `HashMap`, so the order of the
/// `include` lines is not stable from run to run.
fn generate_layout(&mut self) -> Result<String, String> {
let datetime: DateTime<Local> = SystemTime::now().into();
Ok(dsa![
"",
comment!("GENERATED BY DSA-C COMPILER"),
comment!(format!(
"Generated at {}",
datetime.format("%Y-%m-%d %H:%M:%S")
)),
"",
// imports
comment!("Imports"),
self.imports
.iter()
.map(|(k, v)| import(k, v))
.collect::<Vec<String>>()
.join("\n"),
"",
// reserved memory
comment!("Globals & Reserved Memory"),
self.globals.join("\n"),
"",
// entry point
comment!("Entry Point"),
"dw stack: 0x10000",
"db message: \"Process Exited with code:\"",
block! [ "_init"
dsa![ldw stack, bpr],
dsa![mov bpr, spr],
dsa![push zero],
dsa![call main],
dsa![call print::print_newline],
dsa![lwi message, rg0],
dsa![push rg0],
dsa![call print::print],
dsa![pop zero],
dsa![call print::print_hex_word],
dsa![pop zero],
dsa![hlt]
],
"",
comment!("Function return boilerplate"),
block! [ "_ret"
dsa![mov bpr, spr],
dsa![pop bpr],
dsa![return]
],
// block! [ "main"
// dsa![push bpr],
// dsa![mov spr, bpr],
// dsa![lwi 67, rg1],
// dsa![stw rg1, spr, 8],
// dsa![mov bpr, spr],
// dsa![pop bpr],
// dsa![return]
// ],
"",
self.functions.join("\n"),
])
}
/// Records a `dw <name>: <value>` line for a global variable; a missing
/// initialiser is emitted as the constant 0.
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
    let value = init.unwrap_or(ConstExpr::Number(0));
    let line = format!("dw {name}: {value}");
    self.globals.push(line);
}
/// Generates output for a single top-level declaration.
///
/// * a variable becomes a global `dw` line,
/// * a function becomes a block of assembly appended to `self.functions`,
/// * an import is recorded in `self.imports`.
///
/// Currently always returns `Ok(())`.
fn generate_block(&mut self, block: Declaration) -> Result<(), String> {
    match block {
        Declaration::Variable { name, init } => self.generate_global(&name, init),
        // The return type is not needed for code generation, so it is not
        // bound here.
        Declaration::Function {
            name, params, body, ..
        } => {
            let func = self.generate_function(&name, &params, &body).join("\n");
            self.functions.push(format!("{func}\n"));
        }
        Declaration::Import { name, path } => {
            self.imports.insert(name, path);
        }
    }
    Ok(())
}
/// Generates the assembly lines for one function.
///
/// The output is the label, the prologue (`push bpr` / `mov spr, bpr`),
/// one load per parameter from `bpr + 8 + 4 * index`, the body, and a
/// final jump to the shared `_ret` epilogue unless the body already ends
/// with one.
///
/// # Panics
///
/// Panics if the register allocator cannot place a parameter or if a
/// body statement fails to generate.
fn generate_function(
    &mut self,
    name: &str,
    params: &[Parameter],
    body: &[Statement],
) -> Vec<String> {
    // Each function starts from a clean allocator state.
    self.allocator.reset();

    // Label and prologue, followed by a blank separator line.
    let mut lines = vec![
        format!("{}:", name),
        "\tpush bpr".to_string(),
        "\tmov spr, bpr".to_string(),
        String::new(),
    ];

    // Bring every parameter from its stack slot into its allocated register.
    for (index, param) in params.iter().enumerate() {
        let (reg, setup) = self.allocator.alloc_var(&param.name).unwrap();
        lines.extend(setup);
        let offset = 8 + (index as i32 * 4); // Parameters start at bpr+8
        lines.push(format!("\tldw bpr, {}, {}", reg, offset));
    }

    for stmt in body {
        lines.extend(self.generate_statement(stmt).unwrap());
    }

    // Make sure control always reaches the epilogue, without emitting a
    // second jump when the body already ends in one.
    let ends_with_ret = lines.last().map(String::as_str) == Some(Self::RET);
    if !ends_with_ret {
        lines.push(Self::RET.to_string());
    }
    lines
}
// Example: Generate code for a statement
fn generate_statement(&mut self, stmt: &Statement) -> Result<Vec<String>, String> {
let mut code = Vec::new();
match stmt {
Statement::Assign {
name,
declare_type,
value,
} => {
if let Some(expr) = value {
// Evaluate expression
let (result_reg, expr_code) = self.generate_expression(expr)?;
code.extend(expr_code);
// Store result in variable
let store_code = self.allocator.store_var(name, &result_reg);
code.extend(store_code);
// Free temporary register
self.allocator.free_temp(&result_reg);
} else {
// Just declaring variable without initialization
self.allocator.alloc_var(name)?;
}
}
Statement::Return { expr } => {
if let Some(e) = expr {
let (result_reg, expr_code) = self.generate_expression(e)?;
code.extend(expr_code);
code.push(format!("\tstw {}, bpr, 8", result_reg));
code.push(format!("\tjmp _ret"));
self.allocator.free_temp(&result_reg);
}
}
Statement::If {
condition,
then_stmt,
else_stmt,
} => {
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition)?;
code.extend(cond_code);
// Compare with zero
code.push(format!("\tcmp {}, zero", cond_reg));
self.allocator.free_temp(&cond_reg);
// Generate unique labels
let then_label = format!("_then_{}", self.get_unique_label());
let else_label = format!("_else_{}", self.get_unique_label());
let end_label = format!("_end_{}", self.get_unique_label());
// Jump to else if condition is false (equal to zero)
code.push(format!("\tjeq {}", else_label));
// Then block
code.push(format!("{}:", then_label));
for s in then_stmt {
code.extend(self.generate_statement(s)?);
}
if then_stmt.len() == 0 {
code.push("\tnop".to_string());
}
code.push(format!("\tjmp {}", end_label));
// Else block
code.push(format!("{}:", else_label));
for s in else_stmt {
code.extend(self.generate_statement(s)?);
}
if else_stmt.len() == 0 {
code.push("\tnop".to_string());
}
code.push(format!("{}:", end_label));
}
Statement::While { condition, body } => {
let loop_start = format!("_while_start_{}", self.get_unique_label());
let loop_end = format!("_while_end_{}", self.get_unique_label());
code.push(format!("{}:", loop_start));
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition)?;
code.extend(cond_code);
code.push(format!("\tcmp {}, zero", cond_reg));
self.allocator.free_temp(&cond_reg);
code.push(format!("\tjeq {}", loop_end));
// Loop body
for s in body {
code.extend(self.generate_statement(s)?);
}
code.push(format!("\tjmp {}", loop_start));
code.push(format!("{}:", loop_end));
}
Statement::Expression { expr } => {
let (result_reg, expr_code) = self.generate_expression(expr)?;
code.extend(expr_code);
self.allocator.free_temp(&result_reg);
}
Statement::Block(statements) => {
for s in statements {
code.extend(self.generate_statement(s)?);
}
}
}
Ok(code)
}
/// Emits assembly that evaluates `expr`.
///
/// Returns the name of the register that holds the value together with the
/// instructions that compute it. Operand registers are handed back to the
/// allocator through `free_temp` before returning; the returned register is
/// left for the caller to release.
///
/// # Errors
/// Propagates allocator failures, and reports binary operators that have no
/// code generation here as well as calls to unknown functions.
fn generate_expression(
    &mut self,
    expr: &Expression,
) -> Result<(String, Vec<String>), String> {
    let mut code = Vec::new();
    match expr {
        Expression::Number { value } => {
            let (reg, alloc_code) = self.allocator.alloc_temp()?;
            code.extend(alloc_code);
            // The low 16 bits are always loaded; the upper half is only
            // emitted when the value does not fit in them.
            code.push(format!("\tlli {}, {}", value & 0xFFFF, reg));
            let fits_low_half = (0..=0xFFFF).contains(value);
            if !fits_low_half {
                code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg));
            }
            Ok((reg, code))
        }
        Expression::Variable { name, .. } => {
            let (reg, load_code) = self.allocator.load_var(name)?;
            code.extend(load_code);
            Ok((reg, code))
        }
        Expression::Binary { op, left, right } => {
            // Operands are evaluated left to right, then a destination is
            // allocated for the result.
            let (lhs, lhs_code) = self.generate_expression(left)?;
            code.extend(lhs_code);
            let (rhs, rhs_code) = self.generate_expression(right)?;
            code.extend(rhs_code);
            let (dest, dest_alloc) = self.allocator.alloc_temp()?;
            code.extend(dest_alloc);

            // Comparisons materialise 0, then jump over the "set to 1" when
            // the *inverse* condition holds.
            let inverse_jump = match op {
                BinaryOperator::Eq => Some("jne"),
                BinaryOperator::Ne => Some("jeq"),
                BinaryOperator::Lt => Some("jge"),
                BinaryOperator::Le => Some("jgt"),
                BinaryOperator::Gt => Some("jle"),
                BinaryOperator::Ge => Some("jlt"),
                _ => None,
            };

            if let Some(jump) = inverse_jump {
                code.push(format!("\tcmp {}, {}", lhs, rhs));
                code.push(format!("\tlli 0, {}", dest));
                let end_label = format!("_cmp_end_{}", self.get_unique_label());
                code.push(format!("\t{} {}", jump, end_label));
                code.push(format!("\tlli 1, {}", dest));
                code.push(format!("{}:", end_label));
            } else {
                match op {
                    BinaryOperator::Add => {
                        code.push(format!("\tadd {}, {}, {}", lhs, rhs, dest));
                    }
                    BinaryOperator::Sub => {
                        code.push(format!("\tsub {}, {}, {}", lhs, rhs, dest));
                    }
                    BinaryOperator::Mul => {
                        // Multiplication is a library routine: both operands
                        // go on the stack and the product is popped back.
                        self.include("maths", "./lib/maths/core.dsa");
                        code.push(format!("\tpush {}", rhs));
                        code.push(format!("\tpush {}", lhs));
                        code.push("\tcall maths::multiply".to_string());
                        code.push(format!("\tpop {}", dest));
                        code.push("\tpop zero".to_string());
                    }
                    _ => return Err(format!("Unsupported binary operator: {:?}", op)),
                }
            }

            // The allocator ignores these calls for registers bound to variables.
            self.allocator.free_temp(&lhs);
            self.allocator.free_temp(&rhs);
            Ok((dest, code))
        }
        Expression::Call { name, args } => {
            // Preserve live variable registers around the call.
            let saved_regs = self.allocator.get_caller_saved_registers();
            code.extend(saved_regs.iter().map(|reg| format!("\tpush {}", reg)));

            // Arguments are evaluated and pushed last-to-first.
            let mut arg_regs = Vec::new();
            for arg in args.iter().rev() {
                let (arg_reg, arg_code) = self.generate_expression(arg)?;
                code.extend(arg_code);
                code.push(format!("\tpush {}", arg_reg));
                arg_regs.push(arg_reg);
            }

            // Built-in names take precedence over functions of this program.
            let call_line = if GLOBAL_METHODS.contains_key(name.as_str()) {
                format!("\tcall {}", GLOBAL_METHODS[name.as_str()])
            } else if self.symbols.contains(name) {
                format!("\tcall {}", name)
            } else {
                return Err(format!("undefined function {name}"));
            };
            code.push(call_line);

            // The result is popped off the stack into a fresh register.
            let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
            code.extend(result_alloc);
            code.push(format!("\tpop {}", result_reg));

            // One stack slot was consumed by the result pop; discard one
            // slot for every argument after the first.
            for _ in 1..args.len() {
                code.push("\tpop zero".to_string());
            }

            // Restore the saved registers in LIFO order.
            code.extend(saved_regs.iter().rev().map(|reg| format!("\tpop {}", reg)));

            for reg in &arg_regs {
                self.allocator.free_temp(reg);
            }
            Ok((result_reg, code))
        }
        Expression::Unary { op, operand } => {
            let (src, src_code) = self.generate_expression(operand)?;
            code.extend(src_code);
            let (dest, dest_alloc) = self.allocator.alloc_temp()?;
            code.extend(dest_alloc);
            let instruction = match op {
                // Negation is emitted as `0 - operand`.
                UnaryOperator::Minus => format!("\tsub zero, {}, {}", src, dest),
                // Unary plus is a plain copy.
                UnaryOperator::Plus => format!("\tmov {}, {}", src, dest),
            };
            code.push(instruction);
            self.allocator.free_temp(&src);
            Ok((dest, code))
        }
        // An empty expression produces no code and evaluates to `zero`.
        Expression::Empty => Ok(("zero".to_string(), code)),
    }
}
/// Returns a fresh numeric suffix for assembly labels.
///
/// The counter is a function-local static, so the sequence (starting at 1)
/// is shared by every caller of this method in the process.
fn get_unique_label(&mut self) -> String {
    static NEXT_LABEL: AtomicU32 = AtomicU32::new(0);
    let previous = NEXT_LABEL.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
    let label_id = previous + 1;
    label_id.to_string()
}
}
/// Joins any number of arguments into one `String`, one argument per line.
///
/// Every argument is formatted with `{}` (so it must implement `Display`)
/// and is followed by a newline. A trailing comma is accepted; with no
/// arguments the result is the empty string.
#[macro_export]
macro_rules! dsa {
    ($($arg:expr),* $(,)?) => {{
        use std::fmt::Write;
        // All lines are appended to this single buffer.
        let mut buffer = ::std::string::String::new();
        $(
            writeln!(buffer, "{}", $arg).expect("write to String failed");
        )*
        buffer
    }};
}
// ──────────────────────── cmd! ────────────────────────
/// Turns its raw token stream into a `String` holding one line of text.
///
/// The tokens are stringified as written (`cmd!(mov rg0, rg1)` yields
/// `"mov rg0, rg1\n"`); nothing is evaluated.
#[macro_export]
macro_rules! cmd {
    ($($tokens:tt)*) => {{
        // `$tokens` is a repeated capture, so it has to be expanded inside a
        // repetition; using it bare makes every invocation fail to expand.
        ::std::string::String::from(concat!(stringify!($($tokens)*), "\n"))
    }};
}
// ──────────────────────── block! ────────────────────────
// Usage:
//
//     let asm = block![ "name"
//         dsa![mov rg0, rg1],
//         dsa![add rg1, rg1]
//     ];
//
// `asm` is a `&'static str` containing:
//
//     name:
//         mov rg0, rg1
//         add rg1, rg1
//
// The `dsa![...]` groups are matched purely as syntax: their tokens are
// stringified at compile time and the `dsa!` macro itself is never invoked.
#[macro_export]
macro_rules! block {
    // A string-literal label, then comma-separated `dsa![...]` groups.
    ($label:literal $(dsa![$($ins:tt)*]),* ) => {{
        // Label line first, then one tab-indented line per instruction.
        const ASSEMBLED: &str = concat!(
            concat!($label, ":\n"),
            $(concat!("\t", stringify!($($ins)*), "\n")),*
        );
        ASSEMBLED
    }};
}
/// Formats `$text` (anything implementing `Display`) as a `// ...` comment
/// line, returned as a `String` without a trailing newline.
#[macro_export]
macro_rules! comment {
    ($text:expr) => {
        format!("// {}", $text)
    };
}
-335
View File
@@ -1,335 +0,0 @@
// ============================================================================
// Token Types
// ============================================================================

/// Every kind of token [`Lexer::tokenize`] can produce.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
    // Keywords
    Int,
    If,
    Else,
    While,
    Return,
    Include,
    // Identifiers and literals
    Identifier(String),
    Number(i32),
    String(String),
    Char(char),
    // Operators
    Plus,
    Minus,
    Star,
    Slash,
    Assign,
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,
    // Delimiters
    LParen,
    RParen,
    LBrace,
    RBrace,
    Semicolon,
    Comma,
    Colon,
    /// The `::` separator.
    Namespace,
    /// Always the final token of a stream.
    Eof,
}

/// Fixed-width scalar types. Nothing in the lexer itself refers to this enum.
pub enum Type {
    Int32,
    Int16,
    Int8,
    Uint32,
    Uint16,
    Uint8,
    Char,
}

/// A token together with the 1-based line and column of its first character.
#[derive(Debug, Clone)]
pub struct Token {
    pub token_type: TokenType,
    pub line: usize,
    pub col: usize,
}

impl Token {
    /// Builds a token of kind `token_type` located at `line`, `col`.
    pub fn new(token_type: TokenType, line: usize, col: usize) -> Self {
        Self {
            token_type,
            line,
            col,
        }
    }
}

// ============================================================================
// Lexer
// ============================================================================

/// Converts source text into a flat list of [`Token`]s.
pub struct Lexer {
    source: Vec<char>,
    pos: usize,
    line: usize,
    col: usize,
}

impl Lexer {
    /// Creates a lexer positioned at the start of `source` (line 1, column 1).
    pub fn new(source: &str) -> Self {
        Self {
            source: source.chars().collect(),
            pos: 0,
            line: 1,
            col: 1,
        }
    }

    /// Formats `msg` as a lexer error at the current position.
    fn error(&self, msg: &str) -> String {
        Self::error_at(self.line, self.col, msg)
    }

    /// Formats `msg` as a lexer error at an explicit position, used when the
    /// problem is reported at the start of a token already partly consumed.
    fn error_at(line: usize, col: usize, msg: &str) -> String {
        format!("Lexer error at line {}, col {}: {}", line, col, msg)
    }

    /// Returns the character `offset` positions ahead without consuming it.
    fn peek(&self, offset: usize) -> Option<char> {
        self.source.get(self.pos + offset).copied()
    }

    /// Consumes and returns the next character, keeping line/column in sync.
    fn advance(&mut self) -> Option<char> {
        let ch = self.peek(0)?;
        self.pos += 1;
        if ch == '\n' {
            self.line += 1;
            self.col = 1;
        } else {
            self.col += 1;
        }
        Some(ch)
    }

    /// Consumes a run of whitespace, if any.
    fn skip_whitespace(&mut self) {
        while self.peek(0).map_or(false, char::is_whitespace) {
            self.advance();
        }
    }

    /// Consumes a `//` comment up to (not including) the line break.
    /// Returns whether a comment was present.
    fn skip_comment(&mut self) -> bool {
        if self.peek(0) != Some('/') || self.peek(1) != Some('/') {
            return false;
        }
        while let Some(ch) = self.peek(0) {
            if ch == '\n' {
                break;
            }
            self.advance();
        }
        true
    }

    /// Skips any mix of whitespace and comments.
    ///
    /// A comment ends on a line break, which must itself be skipped before
    /// the next token starts, so the two skips alternate until neither
    /// makes progress.
    fn skip_trivia(&mut self) {
        loop {
            self.skip_whitespace();
            if !self.skip_comment() {
                break;
            }
        }
    }

    /// Reads a decimal integer literal.
    ///
    /// # Errors
    /// Fails when the literal does not fit in an `i32`, rather than silently
    /// turning it into 0.
    fn read_number(&mut self) -> Result<i32, String> {
        let (line, col) = (self.line, self.col);
        let mut digits = String::new();
        while let Some(ch) = self.peek(0) {
            if !ch.is_ascii_digit() {
                break;
            }
            digits.push(ch);
            self.advance();
        }
        digits.parse().map_err(|_| {
            Self::error_at(
                line,
                col,
                &format!("Integer literal out of range: {}", digits),
            )
        })
    }

    /// Reads a run of alphanumeric characters and underscores.
    fn read_identifier(&mut self) -> String {
        let mut ident = String::new();
        while let Some(ch) = self.peek(0) {
            if !(ch.is_alphanumeric() || ch == '_') {
                break;
            }
            ident.push(ch);
            self.advance();
        }
        ident
    }

    /// Maps the character following a backslash to the character it denotes.
    /// Unrecognised escapes stand for the character itself (covers `\\`,
    /// `\"` and `\'`).
    fn unescape(ch: char) -> char {
        match ch {
            'n' => '\n',
            't' => '\t',
            'r' => '\r',
            '0' => '\0',
            other => other,
        }
    }

    /// Reads a double-quoted string literal, translating escape sequences.
    ///
    /// # Errors
    /// Fails, pointing at the opening quote, when the closing quote is missing.
    fn read_string(&mut self) -> Result<String, String> {
        let (line, col) = (self.line, self.col);
        let mut string = String::new();
        self.advance(); // opening quote
        while let Some(ch) = self.advance() {
            match ch {
                '"' => return Ok(string),
                '\\' => match self.advance() {
                    Some(escaped) => string.push(Self::unescape(escaped)),
                    None => break,
                },
                other => string.push(other),
            }
        }
        Err(Self::error_at(line, col, "Unterminated string literal"))
    }

    /// Reads a single-quoted character literal such as `'a'` or `'\n'`.
    ///
    /// # Errors
    /// Fails when the input ends early or the closing quote is missing.
    fn read_char(&mut self) -> Result<char, String> {
        self.advance(); // opening quote
        let value = match self.advance() {
            Some('\\') => match self.advance() {
                Some(escaped) => Self::unescape(escaped),
                None => return Err(self.error("expected character")),
            },
            Some(ch) => ch,
            None => return Err(self.error("expected character")),
        };
        if self.peek(0) == Some('\'') {
            self.advance();
            Ok(value)
        } else {
            Err(self.error("expected closing quote"))
        }
    }

    /// Classifies a word as a keyword or a plain identifier.
    fn keyword_or_identifier(word: String) -> TokenType {
        match word.as_str() {
            "int" => TokenType::Int,
            "if" => TokenType::If,
            "else" => TokenType::Else,
            "while" => TokenType::While,
            "return" => TokenType::Return,
            "include" => TokenType::Include,
            _ => TokenType::Identifier(word),
        }
    }

    /// Reads an operator or delimiter whose first character is `ch`.
    /// Two-character operators win over their one-character prefixes.
    fn read_symbol(&mut self, ch: char) -> Result<TokenType, String> {
        let two_char = match (ch, self.peek(1)) {
            (':', Some(':')) => Some(TokenType::Namespace),
            ('=', Some('=')) => Some(TokenType::Eq),
            ('!', Some('=')) => Some(TokenType::Ne),
            ('<', Some('=')) => Some(TokenType::Le),
            ('>', Some('=')) => Some(TokenType::Ge),
            _ => None,
        };
        if let Some(token_type) = two_char {
            self.advance();
            self.advance();
            return Ok(token_type);
        }

        let token_type = match ch {
            ':' => TokenType::Colon,
            '+' => TokenType::Plus,
            '-' => TokenType::Minus,
            '*' => TokenType::Star,
            '/' => TokenType::Slash,
            '=' => TokenType::Assign,
            '<' => TokenType::Lt,
            '>' => TokenType::Gt,
            '(' => TokenType::LParen,
            ')' => TokenType::RParen,
            '{' => TokenType::LBrace,
            '}' => TokenType::RBrace,
            ';' => TokenType::Semicolon,
            ',' => TokenType::Comma,
            _ => return Err(self.error(&format!("Unexpected character: {}", ch))),
        };
        self.advance();
        Ok(token_type)
    }

    /// Tokenizes the whole input.
    ///
    /// The returned vector always ends with a [`TokenType::Eof`] token.
    ///
    /// # Errors
    /// Returns a message with line and column for unexpected characters,
    /// unterminated or malformed literals, and out-of-range integers.
    pub fn tokenize(&mut self) -> Result<Vec<Token>, String> {
        let mut tokens = Vec::new();
        loop {
            self.skip_trivia();
            let ch = match self.peek(0) {
                Some(ch) => ch,
                None => break,
            };
            let (line, col) = (self.line, self.col);
            let token_type = if ch.is_ascii_digit() {
                TokenType::Number(self.read_number()?)
            } else if ch == '"' {
                TokenType::String(self.read_string()?)
            } else if ch == '\'' {
                TokenType::Char(self.read_char()?)
            } else if ch.is_alphabetic() || ch == '_' {
                Self::keyword_or_identifier(self.read_identifier())
            } else {
                self.read_symbol(ch)?
            };
            tokens.push(Token::new(token_type, line, col));
        }
        tokens.push(Token::new(TokenType::Eof, self.line, self.col));
        Ok(tokens)
    }
}
-74
View File
@@ -1,74 +0,0 @@
use std::fmt;
use crate::{codegen::CodeGenerator, lexer::Lexer, parser::Parser};
// mod assembly;
pub mod codegen;
pub mod lexer;
pub mod parser;
mod registers;
// ============================================================================
// Main & Tests
// ============================================================================
/// Command-line entry point: `c_compiler <src.c> [output.dsa]`.
///
/// Runs the lexer, parser and code generator over the input file, printing
/// the token list and AST along the way, and writes the generated assembly
/// to the output path (`output.dsa` when none is given).
///
/// Every failure is reported on stderr and ends the process with exit
/// status 1 so that scripts and build tools can detect it.
fn main() {
    let args: Vec<String> = std::env::args().collect();
    if args.len() < 2 {
        eprintln!("Usage: c_compiler <src.c> [output.dsa]");
        std::process::exit(1);
    }
    let input_file = &args[1];
    let output_file = args.get(2).map(String::as_str).unwrap_or("output.dsa");

    // Read input
    let input = match std::fs::read_to_string(input_file) {
        Ok(text) => text,
        Err(e) => {
            eprintln!("Failed to read input file {}: {}", input_file, e);
            std::process::exit(1);
        }
    };

    // Lexing
    let mut lexer = Lexer::new(&input);
    let tokens = match lexer.tokenize() {
        Ok(tokens) => tokens,
        Err(e) => {
            eprintln!("Lexing error: {}", e);
            std::process::exit(1);
        }
    };
    println!("Tokens:");
    for token in &tokens {
        println!(" {:?}", token.token_type);
    }
    println!();

    // Parsing
    let mut parser = Parser::new(tokens);
    let ast = match parser.parse() {
        Ok(ast) => ast,
        Err(e) => {
            eprintln!("Parsing error: {}", e);
            std::process::exit(1);
        }
    };
    println!("AST:");
    println!("{:#?}", ast);

    // Code generation
    let mut generator = CodeGenerator::new(ast);
    let result = match generator.generate() {
        Ok(code) => code,
        Err(e) => {
            // This stage used to be mislabelled as a parsing error.
            eprintln!("Code generation error: {}", e);
            std::process::exit(1);
        }
    };

    if let Err(e) = std::fs::write(output_file, &result) {
        eprintln!("Failed to write output file {}: {}", output_file, e);
        std::process::exit(1);
    }
    println!("Result written to {}", output_file);
}
-610
View File
@@ -1,610 +0,0 @@
// ============================================================================
// AST Node Types
// ============================================================================
use std::fmt;
use crate::lexer::{Token, TokenType};
/// Root of the AST: the top-level declarations of one source file, in the
/// order `Parser::parse` encountered them.
#[derive(Debug, Clone)]
pub struct Program {
    pub declarations: Vec<Declaration>,
}
/// A top-level item of a source file.
#[derive(Debug, Clone)]
pub enum Declaration {
    /// A function definition: name, return type, parameters and body.
    Function {
        name: String,
        return_type: Type,
        params: Vec<Parameter>,
        body: Block,
    },
    /// A global variable; `init` is `None` when no initializer was written.
    Variable {
        name: String,
        init: Option<ConstExpr>,
    },
    /// An `include <name>: "<path>"` directive.
    Import {
        name: String,
        path: String,
    },
}
/// One formal parameter of a function declaration.
#[derive(Debug, Clone)]
pub struct Parameter {
    pub name: String,
    pub param_type: Type,
}
/// Types that can appear in the AST.
///
/// The parser in this file only ever constructs `Type::Int`; the remaining
/// variants are not produced by any parsing code visible here.
#[derive(Debug, Clone)]
pub enum Type {
    Int,
    Long,
    Float,
    Double,
    Char,
    Void,
    // Pointer to the boxed type.
    Ptr(Box<Type>),
    // Element type plus a `usize` (presumably the element count — confirm).
    Array(Box<Type>, usize),
    // Carries a `String` (presumably the struct's name — confirm).
    Struct(String),
}
/// A sequence of statements, e.g. the contents of a `{ ... }` body.
pub type Block = Vec<Statement>;
/// A statement inside a function body.
#[derive(Debug, Clone)]
pub enum Statement {
    /// A nested `{ ... }` block.
    Block(Block),
    /// Either a local declaration or an assignment to an existing name.
    Assign {
        // Left side: the variable being written. `declare_type` is `Some`
        // for a declaration (`int x ...;`) and `None` for a plain assignment.
        name: String,
        declare_type: Option<Type>,
        // Right side: the assigned expression; `None` for a declaration
        // without an initializer.
        value: Option<Box<Expression>>,
    },
    /// An expression evaluated as a statement.
    Expression {
        expr: Expression,
    },
    /// `if (condition) { then_stmt } else { else_stmt }`; `else_stmt` is
    /// empty when there is no `else` branch.
    If {
        condition: Expression,
        then_stmt: Block,
        else_stmt: Block,
    },
    /// `while (condition) { body }`.
    While {
        condition: Expression,
        body: Vec<Statement>,
    },
    /// `return;` (`expr` is `None`) or `return expr;`.
    Return {
        expr: Option<Expression>,
    },
}
/// A compile-time constant usable as a global initializer.
#[derive(Debug, Clone)]
pub enum ConstExpr {
    Number(i32),
    String(String),
}

/// Numbers print as decimal digits; strings print wrapped in double quotes
/// with their contents written verbatim (no escaping).
impl fmt::Display for ConstExpr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            ConstExpr::Number(value) => f.write_fmt(format_args!("{}", value)),
            ConstExpr::String(text) => {
                f.write_str("\"")?;
                f.write_str(text)?;
                f.write_str("\"")
            }
        }
    }
}
/// An expression node of the AST.
#[derive(Debug, Clone)]
pub enum Expression {
    /// No expression at all; the parser produces this for a lone `;`.
    Empty,
    /// `left op right`.
    Binary {
        op: BinaryOperator,
        left: Box<Expression>,
        right: Box<Expression>,
    },
    /// A prefix `+` or `-` applied to `operand`.
    Unary {
        op: UnaryOperator,
        operand: Box<Expression>,
    },
    /// A reference to a named variable; the parser always leaves
    /// `expr_type` as `None`.
    Variable {
        name: String,
        expr_type: Option<Type>,
    },
    /// An integer literal.
    Number {
        value: i32,
    },
    /// A call `name(args...)`, arguments in source order.
    Call {
        name: String,
        args: Vec<Expression>,
    },
}
/// Binary operators recognised by the parser.
#[derive(Debug, Clone, PartialEq)]
pub enum BinaryOperator {
    Add,
    Sub,
    Mul,
    Div,
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,
}

/// Prints the operator exactly as it is spelled in source code.
impl fmt::Display for BinaryOperator {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            BinaryOperator::Add => "+",
            BinaryOperator::Sub => "-",
            BinaryOperator::Mul => "*",
            BinaryOperator::Div => "/",
            BinaryOperator::Eq => "==",
            BinaryOperator::Ne => "!=",
            BinaryOperator::Lt => "<",
            BinaryOperator::Gt => ">",
            BinaryOperator::Le => "<=",
            BinaryOperator::Ge => ">=",
        };
        f.write_str(symbol)
    }
}
/// Prefix operators recognised by the parser.
#[derive(Debug, Clone, PartialEq)]
pub enum UnaryOperator {
    Plus,
    Minus,
}

/// Prints the operator exactly as it is spelled in source code.
impl fmt::Display for UnaryOperator {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            UnaryOperator::Plus => "+",
            UnaryOperator::Minus => "-",
        };
        f.write_str(symbol)
    }
}
// ============================================================================
// Parser
// ============================================================================
pub struct Parser {
tokens: Vec<Token>,
pos: usize,
}
impl Parser {
pub fn new(tokens: Vec<Token>) -> Self {
Self { tokens, pos: 0 }
}
fn error(&self, msg: &str) -> String {
let token = self.current();
format!(
"Parser error at line {}, col {}: {}",
token.line, token.col, msg
)
}
fn current(&self) -> &Token {
self.tokens
.get(self.pos)
.unwrap_or_else(|| self.tokens.last().unwrap())
}
fn peek(&self, offset: usize) -> &Token {
self.tokens
.get(self.pos + offset)
.unwrap_or_else(|| self.tokens.last().unwrap())
}
fn advance(&mut self) -> &Token {
if self.pos < self.tokens.len() - 1 {
self.pos += 1;
}
self.current()
}
fn expect(&mut self, expected: TokenType) -> Result<Token, String> {
let token = self.current().clone();
if std::mem::discriminant(&token.token_type) != std::mem::discriminant(&expected)
{
return Err(self.error(&format!(
"Expected {:?}, got {:?}",
expected, token.token_type
)));
}
self.advance();
Ok(token)
}
pub fn parse(&mut self) -> Result<Program, String> {
let mut declarations = Vec::new();
while !matches!(self.current().token_type, TokenType::Eof) {
declarations.push(self.parse_declaration()?);
}
Ok(Program { declarations })
}
fn parse_declaration(&mut self) -> Result<Declaration, String> {
// check for an import
if let TokenType::Include = self.current().token_type {
self.advance();
let name =
if let TokenType::Identifier(id) = self.current().clone().token_type {
Some(id)
} else {
None
}
.ok_or(String::from("Expected identifier"))?;
self.advance();
self.expect(TokenType::Colon)?;
let path = if let TokenType::String(id) = self.current().clone().token_type {
Some(id)
} else {
None
}
.ok_or(String::from("Expected string literal"))?;
self.advance();
return Ok(Declaration::Import { name, path });
}
self.expect(TokenType::Int)?;
let name = match &self.current().token_type {
TokenType::Identifier(s) => s.clone(),
_ => return Err(self.error("Expected identifier")),
};
self.advance();
match &self.current().token_type {
TokenType::LParen => {
// Function declaration
self.advance();
let mut params = Vec::<Parameter>::new();
if !matches!(self.current().token_type, TokenType::RParen) {
self.expect(TokenType::Int)?;
match &self.current().token_type {
TokenType::Identifier(s) => {
params.push(Parameter {
name: s.clone(),
param_type: Type::Int,
});
self.advance();
}
_ => return Err(self.error("Expected parameter name")),
}
while matches!(self.current().token_type, TokenType::Comma) {
self.advance();
self.expect(TokenType::Int)?;
match &self.current().token_type {
TokenType::Identifier(s) => {
params.push(Parameter {
name: s.clone(),
param_type: Type::Int,
});
self.advance();
}
_ => return Err(self.error("Expected parameter name")),
}
}
}
self.expect(TokenType::RParen)?;
let body = self.parse_block()?;
Ok(Declaration::Function {
name,
params,
body,
return_type: Type::Int,
})
}
_ => {
// Variable declaration
let init = if matches!(self.current().token_type, TokenType::Assign) {
self.advance();
if let TokenType::Number(n) = self.current().token_type {
self.advance();
Some(ConstExpr::Number(n))
} else {
return Err(self
.error("Expected constant in global variable declaration"));
}
} else {
None
};
self.expect(TokenType::Semicolon)?;
Ok(Declaration::Variable { name, init })
}
}
}
fn parse_block(&mut self) -> Result<Block, String> {
self.expect(TokenType::LBrace)?;
let mut statements = Vec::new();
while !matches!(self.current().token_type, TokenType::RBrace) {
statements.push(self.parse_statement()?);
}
self.expect(TokenType::RBrace)?;
Ok(statements)
}
fn parse_statement(&mut self) -> Result<Statement, String> {
match &self.current().token_type {
TokenType::LBrace => Ok(Statement::Block(self.parse_block()?)),
TokenType::If => self.parse_if_stmt(),
TokenType::While => self.parse_while_stmt(),
TokenType::Return => self.parse_return_stmt(),
TokenType::Identifier(name) => {
let name = name.clone();
// peek ahead for open paren (func call expr)
if matches!(self.peek(1).token_type, TokenType::LParen) {
let expr = self.parse_expression()?; // a function call expr
self.expect(TokenType::Semicolon)?;
return Ok(Statement::Expression { expr });
}
self.advance(); // advance past identifier
// assignment expression
if matches!(self.current().token_type, TokenType::Assign) {
self.advance();
let expr = self.parse_expression()?;
self.expect(TokenType::Semicolon)?;
Ok(Statement::Assign {
name,
value: Some(Box::new(expr)),
declare_type: None,
})
}
// var expression
else {
self.expect(TokenType::Semicolon)?;
Ok(Statement::Expression {
expr: Expression::Variable {
name,
expr_type: None,
},
})
}
}
TokenType::Int => {
// Local variable declaration
self.advance();
let name = match &self.current().token_type {
TokenType::Identifier(s) => s.clone(),
_ => return Err(self.error("Expected variable name")),
};
self.advance();
let init = if matches!(self.current().token_type, TokenType::Assign) {
self.advance();
Some(self.parse_expression()?)
} else {
None
};
self.expect(TokenType::Semicolon)?;
// Convert to assignment expression statement
let expr = if let Some(init_expr) = init {
Statement::Assign {
name,
value: Some(Box::new(init_expr)),
declare_type: Some(Type::Int),
}
} else {
Statement::Assign {
name,
value: None,
declare_type: Some(Type::Int),
}
};
Ok(expr)
}
_ => {
let expr = if matches!(self.current().token_type, TokenType::Semicolon) {
Expression::Empty
} else {
self.parse_expression()?
};
self.expect(TokenType::Semicolon)?;
Ok(Statement::Expression { expr })
}
}
}
fn parse_if_stmt(&mut self) -> Result<Statement, String> {
self.expect(TokenType::If)?;
self.expect(TokenType::LParen)?;
let condition = self.parse_expression()?;
self.expect(TokenType::RParen)?;
let then_stmt = self.parse_block()?;
let else_stmt = if matches!(self.current().token_type, TokenType::Else) {
self.advance();
self.parse_block()?
} else {
Vec::new()
};
Ok(Statement::If {
condition,
then_stmt,
else_stmt,
})
}
fn parse_while_stmt(&mut self) -> Result<Statement, String> {
self.expect(TokenType::While)?;
self.expect(TokenType::LParen)?;
let condition = self.parse_expression()?;
self.expect(TokenType::RParen)?;
let body = self.parse_block()?;
Ok(Statement::While { condition, body })
}
fn parse_return_stmt(&mut self) -> Result<Statement, String> {
self.expect(TokenType::Return)?;
let expr = if matches!(self.current().token_type, TokenType::Semicolon) {
None
} else {
Some(self.parse_expression()?)
};
self.expect(TokenType::Semicolon)?;
Ok(Statement::Return { expr })
}
fn parse_expression(&mut self) -> Result<Expression, String> {
self.parse_comparison()
}
fn parse_comparison(&mut self) -> Result<Expression, String> {
let mut expr = self.parse_additive()?;
while let Some(op) = match &self.current().token_type {
TokenType::Eq => Some(BinaryOperator::Eq),
TokenType::Ne => Some(BinaryOperator::Ne),
TokenType::Lt => Some(BinaryOperator::Lt),
TokenType::Gt => Some(BinaryOperator::Gt),
TokenType::Le => Some(BinaryOperator::Le),
TokenType::Ge => Some(BinaryOperator::Ge),
_ => None,
} {
self.advance();
let right = Box::new(self.parse_additive()?);
expr = Expression::Binary {
op,
left: Box::new(expr),
right,
};
}
Ok(expr)
}
fn parse_additive(&mut self) -> Result<Expression, String> {
let mut expr = self.parse_multiplicative()?;
while let Some(op) = match &self.current().token_type {
TokenType::Plus => Some(BinaryOperator::Add),
TokenType::Minus => Some(BinaryOperator::Sub),
_ => None,
} {
self.advance();
let right = Box::new(self.parse_multiplicative()?);
expr = Expression::Binary {
op,
left: Box::new(expr),
right,
};
}
Ok(expr)
}
fn parse_multiplicative(&mut self) -> Result<Expression, String> {
let mut expr = self.parse_unary()?;
while let Some(op) = match &self.current().token_type {
TokenType::Star => Some(BinaryOperator::Mul),
TokenType::Slash => Some(BinaryOperator::Div),
_ => None,
} {
self.advance();
let right = Box::new(self.parse_unary()?);
expr = Expression::Binary {
op,
left: Box::new(expr),
right,
};
}
Ok(expr)
}
fn parse_unary(&mut self) -> Result<Expression, String> {
let op = match &self.current().token_type {
TokenType::Plus => Some(UnaryOperator::Plus),
TokenType::Minus => Some(UnaryOperator::Minus),
_ => None,
};
if let Some(op) = op {
self.advance();
let operand = Box::new(self.parse_unary()?);
return Ok(Expression::Unary { op, operand });
}
self.parse_primary()
}
fn parse_primary(&mut self) -> Result<Expression, String> {
match &self.current().token_type.clone() {
TokenType::Number(n) => {
let value = *n;
self.advance();
Ok(Expression::Number { value })
}
TokenType::Identifier(name) => {
let name = name.clone();
self.advance();
if matches!(self.current().token_type, TokenType::LParen) {
// Function call
self.advance();
let mut args = Vec::new();
if !matches!(self.current().token_type, TokenType::RParen) {
args.push(self.parse_expression()?);
while matches!(self.current().token_type, TokenType::Comma) {
self.advance();
args.push(self.parse_expression()?);
}
}
self.expect(TokenType::RParen)?;
Ok(Expression::Call { name, args })
} else {
Ok(Expression::Variable {
name,
expr_type: None,
})
}
}
TokenType::LParen => {
self.advance();
let expr = self.parse_expression()?;
self.expect(TokenType::RParen)?;
Ok(expr)
}
_ => Err(self.error(&format!(
"Unexpected token: {:?}",
self.current().token_type
))),
}
}
}
-344
View File
@@ -1,344 +0,0 @@
use std::collections::HashMap;
/// Register allocator for DSA assembly generation
/// Manages general-purpose registers (rg0-rgf) and handles stack spilling
pub struct RegisterAllocator {
    /// Available general-purpose registers, in the order they are tried
    available_registers: Vec<String>,
    /// Maps variable names to their current location (register or stack offset)
    variable_locations: HashMap<String, Location>,
    /// Maps registers to the variables they currently hold
    /// (registers used as plain temporaries have no entry here)
    register_contents: HashMap<String, String>,
    /// Next unused stack offset for local variables (relative to bpr)
    /// Starts at -4 and decreases by 4 for every slot handed out
    stack_offset: i32,
    /// Track which registers are currently in use
    /// (a register with no entry is treated as free)
    in_use: HashMap<String, bool>,
}
/// Where the current value of a variable lives.
#[derive(Debug, Clone)]
pub enum Location {
    /// In the named register.
    Register(String),
    /// In a stack slot at this offset from `bpr`.
    Stack(i32), // offset from bpr
}
impl RegisterAllocator {
/// Creates an allocator in which all sixteen general-purpose registers
/// (`rg0`..`rgf`) are free, no variable has a location yet, and the first
/// stack slot is at offset -4.
pub fn new() -> Self {
    const REGISTER_NAMES: [&str; 16] = [
        "rg0", "rg1", "rg2", "rg3", "rg4", "rg5", "rg6", "rg7",
        "rg8", "rg9", "rga", "rgb", "rgc", "rgd", "rge", "rgf",
    ];
    let available_registers = REGISTER_NAMES
        .iter()
        .map(|name| name.to_string())
        .collect();
    RegisterAllocator {
        available_registers,
        variable_locations: HashMap::new(),
        register_contents: HashMap::new(),
        stack_offset: -4,
        in_use: HashMap::new(),
    }
}
/// Hands out a register for temporary use during expression evaluation.
///
/// A free register is preferred. When every register is taken, the first
/// register (in allocation order) that holds a variable is spilled to the
/// stack and reused; the returned code contains the spill store in that case
/// and is empty otherwise.
///
/// # Errors
/// Fails when all registers are in use and none of them holds a variable.
pub fn alloc_temp(&mut self) -> Result<(String, Vec<String>), String> {
    // Fast path: some register is not in use at all.
    if let Some(free_reg) = self.find_free_register() {
        self.in_use.insert(free_reg.clone(), true);
        return Ok((free_reg, Vec::new()));
    }

    // Slow path: evict a variable to make room.
    let victim = self
        .available_registers
        .iter()
        .find(|reg| self.register_contents.contains_key(*reg))
        .cloned();
    match victim {
        Some(reg) => {
            let spill_code = self.spill_register(&reg)?;
            self.in_use.insert(reg.clone(), true);
            Ok((reg, spill_code))
        }
        None => Err("No registers available and nothing to spill".to_string()),
    }
}
/// Releases a temporary register so it can be allocated again.
///
/// Registers that currently hold a variable are left untouched: those are
/// only released through `free_var`, so calling this on any expression
/// result register is safe.
pub fn free_temp(&mut self, reg: &str) {
    let holds_variable = self.register_contents.contains_key(reg);
    if !holds_variable {
        self.in_use.insert(reg.to_string(), false);
    }
}
/// Makes sure `var_name` lives in a register and returns that register.
///
/// * Already in a register: returned as is, with no code.
/// * On the stack: a register is allocated and a load from the variable's
///   slot is emitted.
/// * Unknown: a register is allocated and bound to the variable.
///
/// # Errors
/// Propagates the failure of `alloc_temp` when no register can be obtained.
pub fn alloc_var(&mut self, var_name: &str) -> Result<(String, Vec<String>), String> {
    let stack_slot = match self.variable_locations.get(var_name) {
        Some(Location::Register(reg)) => return Ok((reg.clone(), Vec::new())),
        Some(Location::Stack(offset)) => Some(*offset),
        None => None,
    };

    let (reg, mut code) = self.alloc_temp()?;
    if let Some(offset) = stack_slot {
        // Bring the spilled value back from its slot.
        code.push(format!("\tldw bpr, {}, {}", reg, offset));
    }

    // From now on the variable is tracked as living in `reg`.
    self.variable_locations
        .insert(var_name.to_string(), Location::Register(reg.clone()));
    self.register_contents
        .insert(reg.clone(), var_name.to_string());
    Ok((reg, code))
}
/// Get the current location of a variable
///
/// Returns `None` when the allocator has no record of `var_name`.
pub fn get_var_location(&self, var_name: &str) -> Option<&Location> {
    self.variable_locations.get(var_name)
}
/// Load a variable into a register (allocating if necessary)
/// Returns the register and assembly code to load it
///
/// This is a thin alias for `alloc_var`; a variable that has no location
/// yet is therefore bound to a fresh register rather than rejected.
pub fn load_var(&mut self, var_name: &str) -> Result<(String, Vec<String>), String> {
    self.alloc_var(var_name)
}
/// Writes the value held in `source_reg` into the variable `var_name`.
///
/// A variable that already has a home is updated in place (register move or
/// stack store). A new variable gets a free register if one exists,
/// otherwise a fresh stack slot. Returns the instructions to emit, which may
/// be empty when the value is already where it belongs.
pub fn store_var(&mut self, var_name: &str, source_reg: &str) -> Vec<String> {
    // Existing variable: write to wherever it currently lives.
    if let Some(location) = self.variable_locations.get(var_name) {
        return match location {
            Location::Register(dest_reg) if dest_reg != source_reg => {
                vec![format!("\tmov {}, {}", source_reg, dest_reg)]
            }
            Location::Register(_) => Vec::new(),
            Location::Stack(offset) => {
                vec![format!("\tstw {}, bpr, {}", source_reg, offset)]
            }
        };
    }

    // New variable: prefer a register, fall back to the stack.
    let mut code = Vec::new();
    match self.find_free_register() {
        Some(free_reg) => {
            if &free_reg != source_reg {
                code.push(format!("\tmov {}, {}", source_reg, free_reg));
            }
            self.variable_locations
                .insert(var_name.to_string(), Location::Register(free_reg.clone()));
            self.register_contents
                .insert(free_reg.clone(), var_name.to_string());
            self.in_use.insert(free_reg, true);
        }
        None => {
            let slot = self.stack_offset;
            code.push(format!("\tstw {}, bpr, {}", source_reg, slot));
            self.variable_locations
                .insert(var_name.to_string(), Location::Stack(slot));
            // Advance to the next stack slot.
            self.stack_offset = slot - 4;
        }
    }
    code
}
/// Moves the variable held in `reg` (if any) to a fresh stack slot.
///
/// Returns the store instruction, or no code when `reg` holds no variable.
/// The register's in-use flag is not touched here. As written, this never
/// returns `Err`.
fn spill_register(&mut self, reg: &str) -> Result<Vec<String>, String> {
    let var_name = match self.register_contents.remove(reg) {
        Some(name) => name,
        None => return Ok(Vec::new()),
    };

    // Claim the next stack slot for the evicted variable.
    let slot = self.stack_offset;
    self.stack_offset -= 4;
    self.variable_locations
        .insert(var_name, Location::Stack(slot));

    Ok(vec![format!("\tstw {}, bpr, {}", reg, slot)])
}
/// Return the first available register that is not flagged as in use.
///
/// Registers with no entry in the `in_use` table count as free.
fn find_free_register(&self) -> Option<String> {
    self.available_registers
        .iter()
        .find(|reg| !self.in_use.get(*reg).copied().unwrap_or(false))
        .cloned()
}
/// Spill every register that currently holds a variable (useful before
/// function calls) and return the combined store instructions.
///
/// A register whose spill reports an error contributes no code.
pub fn spill_all(&mut self) -> Vec<String> {
    // Snapshot the keys first: spilling mutates the contents table.
    let occupied: Vec<String> = self.register_contents.keys().cloned().collect();
    occupied
        .iter()
        .filter_map(|reg| self.spill_register(reg).ok())
        .flatten()
        .collect()
}
/// Report the stack offset as a positive byte count.
///
/// The allocator tracks the next free slot as a negative offset from `bpr`;
/// this returns that offset with the sign flipped.
pub fn get_stack_size(&self) -> i32 {
    let next_slot = self.stack_offset;
    -next_slot
}
/// Forget all per-function state so the allocator can be reused for the
/// next function: variable locations, register contents and in-use flags are
/// cleared and the first stack slot is set back to `bpr - 4`.
pub fn reset(&mut self) {
    self.stack_offset = -4;
    self.in_use.clear();
    self.register_contents.clear();
    self.variable_locations.clear();
}
/// Drop all tracking for a variable that is no longer needed.
///
/// If the variable lived in a register, that register is removed from the
/// contents table and flagged as free.
pub fn free_var(&mut self, var_name: &str) {
    // Removing the entry hands back the old location, so no clone is needed.
    if let Some(Location::Register(reg)) = self.variable_locations.remove(var_name) {
        self.register_contents.remove(&reg);
        self.in_use.insert(reg, false);
    }
}
/// List the registers that hold a variable and are flagged as in use.
///
/// These are the registers that have to be preserved around a function call.
pub fn get_caller_saved_registers(&self) -> Vec<String> {
    self.register_contents
        .keys()
        .filter(|reg| self.in_use.get(*reg).copied().unwrap_or(false))
        .cloned()
        .collect()
}
/// Produce one `push` per register that holds a variable and is in use.
///
/// No liveness analysis is done: every occupied, in-use register is saved.
/// Allocator state is not modified.
pub fn save_caller_saved(&mut self) -> Vec<String> {
    self.register_contents
        .keys()
        .filter(|reg| self.in_use.get(*reg).copied().unwrap_or(false))
        .map(|reg| format!("\tpush {}", reg))
        .collect()
}
/// Produce one `pop` per saved register, in the reverse of the order given,
/// so that values pushed in `saved_regs` order come back off the stack LIFO.
pub fn restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec<String> {
    saved_regs
        .iter()
        .rev()
        .map(|reg| format!("\tpop {}", reg))
        .collect()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Temporaries are handed out in register order and a freed register is
    /// the next one to be reused.
    #[test]
    fn test_basic_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (first, first_code) = allocator.alloc_temp().unwrap();
        assert!(first_code.is_empty()); // No spill needed
        assert_eq!(first, "rg0");

        let (second, second_code) = allocator.alloc_temp().unwrap();
        assert!(second_code.is_empty());
        assert_eq!(second, "rg1");

        allocator.free_temp(&first);

        let (third, third_code) = allocator.alloc_temp().unwrap();
        assert!(third_code.is_empty());
        assert_eq!(third, "rg0"); // Reuses freed register
    }

    /// Asking for the same variable twice yields the same register.
    #[test]
    fn test_variable_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (initial, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(initial, "rg0");

        let (repeated, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(repeated, "rg0");
    }

    /// Once all 16 registers hold variables, the next allocation must spill.
    #[test]
    fn test_stack_allocation() {
        let mut allocator = RegisterAllocator::new();

        for index in 0..16 {
            allocator.alloc_var(&format!("var{}", index)).unwrap();
        }

        let (_reg, code) = allocator.alloc_var("var16").unwrap();
        assert!(!code.is_empty()); // Should have spill code
    }
}
-756
View File
@@ -1,756 +0,0 @@
use std::collections::HashMap;
use std::hash::Hash;
use std::sync::LazyLock;
use std::sync::atomic::AtomicU32;
use std::time::SystemTime;
use chrono::{DateTime, Local};
use crate::registers::{Location, RegisterAllocator};
use crate::{block, cmd, comment, dsa};
use crate::parser::{
BinaryOperator, CompilerError, ConstExpr, Declaration, Dependency, Expression,
Program, Statement, UnaryOperator, Variable,
};
/// Turns a parsed `Program` into DSA assembly text.
pub struct CodeGenerator {
// The program being compiled.
ast: Program,
// Include name -> path of the assembly file to include.
imports: HashMap<String, String>,
// One `dw name: value` line per global variable.
globals: Vec<String>,
// Rendered assembly text, one entry per compiled function.
functions: Vec<String>,
// Names of every top-level declaration (variables, functions, dependencies).
symbols: Vec<String>,
// Register/stack bookkeeping; reset at the start of each function.
allocator: RegisterAllocator,
}
/// Table mapping short built-in names to namespaced library routines.
///
/// Every entry is currently commented out, so the map is empty, and the only
/// lookup against it in the call-generation code is commented out as well.
static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
HashMap::from([
// ("print", "print::print"),
// ("println", "print::println"),
// ("printnum", "print::print_num"),
// ("print_space", "print::print_whitespace"),
// ("print_newline", "print::print_newline"),
// ("print_char", "print::print_byte"),
// ("print_word", "print::print_word"),
// ("print_hex", "print::print_hex_word"),
])
});
/// Render one `include` directive: `include <name>: "<path>"`.
fn import(name: &str, path: &str) -> String {
    format!("include {name}: \"{path}\"")
}
impl CodeGenerator {
/// Instruction used to leave a function: a jump to the shared `_ret` block
/// emitted by `generate_layout`.
const RET: &'static str = "\tjmp _ret";
pub fn new(ast: Program) -> Self {
CodeGenerator {
ast,
imports: HashMap::new(),
globals: Vec::new(),
functions: Vec::new(),
symbols: Vec::new(),
allocator: RegisterAllocator::new(),
}
}
/// Register an include named `name` that points at `path`.
///
/// Registering the same name again replaces the earlier path.
pub fn include(&mut self, name: &str, path: &str) {
    let (key, target) = (name.to_owned(), path.to_owned());
    self.imports.insert(key, target);
}
/// Report whether `name` has been emitted as a global variable.
///
/// Globals are stored as rendered `dw <name>: <value>` lines, so the check
/// looks for a line that starts with `dw <name>:`. Anchoring at the start
/// keeps text inside an initializer from being mistaken for a declaration.
fn is_global(&self, name: &str) -> bool {
    // Build the label once instead of once per stored global.
    let label = format!("dw {}:", name);
    self.globals.iter().any(|global| global.starts_with(&label))
}
/// Compile the whole program and return the final assembly text.
///
/// Works in two passes over the top-level declarations: the first records
/// every declared name in `symbols` so that calls can refer to functions
/// declared later in the file; the second generates code for each
/// declaration. The collected pieces are then assembled by `generate_layout`.
///
/// # Errors
/// Returns the first `CompilerError` produced while generating a declaration
/// or the layout.
pub fn generate(&mut self) -> Result<String, CompilerError> {
    // always include the print library for debugging!
    self.include("print", "./lib/io/print.dsa");

    // One copy of the declaration list is enough: it detaches the list from
    // `self` so `generate_block` can borrow `self` mutably below.
    let declarations = self.ast.declarations.clone();

    for declaration in &declarations {
        let symbol = match declaration {
            Declaration::Variable {
                var: Variable { name, .. },
                ..
            } => name,
            Declaration::Function { name, .. } => name,
            Declaration::Dependency(Dependency { name, .. }) => name,
        };
        self.symbols.push(symbol.clone());
    }

    for declaration in declarations {
        self.generate_block(declaration)?;
    }

    self.generate_layout()
}
/// Assemble the final output file from the pieces collected so far.
///
/// The sections are emitted in this order: a header comment with the
/// generation timestamp, the include directives, the global definitions, the
/// `_init` entry point, the shared `_ret` block, and finally the compiled
/// functions.
///
/// NOTE(review): `imports` is a `HashMap`, so the order of the include lines
/// is whatever the map iteration yields — confirm the assembler does not care.
fn generate_layout(&mut self) -> Result<String, CompilerError> {
let datetime: DateTime<Local> = SystemTime::now().into();
Ok(dsa![
"",
comment!("GENERATED BY DSC COMPILER"),
comment!(format!(
"Generated at {}",
datetime.format("%Y-%m-%d %H:%M:%S")
)),
"",
// imports
comment!("Imports"),
self.imports
.iter()
.map(|(k, v)| import(k, v))
.collect::<Vec<String>>()
.join("\n"),
"",
// reserved memory
comment!("Globals & Reserved Memory"),
self.globals.join("\n"),
"",
// entry point
comment!("Entry Point"),
"dw stack: 0x10000",
"db message: \"Process Exited with code:\"",
// `_init` pushes one word before calling `main`; presumably this is the
// slot `main` writes its return value into (`stw ..., bpr, 8`) — confirm.
block! [ "_init"
dsa![ldw stack, bpr],
dsa![mov bpr, spr],
dsa![push zero],
dsa![call main],
dsa![call print::print_newline],
dsa![lwi message, rg0],
dsa![push rg0],
dsa![call print::print],
dsa![pop zero],
dsa![call print::print_hex_word],
dsa![pop zero],
dsa![hlt]
],
"",
comment!("Return"),
// Shared function exit that every `jmp _ret` lands on.
block! [ "_ret"
dsa![mov bpr, spr],
dsa![pop bpr],
dsa![return]
],
comment!("Compiled Code Starts..."),
// block! [ "main"
// dsa![push bpr],
// dsa![mov spr, bpr],
// dsa![lwi 67, rg1],
// dsa![stw rg1, spr, 8],
// dsa![mov bpr, spr],
// dsa![pop bpr],
// dsa![return]
// ],
self.functions.join("\n"),
])
}
/// Record a global variable as a `dw <name>: <value>` line.
///
/// A global declared without an initializer is given the value `0`.
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
    let value = init.unwrap_or(ConstExpr::Number(0));
    let line = format!("dw {}: {}", name, value);
    self.globals.push(line);
}
/// Generate output for one top-level declaration.
///
/// * Variables become global definitions.
/// * Functions are compiled and their text (with a trailing blank line) is
///   appended to `functions`.
/// * Dependencies are recorded as includes.
///
/// The current implementation always returns `Ok(())`.
fn generate_block(&mut self, block: Declaration) -> Result<(), CompilerError> {
    match block {
        Declaration::Variable { var, init, .. } => self.generate_global(&var.name, init),
        Declaration::Function {
            name, params, body, ..
        } => {
            let lines = self.generate_function(&name, &params, &body);
            self.functions.push(format!("{}\n", lines.join("\n")));
        }
        Declaration::Dependency(Dependency { name, path }) => {
            self.imports.insert(name, path);
        }
    }
    Ok(())
}
// Generate the assembly lines for one function.
//
// Layout of the result: the label, the prologue (`push bpr` / `mov spr, bpr`),
// one load per parameter (parameter i is read from `bpr + 8 + 4*i`), the
// statements of the body, and a closing `jmp _ret` unless the body already
// ended with one.
//
// Panics if allocating a parameter or generating a body statement fails.
fn generate_function(
    &mut self,
    name: &str,
    params: &[Variable],
    body: &[Statement],
) -> Vec<String> {
    // Every function starts with a clean allocator.
    self.allocator.reset();

    // Label and prologue, followed by a blank separator line.
    let mut code = vec![
        format!("{}:", name),
        "\tpush bpr".to_string(),
        "\tmov spr, bpr".to_string(),
        String::new(),
    ];

    // Give each parameter a home and load it from its stack slot.
    for (index, param) in params.iter().enumerate() {
        let offset = 8 + (index as i32 * 4);
        let (reg, load_code) = self.allocator.alloc_var(&param.name).unwrap();
        code.extend(load_code);
        code.push(format!("\tldw bpr, {}, {}", reg, offset));
    }

    for stmt in body {
        code.extend(self.generate_statement(stmt).unwrap());
    }

    // Make sure control never runs off the end of the function.
    if code.last().map(String::as_str) != Some(Self::RET) {
        code.push(Self::RET.to_string());
    }
    code
}
// Example: Generate code for a statement
fn generate_statement(
&mut self,
stmt: &Statement,
) -> Result<Vec<String>, CompilerError> {
let mut code = Vec::new();
match stmt {
Statement::Declaration { var, value } => {
if let Some(expr) = value {
// Evaluate expression
let (result_reg, expr_code) = self.generate_expression(expr, true)?;
code.extend(expr_code);
// Store result in variable
let store_code = self.allocator.store_var(&var.name, &result_reg);
code.extend(store_code);
// Free temporary register
self.allocator.free_temp(&result_reg);
} else {
// Just declaring variable without initialization
self.allocator.alloc_var(&var.name)?;
}
}
Statement::Break => unimplemented!(),
Statement::Continue => unimplemented!(),
Statement::PtrWrite { ptr, value } => {
let (result_reg, expr_code) = self.generate_expression(value, true)?;
code.extend(expr_code);
let (ptr_reg, ptr_code) = self.generate_expression(ptr, true)?;
code.extend(ptr_code);
code.push(format!("\tstw {}, {}", result_reg, ptr_reg));
self.allocator.free_temp(&result_reg);
self.allocator.free_temp(&ptr_reg);
}
Statement::Assign { varname, value } => {
// Evaluate expression
let (result_reg, expr_code) = self.generate_expression(value, true)?;
code.extend(expr_code);
// Check if this is a global variable
if self.is_global(varname) {
// Store to global label
code.push(format!("\tstw {}, {}", result_reg, varname));
} else {
// Store result in local variable
let store_code = self.allocator.store_var(varname, &result_reg);
code.extend(store_code);
}
// Free temporary register
self.allocator.free_temp(&result_reg);
}
Statement::Return(expr) => {
if let Some(e) = expr {
let (result_reg, expr_code) = self.generate_expression(e, true)?;
code.extend(expr_code);
code.push(format!("\tstw {}, bpr, 8", result_reg));
code.push(format!("\tjmp _ret"));
self.allocator.free_temp(&result_reg);
}
}
Statement::If {
condition,
then_stmt,
else_stmt,
} => {
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
code.extend(cond_code);
// Compare with zero
code.push(format!("\tcmp {}, zero", cond_reg));
self.allocator.free_temp(&cond_reg);
// Generate unique labels
let then_label = format!("_then_{}", self.get_unique_label());
let else_label = format!("_else_{}", self.get_unique_label());
let end_label = format!("_end_{}", self.get_unique_label());
// Jump to else if condition is false (equal to zero)
code.push(format!("\tjeq {}", else_label));
// Then block
code.push(format!("{}:", then_label));
for s in then_stmt {
code.extend(self.generate_statement(s)?);
}
if then_stmt.len() == 0 {
code.push("\tnop".to_string());
}
code.push(format!("\tjmp {}", end_label));
// Else block
code.push(format!("{}:", else_label));
for s in else_stmt {
code.extend(self.generate_statement(s)?);
}
if else_stmt.len() == 0 {
code.push("\tnop".to_string());
}
code.push(format!("{}:", end_label));
}
Statement::While { condition, body } => {
let loop_start = format!("_while_start_{}", self.get_unique_label());
let loop_end = format!("_while_end_{}", self.get_unique_label());
code.push(format!("{}:", loop_start));
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
code.extend(cond_code);
code.push(format!("\tcmp {}, zero", cond_reg));
self.allocator.free_temp(&cond_reg);
code.push(format!("\tjeq {}", loop_end));
// Loop body
for s in body {
code.extend(self.generate_statement(s)?);
}
code.push(format!("\tjmp {}", loop_start));
code.push(format!("{}:", loop_end));
}
Statement::Loop(body) => {
let loop_start = format!("_loop_start_{}", self.get_unique_label());
code.push(format!("{}:", loop_start));
for s in body {
code.extend(self.generate_statement(s)?);
}
code.push(format!("\tjmp {}", loop_start));
}
Statement::Expression { expr } => {
let (result_reg, expr_code) = self.generate_expression(expr, false)?;
code.extend(expr_code);
self.allocator.free_temp(&result_reg);
}
Statement::Block(statements) => {
for s in statements {
code.extend(self.generate_statement(s)?);
}
}
}
Ok(code)
}
// Generate the assembly lines for an expression.
//
// Returns `(register, code)`: the register that holds the value once `code`
// has run, and the lines themselves.
//
// `use_result` says whether the caller needs the value. When it is false and
// the expression is pure, nothing is generated and the register name is empty.
// A call whose result is not used reports `zero` as its register.
//
// Calling convention used for `Expression::Call`: arguments are pushed so that
// the first argument ends up on top of the stack, the callee writes its return
// value over that top slot (`bpr + 8`), and the caller pops it afterwards. A
// call without arguments therefore pushes one placeholder word so the return
// value has a slot of its own.
//
// Errors from register allocation are propagated; calling an unknown function
// yields `CompilerError::Undefined`. Binary operators without a code
// generator panic.
fn generate_expression(
    &mut self,
    expr: &Expression,
    use_result: bool,
) -> Result<(String, Vec<String>), CompilerError> {
    let mut code = Vec::new();
    // optimisation to prevent generating dead code!
    if expr.is_pure() && !use_result {
        return Ok((String::new(), code));
    }
    match expr {
        Expression::StringLiteral(value) => {
            let (reg, alloc_code) = self.allocator.alloc_temp()?;
            code.extend(alloc_code);
            // Define the string under a unique label, then load its address.
            let uuid = self.get_unique_label();
            code.push(format!("\tdb str_{uuid}: \"{value}\""));
            code.push(format!("\tlwi str_{uuid}, {reg}"));
            Ok((reg, code))
        }
        Expression::CharLiteral(value) => {
            let (reg, alloc_code) = self.allocator.alloc_temp()?;
            code.extend(alloc_code);
            code.push(format!("\tlli {}, {} // '{value}'", *value as u8, reg));
            Ok((reg, code))
        }
        Expression::Number(value) => {
            let (reg, alloc_code) = self.allocator.alloc_temp()?;
            code.extend(alloc_code);
            // Low 16 bits first; the upper half only when it is needed.
            code.push(format!("\tlli {}, {}", value & 0xFFFF, reg));
            if *value > 0xFFFF || *value < 0 {
                code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg));
            }
            Ok((reg, code))
        }
        Expression::Variable { name, .. } => {
            if self.is_global(&name.name) {
                // Globals are loaded from their label into a temporary.
                let (reg, alloc_code) = self.allocator.alloc_temp()?;
                code.extend(alloc_code);
                code.push(format!("\tldw {}, {}", name.name, reg));
                Ok((reg, code))
            } else {
                let (reg, load_code) = self.allocator.load_var(&name.name)?;
                code.extend(load_code);
                Ok((reg, code))
            }
        }
        Expression::Binary { op, left, right } => {
            let (left_reg, left_code) = self.generate_expression(left, true)?;
            code.extend(left_code);
            let (right_reg, right_code) = self.generate_expression(right, true)?;
            code.extend(right_code);
            let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
            code.extend(result_alloc);

            match op {
                BinaryOperator::Add => {
                    code.push(format!(
                        "\tadd {}, {}, {}",
                        left_reg, right_reg, result_reg
                    ));
                }
                BinaryOperator::Sub => {
                    code.push(format!(
                        "\tsub {}, {}, {}",
                        left_reg, right_reg, result_reg
                    ));
                }
                BinaryOperator::Mul => {
                    // Multiplication is a library routine.
                    self.include("maths", "./lib/maths/core.dsa");
                    code.push(format!("\tpush {}", right_reg));
                    code.push(format!("\tpush {}", left_reg));
                    code.push("\tcall maths::multiply".to_string());
                    code.push(format!("\tpop {}", result_reg));
                    code.push("\tpop zero".to_string());
                }
                // Comparisons yield 1 (true) or 0 (false). Each passes the
                // jump that is taken when the comparison is FALSE.
                BinaryOperator::Eq => {
                    self.emit_comparison(&mut code, "jne", &left_reg, &right_reg, &result_reg)
                }
                BinaryOperator::Ne => {
                    self.emit_comparison(&mut code, "jeq", &left_reg, &right_reg, &result_reg)
                }
                BinaryOperator::Lt => {
                    self.emit_comparison(&mut code, "jge", &left_reg, &right_reg, &result_reg)
                }
                BinaryOperator::Le => {
                    self.emit_comparison(&mut code, "jgt", &left_reg, &right_reg, &result_reg)
                }
                BinaryOperator::Gt => {
                    self.emit_comparison(&mut code, "jle", &left_reg, &right_reg, &result_reg)
                }
                BinaryOperator::Ge => {
                    self.emit_comparison(&mut code, "jlt", &left_reg, &right_reg, &result_reg)
                }
                _ => unimplemented!(),
            }

            // Free operand registers (allocator will protect variables)
            self.allocator.free_temp(&left_reg);
            self.allocator.free_temp(&right_reg);
            Ok((result_reg, code))
        }
        Expression::Call { name, args } => {
            // Evaluate arguments last-to-first so they can be pushed in that
            // order, leaving the first argument on top of the stack.
            let mut arg_regs = Vec::new();
            for arg in args.iter().rev() {
                let (arg_reg, arg_code) = self.generate_expression(arg, true)?;
                code.extend(arg_code);
                arg_regs.push(arg_reg);
            }

            // Variables living in registers are spilled to the stack so the
            // callee is free to use every register.
            let saved_regs = self.allocator.get_caller_saved_registers();
            for reg in &saved_regs {
                code.extend(self.allocator.spill_register(reg)?);
            }

            for (i, arg_reg) in arg_regs.iter().enumerate() {
                code.push(format!(
                    "\tpush {} // push arg {}",
                    arg_reg,
                    args.len() - 1 - i
                ));
            }
            // The callee stores its return value in the top stack slot. With
            // no arguments there is no such slot yet, so reserve one.
            if args.is_empty() {
                code.push("\tpush zero // reserve return slot".to_string());
            }

            // The target must be a declared symbol or live in an included
            // namespace.
            let is_local = self.symbols.contains(&name.name);
            let is_imported = name
                .namespace
                .as_ref()
                .is_some_and(|ns| self.imports.contains_key(ns));
            if !(is_local || is_imported) {
                return Err(CompilerError::Undefined(name.clone()));
            }
            code.push(format!("\tcall {}", name));

            // Words pushed for this call: the arguments, or the placeholder.
            let stack_slots = args.len().max(1);
            let result_reg = if use_result {
                let (reg, result_alloc) = self.allocator.alloc_temp()?;
                code.extend(result_alloc);
                // The top slot carries the return value ...
                code.push(format!("\tpop {}", reg));
                // ... and the remaining slots are discarded.
                for _ in 1..stack_slots {
                    code.push("\tpop zero".to_string());
                }
                reg
            } else {
                for _ in 0..stack_slots {
                    code.push("\tpop zero".to_string());
                }
                "zero".to_string()
            };

            for reg in arg_regs {
                self.allocator.free_temp(&reg);
            }
            Ok((result_reg, code))
        }
        Expression::Unary { op, operand } => {
            let (operand_reg, operand_code) =
                self.generate_expression(operand, true)?;
            code.extend(operand_code);
            let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
            code.extend(result_alloc);
            match op {
                UnaryOperator::Minus => {
                    // Negate: result = 0 - operand
                    code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg));
                }
                UnaryOperator::Plus => {
                    code.push(format!("\tmov {}, {}", operand_reg, result_reg));
                }
                UnaryOperator::Dereference => {
                    code.push(format!("\tldw {}, {}", operand_reg, result_reg));
                }
                UnaryOperator::Reference => {
                    // Force the operand into memory, then compute the address
                    // of the slot it was just written to.
                    // NOTE(review): this assumes the spill actually stored
                    // something, i.e. the operand register holds a variable —
                    // confirm for operands that are temporaries.
                    code.extend(self.allocator.spill_register(&operand_reg)?);
                    code.push(format!(
                        "\tsubi bpr {} {}",
                        -(4 + self.allocator.get_stack_offset()),
                        result_reg
                    ))
                }
            }
            self.allocator.free_temp(&operand_reg);
            Ok((result_reg, code))
        }
        Expression::Empty => Ok(("zero".to_string(), code)),
    }
}

// Emit a comparison that leaves 1 in `result_reg` when it holds and 0
// otherwise. `skip_jump` is the conditional jump taken when the comparison is
// false, skipping the instruction that sets the result to 1.
fn emit_comparison(
    &mut self,
    code: &mut Vec<String>,
    skip_jump: &str,
    left_reg: &str,
    right_reg: &str,
    result_reg: &str,
) {
    code.push(format!("\tcmp {}, {}", left_reg, right_reg));
    code.push(format!("\tlli 0, {}", result_reg));
    let end_label = format!("_cmp_end_{}", self.get_unique_label());
    code.push(format!("\t{} {}", skip_jump, end_label));
    code.push(format!("\tlli 1, {}", result_reg));
    code.push(format!("{}:", end_label));
}
// Hand out a fresh number for building unique labels.
//
// The counter is a process-wide static, so numbers are unique across all
// generators in the process. The first value returned is "1".
fn get_unique_label(&mut self) -> String {
    static COUNTER: AtomicU32 = AtomicU32::new(0);
    let previous = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
    let next = previous + 1;
    next.to_string()
}
}
/// Join any number of values into one `String`, one value per line.
///
/// Each argument must implement `Display`. Every argument, including the
/// last, is followed by a newline. A trailing comma is accepted.
#[macro_export]
macro_rules! dsa {
    ($($line:expr),* $(,)?) => {{
        use ::std::fmt::Write as _;
        // One buffer that every argument is appended to in turn.
        let mut out = ::std::string::String::new();
        $(
            writeln!(out, "{}", $line).expect("write to String failed");
        )*
        out
    }};
}
// ──────────────────────── cmd! ────────────────────────
// A tiny helper that turns its token stream into a `String` holding one line
// of assembly: the tokens as written, followed by a newline.
//
//     cmd!(mov rg0, rg1)   // "mov rg0, rg1\n"
#[macro_export]
macro_rules! cmd {
    ($($tokens:tt)*) => {{
        // `$tokens` was captured inside a repetition, so it has to be
        // expanded inside one as well.
        ::std::string::String::from(concat!(stringify!($($tokens)*), "\n"))
    }};
}
// ──────────────────────── block! ────────────────────────
// Builds a labelled block of assembly at compile time.
//
// Usage:
//
// let asm = block![ "name"
//     dsa![mov rg0, rg1],
//     dsa![add rg1, rg1]
// ];
//
// `asm` is a `&'static str` containing:
//
// name:
//     mov rg0, rg1
//     add rg1, rg1
//
// The `dsa![...]` wrappers are matched as plain tokens; the `dsa!` macro
// itself is not invoked.
#[macro_export]
macro_rules! block {
    // The first token must be a string literal: that's the label.
    ($name:literal $(dsa![$($instr:tt)*]),* ) => {{
        // Label line first, then one tab-indented line per instruction.
        const TEXT: &str = concat!(
            $name, ":\n",
            $("\t", stringify!($($instr)*), "\n"),*
        );
        TEXT
    }};
}
/// Format a value as a single `// ...` comment line (no trailing newline).
#[macro_export]
macro_rules! comment {
    ($body:expr) => {{
        ::std::format!("// {}", $body)
    }};
}
-627
View File
@@ -1,627 +0,0 @@
use std::iter::Peekable;
use std::str::Chars;
/// One lexical token produced by the lexer.
///
/// Identifier, string, integer and character tokens carry their value; all
/// other variants are plain markers.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
// Keywords
Fn,
Let,
If,
Else,
Loop,
While,
Break,
Return,
Continue,
Include,
Static,
Const,
// Identifiers and literals
Identifier(Name),
String(String),
Integer(u64),
Char(char),
// Symbols
LeftParen, // (
RightParen, // )
LeftBrace, // {
RightBrace, // }
Semicolon, // ;
Colon, // :
Comma, // ,
// Operators
Plus, // +
Minus, // -
Star, // *
Amphersand, // &
Slash, // /
Assign, // =
EqualEqual, // ==
Bang, // !
BangEqual, // !=
Less, // <
LessEqual, // <=
Greater, // >
GreaterEqual, // >=
RightArrow, // ->
// Special
// End of input.
Eof,
}
/// An identifier, optionally qualified as `namespace::name`.
#[derive(Debug, PartialEq, Clone)]
pub struct Name {
// The identifier itself (the part after `::` when a namespace is present).
pub name: String,
// The part before `::`, or `None` for an unqualified identifier.
pub namespace: Option<String>,
}
use std::fmt;
/// Renders as `namespace::name` when a namespace is present, otherwise as
/// the bare `name`.
impl fmt::Display for Name {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.namespace {
            Some(ns) => write!(f, "{}::{}", ns, self.name),
            None => write!(f, "{}", self.name),
        }
    }
}
impl Token {
pub fn tt(&self) -> &str {
match self {
Token::Const => "Const",
Token::Static => "Static",
Token::Include => "Include",
Token::Fn => "Fn",
Token::If => "If",
Token::Let => "Let",
Token::Else => "Else",
Token::Loop => "Loop",
Token::While => "While",
Token::Break => "Break",
Token::Return => "Return",
Token::Continue => "Continue",
Token::Identifier(_) => "Identifier",
Token::String(_) => "String",
Token::Integer(_) => "UnsignedInt",
Token::Char(_) => "Char",
Token::LeftParen => "LeftParen",
Token::RightParen => "RightParen",
Token::LeftBrace => "LeftBrace",
Token::RightBrace => "RightBrace",
Token::Semicolon => "Semicolon",
Token::Colon => "Colon",
Token::Comma => "Comma",
Token::RightArrow => "RightArrow",
Token::Plus => "Plus",
Token::Minus => "Minus",
Token::Star => "Star",
Token::Amphersand => "Amphersand",
Token::Slash => "Slash",
Token::Assign => "Assign",
Token::EqualEqual => "EqualEqual",
Token::Bang => "Bang",
Token::BangEqual => "BangEqual",
Token::Less => "Less",
Token::LessEqual => "LessEqual",
Token::Greater => "Greater",
Token::GreaterEqual => "GreaterEqual",
Token::Eof => "Eof",
}
}
}
/// Character-level scanner that turns source text into `Token`s.
#[derive(Debug)]
pub struct Lexer<'a> {
// The characters that come after `current`.
chars: Peekable<Chars<'a>>,
// The character being examined, or `None` once the input is exhausted.
current: Option<char>,
// Line counter, starting at 1 and incremented as newlines are skipped.
line: usize,
}
impl<'a> Lexer<'a> {
/// Create a lexer over `input`, positioned on its first character and on
/// line 1.
pub fn new(input: &'a str) -> Self {
    let mut lexer = Lexer {
        chars: input.chars().peekable(),
        current: None,
        line: 1,
    };
    // Load the first character into `current`.
    lexer.advance();
    lexer
}
/// Step to the next character and return it (`None` at end of input).
fn advance(&mut self) -> Option<char> {
    let next = self.chars.next();
    self.current = next;
    next
}
/// Look at the character after `current` without consuming it.
fn peek(&mut self) -> Option<&char> {
    let upcoming = &mut self.chars;
    upcoming.peek()
}
/// Skip over whitespace, counting every newline that is passed.
///
/// Stops on the first non-whitespace character or at end of input.
fn skip_whitespace(&mut self) {
    while let Some(space) = self.current.filter(|c| c.is_whitespace()) {
        if space == '\n' {
            self.line += 1;
        }
        self.advance();
    }
}
/// Skip a `//` comment, including the newline that ends it.
///
/// Must be called with `current` on the first slash. The line counter is
/// incremented when the terminating newline is consumed.
fn skip_line_comment(&mut self) {
    // The two slashes.
    self.advance();
    self.advance();

    // Consume characters up to and including the newline (or to EOF).
    while let Some(c) = self.current {
        self.advance();
        if c == '\n' {
            self.line += 1;
            break;
        }
    }
}
/// Skip a `/* ... */` comment.
///
/// Must be called with `current` on the opening slash. On success `current`
/// is the character after the closing `*/`. Newlines inside the comment are
/// counted.
///
/// # Errors
/// Returns a message naming the starting line if the input ends before the
/// comment is closed.
fn skip_block_comment(&mut self) -> Result<(), String> {
    // The opening `/*`.
    self.advance();
    self.advance();
    let start_line = self.line;

    while let Some(c) = self.current {
        match c {
            '\n' => self.line += 1,
            '*' if self.peek() == Some(&'/') => {
                // The closing `*/`.
                self.advance();
                self.advance();
                return Ok(());
            }
            _ => {}
        }
        self.advance();
    }

    Err(format!(
        "Unterminated block comment starting at line {}",
        start_line
    ))
}
/// Skip any run of whitespace, `//` comments and `/* */` comments.
///
/// An unterminated block comment is reported on stderr and skipping carries
/// on. A lone `/` (division) is left in place.
fn skip_whitespace_and_comments(&mut self) {
    loop {
        self.skip_whitespace();

        // Only a slash can start a comment.
        if self.current != Some('/') {
            break;
        }
        match self.peek().copied() {
            Some('/') => self.skip_line_comment(),
            Some('*') => {
                if let Err(e) = self.skip_block_comment() {
                    eprintln!("Lexer error: {}", e);
                }
            }
            _ => break,
        }
    }
}
/// Read an identifier starting at `current`.
///
/// `current` is included only if it is alphabetic or `_`; following
/// characters are taken while they are alphanumeric or `_`. On return
/// `current` is the last character that was consumed.
fn read_identifier(&mut self) -> String {
    let mut ident = String::new();

    if let Some(first) = self.current.filter(|c| c.is_alphabetic() || *c == '_') {
        ident.push(first);
    }

    while let Some(next) = self
        .peek()
        .copied()
        .filter(|c| c.is_alphanumeric() || *c == '_')
    {
        self.advance();
        ident.push(next);
    }
    ident
}
/// Read a word and classify it as a keyword, a plain identifier or a
/// namespaced identifier (`ns::name`).
///
/// Keywords are recognised first and are never namespaced. A single `:`
/// after the word (a type annotation) is left unconsumed.
fn keyword_or_identifier(&mut self) -> Token {
    let first_ident = self.read_identifier();

    match first_ident.as_str() {
        "fn" => return Token::Fn,
        "if" => return Token::If,
        "else" => return Token::Else,
        "while" => return Token::While,
        "loop" => return Token::Loop,
        "break" => return Token::Break,
        "return" => return Token::Return,
        "continue" => return Token::Continue,
        "include" => return Token::Include,
        "let" => return Token::Let,
        "const" => return Token::Const,
        "static" => return Token::Static,
        _ => {}
    }

    // `peek` only sees one character ahead, so the second colon is checked
    // on a throwaway copy of the iterator; nothing is consumed by this test.
    let namespaced =
        self.peek() == Some(&':') && self.chars.clone().nth(1) == Some(':');

    if namespaced {
        self.advance(); // first ':'
        self.advance(); // second ':'
        let name = self.read_identifier();
        return Token::Identifier(Name {
            namespace: Some(first_ident),
            name,
        });
    }

    Token::Identifier(Name {
        namespace: None,
        name: first_ident,
    })
}
/// Read an integer literal starting at `current`.
///
/// A `0x`/`0X` prefix selects hexadecimal, `0b`/`0B` selects binary, and
/// anything else is read as decimal.
///
/// # Panics
/// Panics if called at end of input.
fn read_number(&mut self) -> Result<u64, String> {
    if self.current.unwrap() == '0' {
        match self.peek().copied() {
            Some('x') | Some('X') => {
                self.advance(); // consume '0'
                self.advance(); // consume 'x'
                return self.read_hex_number();
            }
            Some('b') | Some('B') => {
                self.advance(); // consume '0'
                self.advance(); // consume 'b'
                return self.read_binary_number();
            }
            _ => {}
        }
    }
    self.read_decimal_number()
}
/// Read a run of decimal digits beginning with `current` and parse it.
///
/// # Errors
/// Returns a message containing the text if it does not parse as a `u64`.
fn read_decimal_number(&mut self) -> Result<u64, String> {
    let mut digits = String::new();
    // `current` is taken as-is; the caller has already checked it.
    digits.extend(self.current);

    while let Some(digit) = self.peek().copied().filter(char::is_ascii_digit) {
        self.advance();
        digits.push(digit);
    }

    match digits.parse::<u64>() {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Invalid decimal number: {}", digits)),
    }
}
/// Read the digits of a hexadecimal literal; the `0x` prefix has already
/// been consumed and `current` is the first character after it.
///
/// # Errors
/// Returns a message if there are no hex digits or the value does not fit in
/// a `u64`.
fn read_hex_number(&mut self) -> Result<u64, String> {
    let mut digits = String::new();

    if let Some(first) = self.current.filter(char::is_ascii_hexdigit) {
        digits.push(first);
    }
    while let Some(digit) = self.peek().copied().filter(char::is_ascii_hexdigit) {
        self.advance();
        digits.push(digit);
    }

    if digits.is_empty() {
        return Err("Invalid hexadecimal number: no digits after 0x".to_string());
    }
    match u64::from_str_radix(&digits, 16) {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Invalid hexadecimal number: {}", digits)),
    }
}
/// Read the digits of a binary literal; the `0b` prefix has already been
/// consumed and `current` is the first character after it.
///
/// # Errors
/// Returns a message if there are no binary digits or the value does not fit
/// in a `u64`.
fn read_binary_number(&mut self) -> Result<u64, String> {
    let mut bits = String::new();

    if let Some(first) = self.current.filter(|c| matches!(c, '0' | '1')) {
        bits.push(first);
    }
    while let Some(bit) = self.peek().copied().filter(|c| matches!(c, '0' | '1')) {
        self.advance();
        bits.push(bit);
    }

    if bits.is_empty() {
        return Err("Invalid binary number: no digits after 0b".to_string());
    }
    match u64::from_str_radix(&bits, 2) {
        Ok(value) => Ok(value),
        Err(_) => Err(format!("Invalid binary number: {}", bits)),
    }
}
/// Read a string literal; `current` must be the opening quote.
///
/// On success `current` is left on the closing quote. The escapes `\n`, `\t`
/// and `\r` are translated; any other escaped character stands for itself.
///
/// # Errors
/// Returns a message if the input ends inside the string or right after a
/// backslash.
fn read_string(&mut self) -> Result<String, String> {
    self.advance(); // Skip the opening quote
    let mut text = String::new();

    loop {
        let Some(c) = self.current else {
            return Err("Unterminated string literal".to_string());
        };
        match c {
            '"' => return Ok(text),
            '\\' => {
                self.advance();
                let Some(escaped) = self.current else {
                    return Err("Unexpected end of string after escape".to_string());
                };
                text.push(match escaped {
                    'n' => '\n',
                    't' => '\t',
                    'r' => '\r',
                    // `\\`, `\"` and unknown escapes yield the character itself.
                    other => other,
                });
            }
            plain => text.push(plain),
        }
        self.advance();
    }
}
/// Consume the next character if it equals `expected`.
///
/// Returns `true` when the character was consumed, `false` (with nothing
/// consumed) otherwise.
fn match_next(&mut self, expected: char) -> bool {
    if self.peek() == Some(&expected) {
        self.advance();
        true
    } else {
        false
    }
}
/// Map a character that always forms a token on its own to that token.
///
/// Returns `None` for every other character. Nothing is consumed.
fn scan_single_char_token(&mut self, c: char) -> Option<Token> {
    let token = match c {
        '(' => Token::LeftParen,
        ')' => Token::RightParen,
        '{' => Token::LeftBrace,
        '}' => Token::RightBrace,
        ';' => Token::Semicolon,
        ',' => Token::Comma,
        '&' => Token::Amphersand,
        '+' => Token::Plus,
        '*' => Token::Star,
        _ => return None,
    };
    Some(token)
}
/// Recognises operators that are one or two characters long.
///
/// For `-`, `!`, `=`, `<` and `>` the following character is tested with
/// `match_next`, which consumes it when it completes a two-character
/// operator (`->`, `!=`, `==`, `<=`, `>=`). `c` itself is never consumed
/// here.
///
/// Returns `None` when `c` does not start an operator handled here, and
/// also when `c` is `/` followed by `/` or `*`: that is the start of a
/// comment, which is left untouched for `skip_whitespace_and_comments`.
fn scan_operator(&mut self, c: char) -> Option<Token> {
    let token = match c {
        '-' if self.match_next('>') => Token::RightArrow,
        '-' => Token::Minus,

        '!' if self.match_next('=') => Token::BangEqual,
        '!' => Token::Bang,

        '=' if self.match_next('=') => Token::EqualEqual,
        '=' => Token::Assign,

        '<' if self.match_next('=') => Token::LessEqual,
        '<' => Token::Less,

        '>' if self.match_next('=') => Token::GreaterEqual,
        '>' => Token::Greater,

        // A lone colon (type annotations). Note: `::` is handled in
        // keyword_or_identifier for namespaces.
        ':' => Token::Colon,

        // Either a division sign or the opening of a comment.
        '/' => match self.peek() {
            Some(&'/') | Some(&'*') => return None,
            _ => Token::Slash,
        },

        _ => return None,
    };
    Some(token)
}
/// Produces the next token from the input.
///
/// Whitespace and comments are skipped first. The current character is
/// then tried against, in order: single-character tokens, operators,
/// string literals, identifiers/keywords and numbers. Each successful
/// branch calls `advance()` once more after building its token, so the
/// next call starts on a fresh character.
///
/// Lexing problems never abort tokenisation; they are reported on stderr
/// and recovered from:
/// * a malformed string yields `Token::String` with an empty string,
/// * a malformed number yields `Token::Integer(0)` after the remaining
///   alphanumeric characters have been skipped,
/// * an unrecognised character is dropped and scanning continues.
///
/// Returns `Token::Eof` once no current character remains.
pub fn next_token(&mut self) -> Token {
    // Iterate instead of recursing on unknown characters: Rust gives no
    // tail-call guarantee, so a long run of unrecognised characters would
    // otherwise add one stack frame per character.
    loop {
        self.skip_whitespace_and_comments();

        let Some(c) = self.current else {
            return Token::Eof;
        };

        // Single-character tokens first.
        if let Some(token) = self.scan_single_char_token(c) {
            self.advance();
            return token;
        }

        // Operators, which may be two characters long.
        if let Some(token) = self.scan_operator(c) {
            self.advance();
            return token;
        }

        // String literals.
        if c == '"' {
            let token = match self.read_string() {
                Ok(s) => Token::String(s),
                Err(e) => {
                    eprintln!("Lexer error on line {}: {}", self.line, e);
                    // Resynchronise at the next quote or line break.
                    while let Some(ch) = self.current {
                        if ch == '"' || ch == '\n' {
                            break;
                        }
                        self.advance();
                    }
                    Token::String(String::new())
                }
            };
            self.advance();
            return token;
        }

        // Identifiers and keywords (including namespaced identifiers).
        if c.is_alphabetic() || c == '_' {
            let token = self.keyword_or_identifier();
            self.advance();
            return token;
        }

        // Numbers (decimal, hex, binary).
        if c.is_ascii_digit() {
            let token = match self.read_number() {
                Ok(num) => Token::Integer(num),
                Err(e) => {
                    eprintln!("Lexer error on line {}: {}", self.line, e);
                    // Swallow the rest of the malformed literal.
                    while let Some(&ch) = self.peek() {
                        if !ch.is_alphanumeric() {
                            break;
                        }
                        self.advance();
                    }
                    Token::Integer(0)
                }
            };
            self.advance();
            return token;
        }

        // Unknown character: warn, drop it and go round again.
        eprintln!(
            "Lexer warning on line {}: Skipping unknown character '{}'",
            self.line, c
        );
        self.advance();
    }
}
}
/// Lets a `Lexer` be used as an iterator over its tokens.
///
/// Iteration ends (`None`) when `next_token` returns `Token::Eof`; the
/// `Eof` token itself is never yielded.
impl<'a> Iterator for Lexer<'a> {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        let token = self.next_token();
        if matches!(token, Token::Eof) {
            None
        } else {
            Some(token)
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Placeholder that only proves the test module compiles and runs.
    ///
    /// It deliberately asserts nothing: a constant `assert!(true)` can
    /// never fail and would only suggest coverage that does not exist.
    ///
    /// TODO: replace with real lexer tests (operators, string escapes,
    /// numeric literals, error recovery).
    #[test]
    fn test_basic() {}
}