Deleted the C compiler
This commit is contained in:
@@ -1,8 +0,0 @@
|
||||
[package]
|
||||
name = "c_compiler"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
|
||||
[dependencies]
|
||||
chrono = "0.4.42"
|
||||
@@ -1,14 +0,0 @@
|
||||
// Fixture: prints the global var_x plus factorial(5).
int var_x = 5;

int factorial(int n) {
    if (n > 1) {
        return n * factorial(n - 1);
    }
    return 1;
}

int main() {
    int result = var_x + factorial(5);
    print(result);
    return 0;
}
|
||||
@@ -1,926 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple C to DSA Assembly Compiler
|
||||
Supports a subset of C including:
|
||||
- int variables and functions
|
||||
- Arithmetic operations (+, -, *, /)
|
||||
- Comparisons (==, !=, <, >, <=, >=)
|
||||
- If/else statements
|
||||
- While loops
|
||||
- Function calls
|
||||
- Return statements
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from pprint import pprint
|
||||
import json
|
||||
|
||||
|
||||
class TokenType(Enum):
    """Every kind of token the lexer can produce.

    For keywords, operators and delimiters the member value is the exact
    source spelling; IDENTIFIER, NUMBER and EOF use a descriptive tag.
    """

    # --- reserved words ---
    INT = "int"
    IF = "if"
    ELSE = "else"
    WHILE = "while"
    RETURN = "return"

    # --- names and integer literals ---
    IDENTIFIER = "IDENTIFIER"
    NUMBER = "NUMBER"

    # --- arithmetic, assignment and comparison operators ---
    PLUS = "+"
    MINUS = "-"
    STAR = "*"
    SLASH = "/"
    ASSIGN = "="
    EQ = "=="
    NE = "!="
    LT = "<"
    GT = ">"
    LE = "<="
    GE = ">="

    # --- punctuation ---
    LPAREN = "("
    RPAREN = ")"
    LBRACE = "{"
    RBRACE = "}"
    SEMICOLON = ";"
    COMMA = ","

    # --- end-of-input marker appended by the lexer ---
    EOF = "EOF"
|
||||
|
||||
|
||||
@dataclass
class Token:
    """A single lexeme together with the position where it started."""

    # Category of the lexeme.
    type: TokenType
    # Source text of the lexeme (empty string for EOF).
    value: str
    # 1-based line of the first character.
    line: int
    # 1-based column of the first character.
    col: int
|
||||
|
||||
|
||||
class Lexer:
    """Turns C-subset source text into a list of ``Token`` objects.

    The lexer tracks a 1-based line/column position so that every token (and
    every error message) can point back at the source.
    """

    # Reserved words; any other identifier becomes TokenType.IDENTIFIER.
    _KEYWORDS = {
        "int": TokenType.INT,
        "if": TokenType.IF,
        "else": TokenType.ELSE,
        "while": TokenType.WHILE,
        "return": TokenType.RETURN,
    }

    # Two-character operators; these are tried before the one-character table
    # so that "<=" is not lexed as "<" followed by "=".
    _TWO_CHAR_OPERATORS = {
        "==": TokenType.EQ,
        "!=": TokenType.NE,
        "<=": TokenType.LE,
        ">=": TokenType.GE,
    }

    # One-character operators and delimiters.
    _ONE_CHAR_TOKENS = {
        "+": TokenType.PLUS,
        "-": TokenType.MINUS,
        "*": TokenType.STAR,
        "/": TokenType.SLASH,
        "=": TokenType.ASSIGN,
        "<": TokenType.LT,
        ">": TokenType.GT,
        "(": TokenType.LPAREN,
        ")": TokenType.RPAREN,
        "{": TokenType.LBRACE,
        "}": TokenType.RBRACE,
        ";": TokenType.SEMICOLON,
        ",": TokenType.COMMA,
    }

    # Only ASCII digits form number literals. str.isdigit() also accepts
    # characters such as superscript digits, which int() later rejects.
    _DIGITS = "0123456789"

    def __init__(self, source: str):
        self.source = source
        self.pos = 0
        self.line = 1
        self.col = 1
        self.tokens = []

    def error(self, msg: str):
        """Raise a SyntaxError tagged with the current line and column."""
        raise SyntaxError(f"Lexer error at line {self.line}, col {self.col}: {msg}")

    def peek(self, offset: int = 0) -> Optional[str]:
        """Return the character ``offset`` places ahead, or None past the end."""
        pos = self.pos + offset
        return self.source[pos] if pos < len(self.source) else None

    def advance(self) -> Optional[str]:
        """Consume one character, updating line/col; None at end of input."""
        if self.pos >= len(self.source):
            return None
        char = self.source[self.pos]
        self.pos += 1
        if char == "\n":
            self.line += 1
            self.col = 1
        else:
            self.col += 1
        return char

    def skip_whitespace(self):
        """Consume a run of spaces, tabs and line breaks."""
        while self.peek() and self.peek() in " \t\n\r":
            self.advance()

    def skip_comment(self):
        """Consume one ``//`` comment, including its terminating newline."""
        if self.peek() == "/" and self.peek(1) == "/":
            while self.peek() and self.peek() != "\n":
                self.advance()
            self.advance()  # skip newline

    def _skip_trivia(self):
        """Skip any mix of whitespace and comments until neither remains.

        A single whitespace-then-comment pass is not enough: a comment is
        normally followed by more whitespace (a blank or indented line) and
        possibly further comments.
        """
        while True:
            before = self.pos
            self.skip_whitespace()
            self.skip_comment()
            if self.pos == before:
                return

    def read_number(self) -> str:
        """Consume and return a run of ASCII digits."""
        digits = []
        while self.peek() is not None and self.peek() in self._DIGITS:
            digits.append(self.advance())
        return "".join(digits)

    def read_identifier(self) -> str:
        """Consume and return a run of alphanumerics and underscores."""
        ident = ""
        while self.peek() and (self.peek().isalnum() or self.peek() == "_"):
            ident += self.advance()
        return ident

    def tokenize(self) -> List[Token]:
        """Lex the whole source and return the tokens, ending with EOF.

        Raises:
            SyntaxError: on any character that cannot start a token.
        """
        while True:
            self._skip_trivia()
            if self.pos >= len(self.source):
                break

            line, col = self.line, self.col
            char = self.peek()
            pair = char + (self.peek(1) or "")

            if char in self._DIGITS:
                num = self.read_number()
                self.tokens.append(Token(TokenType.NUMBER, num, line, col))
            elif char.isalpha() or char == "_":
                ident = self.read_identifier()
                token_type = self._KEYWORDS.get(ident, TokenType.IDENTIFIER)
                self.tokens.append(Token(token_type, ident, line, col))
            elif pair in self._TWO_CHAR_OPERATORS:
                self.advance()
                self.advance()
                self.tokens.append(
                    Token(self._TWO_CHAR_OPERATORS[pair], pair, line, col)
                )
            elif char in self._ONE_CHAR_TOKENS:
                self.advance()
                self.tokens.append(
                    Token(self._ONE_CHAR_TOKENS[char], char, line, col)
                )
            else:
                self.error(f"Unexpected character: {char}")

        self.tokens.append(Token(TokenType.EOF, "", self.line, self.col))
        return self.tokens
|
||||
|
||||
|
||||
# AST Node classes
|
||||
@dataclass
class ASTNode:
    """Common base of every syntax-tree node."""


@dataclass
class Program(ASTNode):
    """Root node: the top-level declarations in source order."""

    declarations: List["Declaration"]


@dataclass
class Declaration(ASTNode):
    """Base of top-level declarations."""


@dataclass
class FunctionDecl(Declaration):
    """A function definition: its name, parameter names and body."""

    name: str
    params: List[str]
    body: "CompoundStmt"


@dataclass
class VarDecl(Declaration):
    """A top-level variable with an optional initializer expression."""

    name: str
    init: Optional["Expression"] = None


@dataclass
class Statement(ASTNode):
    """Base of all statement nodes."""


@dataclass
class CompoundStmt(Statement):
    """A brace-delimited sequence of statements."""

    statements: List[Statement]


@dataclass
class ExprStmt(Statement):
    """An expression used as a statement; ``expr`` is None for a bare ``;``."""

    expr: Optional["Expression"]


@dataclass
class IfStmt(Statement):
    """``if`` with a mandatory then-branch and an optional else-branch."""

    condition: "Expression"
    then_stmt: Statement
    else_stmt: Optional[Statement] = None


@dataclass
class WhileStmt(Statement):
    """``while`` loop: a condition and the statement it repeats."""

    condition: "Expression"
    body: Statement


@dataclass
class ReturnStmt(Statement):
    """``return`` with an optional value expression."""

    expr: Optional["Expression"]


@dataclass
class Expression(ASTNode):
    """Base of all expression nodes."""


@dataclass
class BinaryOp(Expression):
    """Two operands joined by an operator, stored as its source spelling."""

    op: str
    left: Expression
    right: Expression


@dataclass
class UnaryOp(Expression):
    """A prefix operator (source spelling) applied to one operand."""

    op: str
    operand: Expression


@dataclass
class AssignExpr(Expression):
    """Assignment of ``value`` to the variable called ``name``."""

    name: str
    value: Expression


@dataclass
class VarExpr(Expression):
    """A read of the variable called ``name``."""

    name: str


@dataclass
class NumberExpr(Expression):
    """An integer literal."""

    value: int


@dataclass
class CallExpr(Expression):
    """A call of the function ``name`` with the given argument expressions."""

    name: str
    args: List[Expression]
|
||||
|
||||
|
||||
class Parser:
    """Recursive-descent parser that turns a token list into a ``Program``.

    Precedence, loosest to tightest: assignment, comparison, additive,
    multiplicative, unary, primary. Binary operators are left-associative;
    assignment is right-associative.
    """

    def __init__(self, tokens: List[Token]):
        self.tokens = tokens
        self.pos = 0

    def error(self, msg: str):
        """Raise a SyntaxError located at the current token."""
        where = self.current()
        raise SyntaxError(f"Parser error at line {where.line}, col {where.col}: {msg}")

    def current(self) -> Token:
        """Return the token under the cursor (the last token once past the end)."""
        if self.pos < len(self.tokens):
            return self.tokens[self.pos]
        return self.tokens[-1]

    def peek(self, offset: int = 0) -> Token:
        """Return the token ``offset`` places ahead, clamped to the last token."""
        index = self.pos + offset
        if index < len(self.tokens):
            return self.tokens[index]
        return self.tokens[-1]

    def advance(self) -> Token:
        """Return the current token and step forward, never past the last one."""
        consumed = self.current()
        if self.pos < len(self.tokens) - 1:
            self.pos += 1
        return consumed

    def expect(self, token_type: TokenType) -> Token:
        """Consume a token of ``token_type`` or raise a SyntaxError."""
        found = self.current()
        if found.type != token_type:
            self.error(f"Expected {token_type.value}, got {found.type.value}")
        return self.advance()

    def _at(self, token_type: TokenType) -> bool:
        """True when the current token has the given type."""
        return self.current().type == token_type

    def _parse_initializer(self):
        """Parse an optional ``= expression`` suffix; None when absent."""
        if not self._at(TokenType.ASSIGN):
            return None
        self.advance()
        return self.parse_expression()

    def _parse_left_assoc(self, parse_operand, operator_types) -> Expression:
        """Parse ``operand (op operand)*`` into a left-leaning BinaryOp chain."""
        node = parse_operand()
        while self.current().type in operator_types:
            symbol = self.advance().value
            rhs = parse_operand()
            node = BinaryOp(symbol, node, rhs)
        return node

    def parse(self) -> Program:
        """Parse declarations until EOF."""
        declarations = []
        while not self._at(TokenType.EOF):
            declarations.append(self.parse_declaration())
        return Program(declarations)

    def parse_declaration(self) -> Declaration:
        """Parse ``int name ...`` as either a function or a variable."""
        self.expect(TokenType.INT)
        name = self.expect(TokenType.IDENTIFIER).value

        if not self._at(TokenType.LPAREN):
            # Variable declaration
            init = self._parse_initializer()
            self.expect(TokenType.SEMICOLON)
            return VarDecl(name, init)

        # Function declaration
        self.advance()
        params = []
        if not self._at(TokenType.RPAREN):
            while True:
                self.expect(TokenType.INT)
                params.append(self.expect(TokenType.IDENTIFIER).value)
                if not self._at(TokenType.COMMA):
                    break
                self.advance()
        self.expect(TokenType.RPAREN)
        body = self.parse_compound_stmt()
        return FunctionDecl(name, params, body)

    def parse_compound_stmt(self) -> CompoundStmt:
        """Parse ``{ statement* }``."""
        self.expect(TokenType.LBRACE)
        statements = []
        while not self._at(TokenType.RBRACE):
            statements.append(self.parse_statement())
        self.expect(TokenType.RBRACE)
        return CompoundStmt(statements)

    def parse_statement(self) -> Statement:
        """Parse one statement, dispatching on its first token."""
        kind = self.current().type

        by_leading_token = {
            TokenType.LBRACE: self.parse_compound_stmt,
            TokenType.IF: self.parse_if_stmt,
            TokenType.WHILE: self.parse_while_stmt,
            TokenType.RETURN: self.parse_return_stmt,
        }
        handler = by_leading_token.get(kind)
        if handler is not None:
            return handler()

        if kind == TokenType.INT:
            # Local variable declaration: represented as an assignment when it
            # has an initializer, otherwise as an empty expression statement.
            self.advance()
            name = self.expect(TokenType.IDENTIFIER).value
            init = self._parse_initializer()
            self.expect(TokenType.SEMICOLON)
            return ExprStmt(AssignExpr(name, init) if init else None)

        expr = None
        if not self._at(TokenType.SEMICOLON):
            expr = self.parse_expression()
        self.expect(TokenType.SEMICOLON)
        return ExprStmt(expr)

    def parse_if_stmt(self) -> IfStmt:
        """Parse ``if (cond) stmt [else stmt]``."""
        self.expect(TokenType.IF)
        self.expect(TokenType.LPAREN)
        condition = self.parse_expression()
        self.expect(TokenType.RPAREN)
        then_stmt = self.parse_statement()

        else_stmt = None
        if self._at(TokenType.ELSE):
            self.advance()
            else_stmt = self.parse_statement()

        return IfStmt(condition, then_stmt, else_stmt)

    def parse_while_stmt(self) -> WhileStmt:
        """Parse ``while (cond) stmt``."""
        self.expect(TokenType.WHILE)
        self.expect(TokenType.LPAREN)
        condition = self.parse_expression()
        self.expect(TokenType.RPAREN)
        return WhileStmt(condition, self.parse_statement())

    def parse_return_stmt(self) -> ReturnStmt:
        """Parse ``return [expr];``."""
        self.expect(TokenType.RETURN)
        expr = None
        if not self._at(TokenType.SEMICOLON):
            expr = self.parse_expression()
        self.expect(TokenType.SEMICOLON)
        return ReturnStmt(expr)

    def parse_expression(self) -> Expression:
        """Parse a full expression (entry point of the precedence chain)."""
        return self.parse_assignment()

    def parse_assignment(self) -> Expression:
        """Parse ``name = assignment`` or fall through to a comparison."""
        target = self.parse_comparison()
        if not self._at(TokenType.ASSIGN):
            return target

        if not isinstance(target, VarExpr):
            self.error("Invalid assignment target")
        self.advance()
        value = self.parse_assignment()
        return AssignExpr(target.name, value)

    def parse_comparison(self) -> Expression:
        """Parse a chain of ``== != < > <= >=`` over additive operands."""
        return self._parse_left_assoc(
            self.parse_additive,
            [
                TokenType.EQ,
                TokenType.NE,
                TokenType.LT,
                TokenType.GT,
                TokenType.LE,
                TokenType.GE,
            ],
        )

    def parse_additive(self) -> Expression:
        """Parse a chain of ``+`` / ``-`` over multiplicative operands."""
        return self._parse_left_assoc(
            self.parse_multiplicative, [TokenType.PLUS, TokenType.MINUS]
        )

    def parse_multiplicative(self) -> Expression:
        """Parse a chain of ``*`` / ``/`` over unary operands."""
        return self._parse_left_assoc(
            self.parse_unary, [TokenType.STAR, TokenType.SLASH]
        )

    def parse_unary(self) -> Expression:
        """Parse any number of prefix ``+`` / ``-`` followed by a primary."""
        if self.current().type not in [TokenType.PLUS, TokenType.MINUS]:
            return self.parse_primary()
        symbol = self.advance().value
        operand = self.parse_unary()
        return UnaryOp(symbol, operand)

    def parse_primary(self) -> Expression:
        """Parse a number, a variable, a call, or a parenthesised expression."""
        token = self.current()

        if token.type == TokenType.NUMBER:
            self.advance()
            return NumberExpr(int(token.value))

        if token.type == TokenType.LPAREN:
            self.advance()
            inner = self.parse_expression()
            self.expect(TokenType.RPAREN)
            return inner

        if token.type != TokenType.IDENTIFIER:
            self.error(f"Unexpected token: {token.type.value}")

        name = self.advance().value
        if not self._at(TokenType.LPAREN):
            return VarExpr(name)

        # Function call
        self.advance()
        args = []
        if not self._at(TokenType.RPAREN):
            args.append(self.parse_expression())
            while self._at(TokenType.COMMA):
                self.advance()
                args.append(self.parse_expression())
        self.expect(TokenType.RPAREN)
        return CallExpr(name, args)
|
||||
|
||||
|
||||
class CodeGenerator:
    """Walks the AST and emits DSA assembly text, one line per ``emit`` call.

    Values live in registers drawn from a 16-entry pool (``rg0``..``rgf``).
    Every ``generate_*`` expression method returns the register holding its
    result; the caller is responsible for freeing it.
    """

    # Conditional-jump mnemonic emitted for each comparison operator.
    _COMPARISON_JUMPS = {
        "==": "jeq",
        "!=": "jne",
        "<": "jlt",
        ">": "jgt",
        "<=": "jle",
        ">=": "jge",
    }

    def __init__(self):
        self.output = []
        self.label_counter = 0
        self.string_counter = 0
        self.functions = {}
        self.current_function = None
        self.local_vars = {}
        self.global_vars = {}
        self.register_pool = [f"rg{i:x}" for i in range(16)]
        self.used_registers = set()

    def new_label(self, prefix: str = "L") -> str:
        """Return a fresh label made of ``prefix`` plus a running counter."""
        label = f"{prefix}{self.label_counter}"
        self.label_counter += 1
        return label

    def allocate_register(self) -> str:
        """Claim the first unused register.

        Raises:
            RuntimeError: when all registers in the pool are in use.
        """
        for reg in self.register_pool:
            if reg not in self.used_registers:
                self.used_registers.add(reg)
                return reg
        raise RuntimeError("Out of registers")

    def free_register(self, reg: str):
        """Return ``reg`` to the pool; freeing an unused register is a no-op."""
        self.used_registers.discard(reg)

    def emit(self, code: str):
        """Append one line of assembly to the output."""
        self.output.append(code)

    def generate(self, program: Program) -> str:
        """Emit globals, the ``init`` entry stub and every function.

        Returns:
            The complete assembly listing, lines joined with newlines.
        """
        # Emit data section
        self.emit("// Global variables")
        for decl in program.declarations:
            if isinstance(decl, VarDecl):
                self.global_vars[decl.name] = f"var_{decl.name}"
                # Only literal initializers are materialised; anything else
                # (or no initializer at all) is emitted as 0.
                initial = decl.init.value if isinstance(decl.init, NumberExpr) else 0
                self.emit(f"dw var_{decl.name}: {initial}")

        self.emit("")
        self.emit("// Entry point")
        self.emit("dw stack_bottom: 0x10000")
        self.emit("")
        self.emit("init:")
        self.emit(" ldw stack_bottom, spr")
        self.emit(" mov spr, bpr")

        # main takes no arguments, so one stack slot is pushed for its return
        # value and popped again after the call.
        self.emit(" push zero")
        self.emit(" call main")
        self.emit(" pop rg0")
        self.emit(" hlt")
        self.emit("")

        # Emit functions
        for decl in program.declarations:
            if isinstance(decl, FunctionDecl):
                self.generate_function(decl)

        return "\n".join(self.output)

    def generate_function(self, func: FunctionDecl):
        """Emit prologue, body and the shared ``<name>_end`` epilogue."""
        self.current_function = func.name
        self.functions[func.name] = func
        self.local_vars = {}

        # Map parameters to stack offsets
        # Parameters start at bpr+8 (after return addr at bpr+4)
        for i, param in enumerate(func.params):
            self.local_vars[param] = 8 + (i * 4)

        self.emit(f"{func.name}:")
        self.emit(" push bpr")
        self.emit(" mov spr, bpr")
        self.emit("")

        # Generate function body
        self.generate_compound_stmt(func.body)

        # Default return if no explicit return
        self.emit("// default return")
        self.emit(f"{func.name}_end:")
        self.emit(" mov bpr, spr")
        self.emit(" pop bpr")
        self.emit(" return")
        self.emit("")

    def generate_compound_stmt(self, stmt: CompoundStmt):
        """Emit each statement of a block in order."""
        for s in stmt.statements:
            self.generate_statement(s)

    def generate_statement(self, stmt: Statement):
        """Dispatch on the statement type; unknown types emit nothing."""
        if isinstance(stmt, CompoundStmt):
            self.generate_compound_stmt(stmt)
        elif isinstance(stmt, ExprStmt):
            if stmt.expr:
                # The value of an expression statement is discarded.
                reg = self.generate_expression(stmt.expr)
                self.free_register(reg)
        elif isinstance(stmt, IfStmt):
            self.generate_if_stmt(stmt)
        elif isinstance(stmt, WhileStmt):
            self.generate_while_stmt(stmt)
        elif isinstance(stmt, ReturnStmt):
            self.generate_return_stmt(stmt)

    def generate_if_stmt(self, stmt: IfStmt):
        """Emit an if/else: a zero condition jumps past the then-branch."""
        else_label = self.new_label("else")
        end_label = self.new_label("endif")

        # Evaluate condition
        cond_reg = self.generate_expression(stmt.condition)
        self.emit(f" cmp {cond_reg}, zero")
        self.free_register(cond_reg)

        if stmt.else_stmt:
            self.emit(f" jeq {else_label}")
        else:
            self.emit(f" jeq {end_label}")

        # Then branch
        self.generate_statement(stmt.then_stmt)

        if stmt.else_stmt:
            self.emit(f" jmp {end_label}")
            self.emit(f"{else_label}:")
            self.generate_statement(stmt.else_stmt)

        self.emit(f"{end_label}:")

    def generate_while_stmt(self, stmt: WhileStmt):
        """Emit a loop that re-tests the condition before every iteration."""
        start_label = self.new_label("while_start")
        end_label = self.new_label("while_end")

        self.emit(f"{start_label}:")

        # Evaluate condition
        cond_reg = self.generate_expression(stmt.condition)
        self.emit(f" cmp {cond_reg}, zero")
        self.free_register(cond_reg)
        self.emit(f" jeq {end_label}")

        # Loop body
        self.generate_statement(stmt.body)
        self.emit(f" jmp {start_label}")

        self.emit(f"{end_label}:")

    def generate_return_stmt(self, stmt: ReturnStmt):
        """Store the return value (if any) and jump to the epilogue."""
        if stmt.expr:
            reg = self.generate_expression(stmt.expr)
            # Store return value at spr+8 according to calling convention
            self.emit(f" stw {reg}, spr, 8")
            self.free_register(reg)
        self.emit(f" jmp {self.current_function}_end")

    def generate_expression(self, expr: Expression) -> str:
        """Emit code for ``expr`` and return the register holding its value.

        Raises:
            RuntimeError: for an undefined variable, an unknown expression
                type, or when the register pool is exhausted.
        """
        if isinstance(expr, NumberExpr):
            reg = self.allocate_register()
            if expr.value <= 0xFFFF and expr.value >= 0:
                self.emit(f" lli {expr.value}, {reg}")
                if expr.value > 0xFF:
                    self.emit(f" lui {expr.value >> 16}, {reg}")
            else:
                # Wider constants are loaded as two 16-bit halves.
                self.emit(f" lli {expr.value & 0xFFFF}, {reg}")
                self.emit(f" lui {(expr.value >> 16) & 0xFFFF}, {reg}")
            return reg

        elif isinstance(expr, VarExpr):
            reg = self.allocate_register()
            if expr.name in self.local_vars:
                offset = self.local_vars[expr.name]
                self.emit(f" ldw bpr, {reg}, {offset}")
            elif expr.name in self.global_vars:
                label = self.global_vars[expr.name]
                self.emit(f" ldw {label}, {reg}")
            else:
                raise RuntimeError(f"Undefined variable: {expr.name}")
            return reg

        elif isinstance(expr, AssignExpr):
            value_reg = self.generate_expression(expr.value)

            if expr.name in self.local_vars:
                offset = self.local_vars[expr.name]
                self.emit(f" stw {value_reg}, bpr, {offset}")
            elif expr.name in self.global_vars:
                label = self.global_vars[expr.name]
                self.emit(f" stw {value_reg}, {label}")
            else:
                # First assignment to an unknown name declares a new local.
                # Locals sit at negative offsets from bpr: -4, -8, ...
                # NOTE(review): spr is not moved to reserve these slots, so
                # later pushes may overlap them; confirm against the ISA.
                offset = -(len([v for v in self.local_vars.values() if v < 0]) + 1) * 4
                self.local_vars[expr.name] = offset
                self.emit(f" stw {value_reg}, bpr, {offset}")

            # An assignment evaluates to the assigned value.
            return value_reg

        elif isinstance(expr, BinaryOp):
            return self.generate_binary_op(expr)

        elif isinstance(expr, UnaryOp):
            operand_reg = self.generate_expression(expr.operand)
            result_reg = self.allocate_register()

            if expr.op == "-":
                # Negate as 0 - operand.
                # NOTE(review): every other immediate load uses "lli";
                # confirm that "lwi" is a valid mnemonic.
                self.emit(f" lwi 0, {result_reg}")
                self.emit(f" sub {result_reg}, {operand_reg}, {result_reg}")
            else:  # +
                self.emit(f" mov {operand_reg}, {result_reg}")

            self.free_register(operand_reg)
            return result_reg

        elif isinstance(expr, CallExpr):
            # Register that will receive the return value.
            temp_reg = self.allocate_register()

            # Push arguments in reverse order so the first ends up at bpr+8.
            arg_regs = []
            for arg in reversed(expr.args):
                reg = self.generate_expression(arg)
                self.emit(f" push {reg}")
                arg_regs.append(reg)

            # The callee writes its result into the slot at bpr+8, which is
            # the first argument when there is one. With no arguments the
            # slot has to be pushed explicitly (as "init" does for main);
            # otherwise the pop below would unbalance the stack.
            if not arg_regs:
                self.emit(" push zero")

            # Call function
            self.emit(f" call {expr.name}")

            # Get return value (it's now on top of stack)
            self.emit(f" pop {temp_reg}")

            # Clean up remaining args
            for _ in range(len(arg_regs) - 1):
                self.emit(" pop zero")

            # Free the arg registers
            for reg in arg_regs:
                self.free_register(reg)

            return temp_reg

        else:
            raise RuntimeError(f"Unknown expression type: {type(expr)}")

    def generate_binary_op(self, expr: BinaryOp) -> str:
        """Emit code for a binary operator and return the result register.

        The left operand is copied into ``saved_reg`` before the right operand
        is evaluated, and every instruction below reads the left value from
        ``saved_reg``. The register first used for the left value is released
        early, so the right operand may reuse it.
        """
        left_reg = self.generate_expression(expr.left)

        # NOTE(review): a register copy does not survive a call on the right
        # side if callees use the same register pool; spilling to the stack
        # may be needed. Confirm against the ISA's call semantics.
        saved_reg = self.allocate_register()
        self.emit(f" mov {left_reg}, {saved_reg}")
        self.free_register(left_reg)

        right_reg = self.generate_expression(expr.right)
        result_reg = self.allocate_register()

        if expr.op == "+":
            self.emit(f" add {saved_reg}, {right_reg}, {result_reg}")
        elif expr.op == "-":
            self.emit(f" sub {saved_reg}, {right_reg}, {result_reg}")
        elif expr.op == "*":
            # Simple multiplication using loop: add left, right times.
            temp_label = self.new_label("mult")
            end_label = self.new_label("mult_end")
            self.emit(f" lli 0, {result_reg}")
            self.emit(f"{temp_label}:")
            self.emit(f" cmp {right_reg}, zero")
            self.emit(f" jeq {end_label}")
            self.emit(f" add {result_reg}, {saved_reg}, {result_reg}")
            self.emit(f" dec {right_reg}")
            self.emit(f" jmp {temp_label}")
            self.emit(f"{end_label}:")
        elif expr.op == "/":
            # Simple division using loop: count subtractions of right.
            temp_label = self.new_label("div")
            end_label = self.new_label("div_end")
            self.emit(f" lli 0, {result_reg}")
            self.emit(f"{temp_label}:")
            self.emit(f" cmp {saved_reg}, {right_reg}")
            self.emit(f" jlt {end_label}")
            self.emit(f" sub {saved_reg}, {right_reg}, {saved_reg}")
            self.emit(f" inc {result_reg}")
            self.emit(f" jmp {temp_label}")
            self.emit(f"{end_label}:")
        elif expr.op in self._COMPARISON_JUMPS:
            self.emit(f" cmp {saved_reg}, {right_reg}")

            # Result is 1 if condition true, 0 otherwise
            self.emit(f" lli 0, {result_reg}")
            true_label = self.new_label("cmp_true")
            end_label = self.new_label("cmp_end")

            self.emit(f" {self._COMPARISON_JUMPS[expr.op]} {true_label}")
            self.emit(f" jmp {end_label}")
            self.emit(f"{true_label}:")
            self.emit(f" lli 1, {result_reg}")
            self.emit(f"{end_label}:")

        # Both operand temporaries are dead once the result is computed.
        self.free_register(saved_reg)
        self.free_register(right_reg)
        return result_reg
|
||||
|
||||
|
||||
def compile_c_to_asm(source: str) -> str:
    """Compile C source code to DSA assembly.

    Runs the three stages in order: lexing, parsing, code generation.
    """
    token_stream = Lexer(source).tokenize()
    program = Parser(token_stream).parse()
    return CodeGenerator().generate(program)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: ``compiler.py <input.c> [output.dsa]``.

    Without an explicit output path the input's extension is swapped for
    ``.dsa``. Exits with status 1 on a usage error, on a compilation error,
    or when the output path would overwrite the input file.
    """
    import os

    if len(sys.argv) < 2:
        print("Usage: python compiler.py <input.c> [output.dsa]")
        sys.exit(1)

    input_file = sys.argv[1]
    if len(sys.argv) > 2:
        output_file = sys.argv[2]
    else:
        # Replace only the final extension. A plain str.replace(".c", ".dsa")
        # rewrites every ".c" in the path and leaves a path without ".c"
        # unchanged, which would overwrite the source file.
        output_file = os.path.splitext(input_file)[0] + ".dsa"

    if os.path.abspath(output_file) == os.path.abspath(input_file):
        print(f"Compilation error: output file {output_file} would overwrite the input")
        sys.exit(1)

    with open(input_file, "r") as f:
        source = f.read()

    try:
        assembly = compile_c_to_asm(source)

        with open(output_file, "w") as f:
            f.write(assembly)

        print(f"Successfully compiled {input_file} to {output_file}")
    except (SyntaxError, RuntimeError) as e:
        print(f"Compilation error: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
# # Example usage
|
||||
# if len(sys.argv) > 1:
|
||||
# example_c = sys.argv[1]
|
||||
|
||||
# else:
|
||||
# example_c = """
|
||||
# int factorial(int n) {
|
||||
# if (n <= 1) {
|
||||
# return 1;
|
||||
# }
|
||||
# return n * factorial(n - 1);
|
||||
# }
|
||||
|
||||
# int main() {
|
||||
# int result;
|
||||
# result = factorial(5);
|
||||
# return result;
|
||||
# }
|
||||
# """
|
||||
|
||||
# print("Example C program:")
|
||||
# print(example_c)
|
||||
# print("\n" + "="*60 + "\n")
|
||||
# print("Generated DSA assembly:")
|
||||
# print(compile_c_to_asm(example_c))
|
||||
@@ -1,12 +0,0 @@
|
||||
// Fixture: prints factorial(3).
int factorial(int n) {
    if (n > 1) {
        return n * factorial(n - 1);
    }
    return 1;
}

int main() {
    int res = factorial(3);
    printnum(res);
    return 0;
}
|
||||
@@ -1,25 +0,0 @@
|
||||
include print: "lib/io/print.dsa"
|
||||
|
||||
int factorial(int n) {
    if (n > 1) {
        return n * factorial(n - 1);
    }
    return 1;
}

int add_(int a, int b) {
    return a + b;
}

int greater(int a, int b) {
    if (a + a > b + b) {
        return a;
    }
    return b + a;
}

int main() {
    printnum(-5);
    return 0;
}
|
||||
@@ -1,5 +0,0 @@
|
||||
// Imports
|
||||
include maths: "./lib/maths/core.dsa"
|
||||
|
||||
// Reserved Memory
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
/// Every register name known to the DSA toolchain, in three groups:
/// general purpose, special purpose, and system registers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum Register {
    // General purpose registers: Rg0 through Rgf (hexadecimal suffix).
    Rg0,
    Rg1,
    Rg2,
    Rg3,
    Rg4,
    Rg5,
    Rg6,
    Rg7,
    Rg8,
    Rg9,
    Rga,
    Rgb,
    Rgc,
    Rgd,
    Rge,
    Rgf,

    // Special purpose registers.
    Acc,
    Spr,
    Bpr,
    Ret,
    Idr,
    Mmr,
    Zero,
    NoReg,

    // System registers: instructions cannot write to these.
    Mar,
    Mdr,
    Sts,
    Cir,
    Pcx,
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
#[non_exhaustive]
|
||||
/// A list of all current instructions in the DSA Assembly language.
|
||||
pub enum Instruction {
|
||||
// No-op
|
||||
Nop = 0x0,
|
||||
|
||||
// Data transfer instructions
|
||||
Mov(Register, Register) = 0x1,
|
||||
Movs(Register, Register) = 0x2,
|
||||
|
||||
Ldb(Register, Register, Option<u32>) = 0x3,
|
||||
Ldbs(Register, Register, Option<u32>) = 0x4,
|
||||
Ldh(Register, Register, Option<u32>) = 0x5,
|
||||
Ldhs(Register, Register, Option<u32>) = 0x6,
|
||||
Ldw(Register, Register, Option<u32>) = 0x7,
|
||||
|
||||
Stb(Register, Register, Option<u32>) = 0x8,
|
||||
Sth(Register, Register, Option<u32>) = 0x9,
|
||||
Stw(Register, Register, Option<u32>) = 0xA,
|
||||
|
||||
Lli(u16, Register) = 0xB,
|
||||
Lui(u16, Register) = 0xC,
|
||||
|
||||
// Jump Instructions
|
||||
Jump(u16, Register) = 0xD,
|
||||
JumpEq(u16, Register) = 0xE,
|
||||
JumpNeq(u16, Register) = 0xF,
|
||||
JumpGt(u16, Register) = 0x10,
|
||||
JumpGe(u16, Register) = 0x11,
|
||||
JumpLt(u16, Register) = 0x12,
|
||||
JumpLe(u16, Register) = 0x13,
|
||||
|
||||
// Comparison
|
||||
Compare(Register, Register) = 0x14,
|
||||
|
||||
// // Arithmetic
|
||||
// Add(args::RTypeArgs) = 0x19,
|
||||
// Sub(args::RTypeArgs) = 0x1A,
|
||||
// Increment(args::RTypeArgs) = 0x15,
|
||||
// Decrement(args::RTypeArgs) = 0x16,
|
||||
// ShiftLeft(args::RTypeArgs) = 0x17,
|
||||
// ShiftRight(args::RTypeArgs) = 0x18,
|
||||
|
||||
// // Logical
|
||||
// And(args::RTypeArgs) = 0x1B,
|
||||
// Or(args::RTypeArgs) = 0x1C,
|
||||
// Not(args::RTypeArgs) = 0x1D,
|
||||
// Xor(args::RTypeArgs) = 0x1E,
|
||||
// Nand(args::RTypeArgs) = 0x1F,
|
||||
// Nor(args::RTypeArgs) = 0x20,
|
||||
// Xnor(args::RTypeArgs) = 0x21,
|
||||
|
||||
// // Misc
|
||||
// Interrupt(Interrupt) = 0x22,
|
||||
// IntReturn = 0x23,
|
||||
// Halt = 0x24,
|
||||
|
||||
// // Immediate Arithmetic
|
||||
// AddImmediate(args::ITypeArgs) = 0x25,
|
||||
// SubImmediate(args::ITypeArgs) = 0x26,
|
||||
|
||||
// Fake Instructions
|
||||
Data(u32) = 0x3E,
|
||||
Segment(u32) = 0x3F,
|
||||
}
|
||||
@@ -1,599 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::hash::Hash;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::atomic::AtomicU32;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use chrono::{DateTime, Local};
|
||||
|
||||
use crate::registers::RegisterAllocator;
|
||||
use crate::{block, cmd, comment, dsa};
|
||||
|
||||
use crate::parser::{
|
||||
BinaryOperator, ConstExpr, Declaration, Expression, Parameter, Program, Statement,
|
||||
UnaryOperator,
|
||||
};
|
||||
|
||||
pub struct CodeGenerator {
|
||||
ast: Program,
|
||||
imports: HashMap<String, String>,
|
||||
globals: Vec<String>,
|
||||
functions: Vec<String>,
|
||||
symbols: Vec<String>,
|
||||
allocator: RegisterAllocator,
|
||||
}
|
||||
|
||||
/// Functions callable without a declaration, mapped to the routine each call is emitted as.
static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    let mut builtins = HashMap::new();
    builtins.insert("print", "print::print");
    builtins.insert("printnum", "print::print_num");
    builtins
});
|
||||
|
||||
/// Renders one `include` directive that binds `name` to the file at `path`.
fn import(name: &str, path: &str) -> String {
    let mut directive = String::from("include ");
    directive.push_str(name);
    directive.push_str(": \"");
    directive.push_str(path);
    directive.push('"');
    directive
}
|
||||
|
||||
impl CodeGenerator {
|
||||
const RET: &'static str = "\tjmp _ret";
|
||||
|
||||
pub fn new(ast: Program) -> Self {
|
||||
CodeGenerator {
|
||||
ast,
|
||||
imports: HashMap::new(),
|
||||
globals: Vec::new(),
|
||||
functions: Vec::new(),
|
||||
symbols: Vec::new(),
|
||||
allocator: RegisterAllocator::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn include(&mut self, name: &str, path: &str) {
|
||||
self.imports.insert(name.to_string(), path.to_string());
|
||||
}
|
||||
|
||||
pub fn generate(&mut self) -> Result<String, String> {
|
||||
// always include the print library for debugging!
|
||||
self.include("print", "./lib/io/print.dsa");
|
||||
|
||||
for block in self.ast.clone().declarations {
|
||||
match block {
|
||||
Declaration::Variable { name, .. } => self.symbols.push(name),
|
||||
Declaration::Function { name, .. } => self.symbols.push(name),
|
||||
Declaration::Import { name, .. } => self.symbols.push(name),
|
||||
}
|
||||
}
|
||||
|
||||
for block in self.ast.clone().declarations {
|
||||
self.generate_block(block.clone())?;
|
||||
}
|
||||
|
||||
self.generate_layout()
|
||||
}
|
||||
|
||||
fn generate_layout(&mut self) -> Result<String, String> {
|
||||
let datetime: DateTime<Local> = SystemTime::now().into();
|
||||
Ok(dsa![
|
||||
"",
|
||||
comment!("GENERATED BY DSA-C COMPILER"),
|
||||
comment!(format!(
|
||||
"Generated at {}",
|
||||
datetime.format("%Y-%m-%d %H:%M:%S")
|
||||
)),
|
||||
"",
|
||||
// imports
|
||||
comment!("Imports"),
|
||||
self.imports
|
||||
.iter()
|
||||
.map(|(k, v)| import(k, v))
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n"),
|
||||
"",
|
||||
// reserved memory
|
||||
comment!("Globals & Reserved Memory"),
|
||||
self.globals.join("\n"),
|
||||
"",
|
||||
// entry point
|
||||
comment!("Entry Point"),
|
||||
"dw stack: 0x10000",
|
||||
"db message: \"Process Exited with code:\"",
|
||||
block! [ "_init"
|
||||
dsa![ldw stack, bpr],
|
||||
dsa![mov bpr, spr],
|
||||
dsa![push zero],
|
||||
dsa![call main],
|
||||
dsa![call print::print_newline],
|
||||
dsa![lwi message, rg0],
|
||||
dsa![push rg0],
|
||||
dsa![call print::print],
|
||||
dsa![pop zero],
|
||||
dsa![call print::print_hex_word],
|
||||
dsa![pop zero],
|
||||
dsa![hlt]
|
||||
],
|
||||
"",
|
||||
comment!("Function return boilerplate"),
|
||||
block! [ "_ret"
|
||||
dsa![mov bpr, spr],
|
||||
dsa![pop bpr],
|
||||
dsa![return]
|
||||
],
|
||||
// block! [ "main"
|
||||
// dsa![push bpr],
|
||||
// dsa![mov spr, bpr],
|
||||
// dsa![lwi 67, rg1],
|
||||
// dsa![stw rg1, spr, 8],
|
||||
// dsa![mov bpr, spr],
|
||||
// dsa![pop bpr],
|
||||
// dsa![return]
|
||||
// ],
|
||||
"",
|
||||
self.functions.join("\n"),
|
||||
])
|
||||
}
|
||||
|
||||
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
|
||||
self.globals.push(format!(
|
||||
"dw {}: {}",
|
||||
name,
|
||||
init.unwrap_or(ConstExpr::Number(0))
|
||||
))
|
||||
}
|
||||
|
||||
fn generate_block(&mut self, block: Declaration) -> Result<(), String> {
|
||||
match block {
|
||||
Declaration::Variable { name, init } => self.generate_global(&name, init),
|
||||
Declaration::Function {
|
||||
name,
|
||||
return_type,
|
||||
params,
|
||||
body,
|
||||
} => {
|
||||
let func = self.generate_function(&name, ¶ms, &body).join("\n");
|
||||
|
||||
self.functions.push(format!("{func}\n"));
|
||||
}
|
||||
Declaration::Import { name, path } => {
|
||||
self.imports.insert(name, path);
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Example: Generate code for a function
|
||||
fn generate_function(
|
||||
&mut self,
|
||||
name: &str,
|
||||
params: &[Parameter],
|
||||
body: &[Statement],
|
||||
) -> Vec<String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
// Reset allocator for new function
|
||||
self.allocator.reset();
|
||||
|
||||
// Function prologue
|
||||
code.push(format!("{}:", name));
|
||||
code.push("\tpush bpr".to_string());
|
||||
code.push("\tmov spr, bpr".to_string());
|
||||
code.push(String::new());
|
||||
|
||||
// Allocate parameters to registers or stack locations
|
||||
for (i, param) in params.iter().enumerate() {
|
||||
let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8
|
||||
// Track that this parameter is at a stack location
|
||||
let (reg, load_code) = self.allocator.alloc_var(¶m.name).unwrap();
|
||||
code.extend(load_code);
|
||||
code.push(format!("\tldw bpr, {}, {}", reg, offset));
|
||||
}
|
||||
|
||||
// Generate code for function body
|
||||
for stmt in body {
|
||||
let stmt_code = self.generate_statement(stmt).unwrap();
|
||||
code.extend(stmt_code);
|
||||
}
|
||||
|
||||
// automatically return at function end
|
||||
if let Some(x) = code.last()
|
||||
&& x == Self::RET
|
||||
{
|
||||
} else {
|
||||
code.push(Self::RET.to_string());
|
||||
}
|
||||
|
||||
code
|
||||
}
|
||||
|
||||
// Example: Generate code for a statement
|
||||
fn generate_statement(&mut self, stmt: &Statement) -> Result<Vec<String>, String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
match stmt {
|
||||
Statement::Assign {
|
||||
name,
|
||||
declare_type,
|
||||
value,
|
||||
} => {
|
||||
if let Some(expr) = value {
|
||||
// Evaluate expression
|
||||
let (result_reg, expr_code) = self.generate_expression(expr)?;
|
||||
code.extend(expr_code);
|
||||
|
||||
// Store result in variable
|
||||
let store_code = self.allocator.store_var(name, &result_reg);
|
||||
code.extend(store_code);
|
||||
|
||||
// Free temporary register
|
||||
self.allocator.free_temp(&result_reg);
|
||||
} else {
|
||||
// Just declaring variable without initialization
|
||||
self.allocator.alloc_var(name)?;
|
||||
}
|
||||
}
|
||||
|
||||
Statement::Return { expr } => {
|
||||
if let Some(e) = expr {
|
||||
let (result_reg, expr_code) = self.generate_expression(e)?;
|
||||
code.extend(expr_code);
|
||||
code.push(format!("\tstw {}, bpr, 8", result_reg));
|
||||
code.push(format!("\tjmp _ret"));
|
||||
self.allocator.free_temp(&result_reg);
|
||||
}
|
||||
}
|
||||
|
||||
Statement::If {
|
||||
condition,
|
||||
then_stmt,
|
||||
else_stmt,
|
||||
} => {
|
||||
// Generate condition
|
||||
let (cond_reg, cond_code) = self.generate_expression(condition)?;
|
||||
code.extend(cond_code);
|
||||
|
||||
// Compare with zero
|
||||
code.push(format!("\tcmp {}, zero", cond_reg));
|
||||
self.allocator.free_temp(&cond_reg);
|
||||
|
||||
// Generate unique labels
|
||||
let then_label = format!("_then_{}", self.get_unique_label());
|
||||
let else_label = format!("_else_{}", self.get_unique_label());
|
||||
let end_label = format!("_end_{}", self.get_unique_label());
|
||||
|
||||
// Jump to else if condition is false (equal to zero)
|
||||
code.push(format!("\tjeq {}", else_label));
|
||||
|
||||
// Then block
|
||||
code.push(format!("{}:", then_label));
|
||||
for s in then_stmt {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
if then_stmt.len() == 0 {
|
||||
code.push("\tnop".to_string());
|
||||
}
|
||||
|
||||
code.push(format!("\tjmp {}", end_label));
|
||||
|
||||
// Else block
|
||||
code.push(format!("{}:", else_label));
|
||||
for s in else_stmt {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
if else_stmt.len() == 0 {
|
||||
code.push("\tnop".to_string());
|
||||
}
|
||||
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
|
||||
Statement::While { condition, body } => {
|
||||
let loop_start = format!("_while_start_{}", self.get_unique_label());
|
||||
let loop_end = format!("_while_end_{}", self.get_unique_label());
|
||||
|
||||
code.push(format!("{}:", loop_start));
|
||||
|
||||
// Generate condition
|
||||
let (cond_reg, cond_code) = self.generate_expression(condition)?;
|
||||
code.extend(cond_code);
|
||||
|
||||
code.push(format!("\tcmp {}, zero", cond_reg));
|
||||
self.allocator.free_temp(&cond_reg);
|
||||
|
||||
code.push(format!("\tjeq {}", loop_end));
|
||||
|
||||
// Loop body
|
||||
for s in body {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
code.push(format!("\tjmp {}", loop_start));
|
||||
code.push(format!("{}:", loop_end));
|
||||
}
|
||||
|
||||
Statement::Expression { expr } => {
|
||||
let (result_reg, expr_code) = self.generate_expression(expr)?;
|
||||
code.extend(expr_code);
|
||||
self.allocator.free_temp(&result_reg);
|
||||
}
|
||||
|
||||
Statement::Block(statements) => {
|
||||
for s in statements {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(code)
|
||||
}
|
||||
|
||||
// Example: Generate code for an expression
|
||||
// Returns (register containing result, assembly code)
|
||||
fn generate_expression(
|
||||
&mut self,
|
||||
expr: &Expression,
|
||||
) -> Result<(String, Vec<String>), String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
match expr {
|
||||
Expression::Number { value } => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.extend(alloc_code);
|
||||
|
||||
// Load immediate value
|
||||
code.push(format!("\tlli {}, {}", value & 0xFFFF, reg));
|
||||
if *value > 0xFFFF || *value < 0 {
|
||||
code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg));
|
||||
}
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::Variable { name, .. } => {
|
||||
let (reg, load_code) = self.allocator.load_var(name)?;
|
||||
code.extend(load_code);
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::Binary { op, left, right } => {
|
||||
// Evaluate left operand
|
||||
let (left_reg, left_code) = self.generate_expression(left)?;
|
||||
code.extend(left_code);
|
||||
|
||||
// Evaluate right operand
|
||||
let (right_reg, right_code) = self.generate_expression(right)?;
|
||||
code.extend(right_code);
|
||||
|
||||
// Allocate result register
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.extend(result_alloc);
|
||||
|
||||
// Generate operation
|
||||
match op {
|
||||
BinaryOperator::Add => {
|
||||
code.push(format!(
|
||||
"\tadd {}, {}, {}",
|
||||
left_reg, right_reg, result_reg
|
||||
));
|
||||
}
|
||||
BinaryOperator::Sub => {
|
||||
code.push(format!(
|
||||
"\tsub {}, {}, {}",
|
||||
left_reg, right_reg, result_reg
|
||||
));
|
||||
}
|
||||
BinaryOperator::Mul => {
|
||||
self.include("maths", "./lib/maths/core.dsa");
|
||||
// Call multiply function
|
||||
code.push(format!("\tpush {}", right_reg));
|
||||
code.push(format!("\tpush {}", left_reg));
|
||||
code.push("\tcall maths::multiply".to_string());
|
||||
code.push(format!("\tpop {}", result_reg));
|
||||
code.push("\tpop zero".to_string());
|
||||
}
|
||||
// Comparison operators - return 1 (true) or 0 (false)
|
||||
BinaryOperator::Eq => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Ne => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Lt => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Le => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Gt => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Ge => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
_ => return Err(format!("Unsupported binary operator: {:?}", op)),
|
||||
}
|
||||
|
||||
// Free operand registers (allocator will protect variables)
|
||||
self.allocator.free_temp(&left_reg);
|
||||
self.allocator.free_temp(&right_reg);
|
||||
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Call { name, args } => {
|
||||
// Save caller-saved registers and track which ones we saved
|
||||
let saved_regs = self.allocator.get_caller_saved_registers();
|
||||
for reg in &saved_regs {
|
||||
code.push(format!("\tpush {}", reg));
|
||||
}
|
||||
|
||||
// Evaluate and push arguments in reverse order
|
||||
let mut arg_regs = Vec::new();
|
||||
for arg in args.iter().rev() {
|
||||
let (arg_reg, arg_code) = self.generate_expression(arg)?;
|
||||
code.extend(arg_code);
|
||||
code.push(format!("\tpush {}", arg_reg));
|
||||
arg_regs.push(arg_reg);
|
||||
}
|
||||
|
||||
if GLOBAL_METHODS.contains_key(name.as_str()) {
|
||||
code.push(format!("\tcall {}", GLOBAL_METHODS[name.as_str()]));
|
||||
} else if self.symbols.contains(name) {
|
||||
// Call local function
|
||||
code.push(format!("\tcall {}", name));
|
||||
} else {
|
||||
return Err(format!("undefined function {name}"));
|
||||
}
|
||||
|
||||
// Result is in rg0, allocate a register and move it
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
|
||||
code.extend(result_alloc);
|
||||
code.push(format!("\tpop {}", result_reg));
|
||||
|
||||
// Clean up arguments
|
||||
if args.len() > 1 {
|
||||
for _ in 0..(args.len() - 1) {
|
||||
code.push("\tpop zero".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Restore caller-saved registers in reverse order (LIFO)
|
||||
for reg in saved_regs.iter().rev() {
|
||||
code.push(format!("\tpop {}", reg));
|
||||
}
|
||||
|
||||
// Free argument registers
|
||||
for reg in arg_regs {
|
||||
self.allocator.free_temp(®);
|
||||
}
|
||||
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Unary { op, operand } => {
|
||||
let (operand_reg, operand_code) = self.generate_expression(operand)?;
|
||||
code.extend(operand_code);
|
||||
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.extend(result_alloc);
|
||||
|
||||
match op {
|
||||
UnaryOperator::Minus => {
|
||||
// Negate: result = 0 - operand
|
||||
code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::Plus => {
|
||||
// Just move
|
||||
code.push(format!("\tmov {}, {}", operand_reg, result_reg));
|
||||
}
|
||||
}
|
||||
|
||||
self.allocator.free_temp(&operand_reg);
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Empty => Ok(("zero".to_string(), code)),
|
||||
}
|
||||
}
|
||||
|
||||
// Helper for generating unique labels
|
||||
fn get_unique_label(&mut self) -> String {
|
||||
// You'd implement a counter here
|
||||
static COUNTER: AtomicU32 = AtomicU32::new(0);
|
||||
|
||||
let val = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
|
||||
(val + 1).to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a single string from any number of arguments.
///
/// Each argument must implement `Display`. Every argument is written followed
/// by a newline, so `dsa!["a", 1]` yields `"a\n1\n"`. A trailing comma is
/// accepted.
#[macro_export]
macro_rules! dsa {
    ($($arg:expr),* $(,)?) => {{
        // Needed for `writeln!` into a `String`.
        use ::std::fmt::Write as _;
        let mut out = ::std::string::String::new();
        $(
            // Appends to the same buffer instead of building a temporary
            // `String` per argument.
            writeln!(out, "{}", $arg).expect("write to String failed");
        )*
        out
    }};
}
|
||||
|
||||
// ──────────────────────── cmd! ────────────────────────
// A tiny helper that turns its token stream into a `String`.
// All tokens are stringified together and a newline is appended, e.g.
// `cmd!(hlt)` yields `"hlt\n"`.
#[macro_export]
macro_rules! cmd {
    ($($tokens:tt)*) => {
        // `$tokens` is a repeated capture, so it has to be expanded inside a
        // `$( ... )*` repetition; using it bare makes every call fail to expand.
        ::std::string::String::from(concat!(stringify!($($tokens)*), "\n"))
    };
}
|
||||
|
||||
// ──────────────────────── block! ────────────────────────
// Usage:
//
//     let asm = block![ "name"
//         dsa![mov rg0, rg1],
//         dsa![add rg1, rg1]
//     ];
//
// `asm` is a `&'static str` containing the label line followed by one
// tab-indented line per instruction:
//
//     name:
//         mov rg0, rg1
//         add rg1, rg1
//
// The `dsa![...]` items are matched as raw tokens and stringified; the `dsa!`
// macro itself is not expanded here.
#[macro_export]
macro_rules! block {
    // The first token must be a string literal – that's the label.
    ($label:literal $(dsa![$($ins:tt)*]),* ) => {
        // Everything is joined at compile time into one string literal.
        concat!(
            $label, ":\n",
            $( "\t", stringify!($($ins)*), "\n" ),*
        )
    };
}
|
||||
|
||||
/// Formats `$text` (any `Display` value) as a `//` comment line, without a
/// trailing newline.
#[macro_export]
macro_rules! comment {
    ($text:expr) => {
        ["// ", &$text.to_string()].concat()
    };
}
|
||||
@@ -1,335 +0,0 @@
|
||||
// ============================================================================
|
||||
// Token Types
|
||||
// ============================================================================
|
||||
|
||||
/// The kind of a lexed token, including its payload for identifiers and literals.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
    // ---- Keywords ----
    /// `int`
    Int,
    /// `if`
    If,
    /// `else`
    Else,
    /// `while`
    While,
    /// `return`
    Return,
    /// `include`
    Include,

    // ---- Identifiers and literals ----
    /// A name that is not a keyword.
    Identifier(String),
    /// A decimal integer literal.
    Number(i32),
    /// The contents of a double-quoted string literal.
    String(String),
    /// The character of a single-quoted literal.
    Char(char),

    // ---- Operators ----
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `=`
    Assign,
    /// `==`
    Eq,
    /// `!=`
    Ne,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    Le,
    /// `>=`
    Ge,

    // ---- Delimiters ----
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `;`
    Semicolon,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `::`
    Namespace,

    /// End of input.
    Eof,
}
|
||||
|
||||
/// Names of primitive value types.
///
/// NOTE(review): nothing else in the lexer refers to this enum; presumably the
/// variants denote signed/unsigned integers of the given bit width — confirm.
pub enum Type {
    Int32,
    Int16,
    Int8,
    Uint32,
    Uint16,
    Uint8,
    Char,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Token {
|
||||
pub token_type: TokenType,
|
||||
pub line: usize,
|
||||
pub col: usize,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub fn new(token_type: TokenType, line: usize, col: usize) -> Self {
|
||||
Self {
|
||||
token_type,
|
||||
line,
|
||||
col,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Lexer
|
||||
// ============================================================================
|
||||
|
||||
/// Scanner state over the characters of one source text.
pub struct Lexer {
    /// The source text, one entry per `char`.
    source: Vec<char>,
    /// Index into `source` of the next unread character.
    pos: usize,
    /// Line of the next unread character.
    line: usize,
    /// Column of the next unread character.
    col: usize,
}
|
||||
|
||||
impl Lexer {
|
||||
pub fn new(source: &str) -> Self {
|
||||
Self {
|
||||
source: source.chars().collect(),
|
||||
pos: 0,
|
||||
line: 1,
|
||||
col: 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn error(&self, msg: &str) -> String {
|
||||
format!(
|
||||
"Lexer error at line {}, col {}: {}",
|
||||
self.line, self.col, msg
|
||||
)
|
||||
}
|
||||
|
||||
fn peek(&self, offset: usize) -> Option<char> {
|
||||
self.source.get(self.pos + offset).copied()
|
||||
}
|
||||
|
||||
fn advance(&mut self) -> Option<char> {
|
||||
if self.pos >= self.source.len() {
|
||||
return None;
|
||||
}
|
||||
let ch = self.source[self.pos];
|
||||
self.pos += 1;
|
||||
if ch == '\n' {
|
||||
self.line += 1;
|
||||
self.col = 1;
|
||||
} else {
|
||||
self.col += 1;
|
||||
}
|
||||
Some(ch)
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) {
|
||||
while let Some(ch) = self.peek(0) {
|
||||
if ch.is_whitespace() {
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_comment(&mut self) {
|
||||
if self.peek(0) == Some('/') && self.peek(1) == Some('/') {
|
||||
while let Some(ch) = self.peek(0) {
|
||||
if ch == '\n' {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_number(&mut self) -> i32 {
|
||||
let mut num_str = String::new();
|
||||
while let Some(ch) = self.peek(0) {
|
||||
if ch.is_ascii_digit() {
|
||||
num_str.push(ch);
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
num_str.parse().unwrap_or(0)
|
||||
}
|
||||
|
||||
fn read_identifier(&mut self) -> String {
|
||||
let mut ident = String::new();
|
||||
while let Some(ch) = self.peek(0) {
|
||||
if ch.is_alphanumeric() || ch == '_' {
|
||||
ident.push(ch);
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ident
|
||||
}
|
||||
|
||||
fn read_string(&mut self) -> Result<String, String> {
|
||||
let mut string = String::new();
|
||||
self.advance(); // Consume the opening quote
|
||||
|
||||
while let Some(ch) = self.peek(0) {
|
||||
if ch == '"' {
|
||||
self.advance(); // Consume the closing quote
|
||||
return Ok(string);
|
||||
} else if ch == '\\' {
|
||||
self.advance(); // Consume the backslash
|
||||
if let Some(escaped_char) = self.peek(0) {
|
||||
string.push(escaped_char);
|
||||
self.advance();
|
||||
}
|
||||
} else {
|
||||
string.push(ch);
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
Err(String::from("Unexpected EOF"))
|
||||
}
|
||||
|
||||
fn read_char(&mut self) -> Result<char, String> {
|
||||
self.advance(); // Consume the opening quote
|
||||
|
||||
if let Some(ch) = self.peek(0) {
|
||||
self.advance();
|
||||
if self.peek(0) == Some('\'') {
|
||||
self.advance();
|
||||
return Ok(ch);
|
||||
} else {
|
||||
Err(String::from("expected closing quote"))
|
||||
}
|
||||
} else {
|
||||
Err(String::from("expected character"))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tokenize(&mut self) -> Result<Vec<Token>, String> {
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
loop {
|
||||
self.skip_whitespace();
|
||||
self.skip_comment();
|
||||
|
||||
if self.pos >= self.source.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
let line = self.line;
|
||||
let col = self.col;
|
||||
let ch = self.peek(0).unwrap();
|
||||
|
||||
let token_type = if ch.is_ascii_digit() {
|
||||
let num = self.read_number();
|
||||
TokenType::Number(num)
|
||||
} else if ch == '"' {
|
||||
let string = self.read_string()?;
|
||||
TokenType::String(string)
|
||||
} else if ch == '\'' {
|
||||
let char = self.read_char()?;
|
||||
TokenType::Char(char)
|
||||
} else if ch.is_alphabetic() || ch == '_' {
|
||||
let ident = self.read_identifier();
|
||||
match ident.as_str() {
|
||||
"int" => TokenType::Int,
|
||||
"if" => TokenType::If,
|
||||
"else" => TokenType::Else,
|
||||
"while" => TokenType::While,
|
||||
"return" => TokenType::Return,
|
||||
"include" => TokenType::Include,
|
||||
_ => TokenType::Identifier(ident),
|
||||
}
|
||||
} else {
|
||||
match ch {
|
||||
':' if self.peek(1) == Some(':') => {
|
||||
self.advance();
|
||||
self.advance();
|
||||
TokenType::Namespace
|
||||
}
|
||||
':' => {
|
||||
self.advance();
|
||||
TokenType::Colon
|
||||
}
|
||||
'=' if self.peek(1) == Some('=') => {
|
||||
self.advance();
|
||||
self.advance();
|
||||
TokenType::Eq
|
||||
}
|
||||
'!' if self.peek(1) == Some('=') => {
|
||||
self.advance();
|
||||
self.advance();
|
||||
TokenType::Ne
|
||||
}
|
||||
'<' if self.peek(1) == Some('=') => {
|
||||
self.advance();
|
||||
self.advance();
|
||||
TokenType::Le
|
||||
}
|
||||
'>' if self.peek(1) == Some('=') => {
|
||||
self.advance();
|
||||
self.advance();
|
||||
TokenType::Ge
|
||||
}
|
||||
'+' => {
|
||||
self.advance();
|
||||
TokenType::Plus
|
||||
}
|
||||
'-' => {
|
||||
self.advance();
|
||||
TokenType::Minus
|
||||
}
|
||||
'*' => {
|
||||
self.advance();
|
||||
TokenType::Star
|
||||
}
|
||||
'/' => {
|
||||
self.advance();
|
||||
TokenType::Slash
|
||||
}
|
||||
'=' => {
|
||||
self.advance();
|
||||
TokenType::Assign
|
||||
}
|
||||
'<' => {
|
||||
self.advance();
|
||||
TokenType::Lt
|
||||
}
|
||||
'>' => {
|
||||
self.advance();
|
||||
TokenType::Gt
|
||||
}
|
||||
'(' => {
|
||||
self.advance();
|
||||
TokenType::LParen
|
||||
}
|
||||
')' => {
|
||||
self.advance();
|
||||
TokenType::RParen
|
||||
}
|
||||
'{' => {
|
||||
self.advance();
|
||||
TokenType::LBrace
|
||||
}
|
||||
'}' => {
|
||||
self.advance();
|
||||
TokenType::RBrace
|
||||
}
|
||||
';' => {
|
||||
self.advance();
|
||||
TokenType::Semicolon
|
||||
}
|
||||
',' => {
|
||||
self.advance();
|
||||
TokenType::Comma
|
||||
}
|
||||
_ => return Err(self.error(&format!("Unexpected character: {}", ch))),
|
||||
}
|
||||
};
|
||||
|
||||
tokens.push(Token::new(token_type, line, col));
|
||||
}
|
||||
|
||||
tokens.push(Token::new(TokenType::Eof, self.line, self.col));
|
||||
Ok(tokens)
|
||||
}
|
||||
}
|
||||
@@ -1,74 +0,0 @@
|
||||
use std::fmt;
|
||||
|
||||
use crate::{codegen::CodeGenerator, lexer::Lexer, parser::Parser};
|
||||
|
||||
// mod assembly;
|
||||
pub mod codegen;
|
||||
pub mod lexer;
|
||||
pub mod parser;
|
||||
mod registers;
|
||||
|
||||
// ============================================================================
|
||||
// Main & Tests
|
||||
// ============================================================================
|
||||
|
||||
fn main() {
|
||||
// read from input file: syntax "c_compiler <src.c> [output.dsa]"
|
||||
let args: Vec<String> = std::env::args().collect();
|
||||
if args.len() < 2 {
|
||||
eprintln!("Usage: c_compiler <src.c> [output.dsa]");
|
||||
return;
|
||||
}
|
||||
|
||||
let input_file = &args[1];
|
||||
let output_file = if args.len() > 2 {
|
||||
&args[2]
|
||||
} else {
|
||||
"output.dsa"
|
||||
};
|
||||
|
||||
// read input
|
||||
let input = std::fs::read_to_string(input_file).expect("Failed to read input file");
|
||||
|
||||
// Lexing
|
||||
let mut lexer = Lexer::new(&input);
|
||||
let tokens = match lexer.tokenize() {
|
||||
Ok(tokens) => tokens,
|
||||
Err(e) => {
|
||||
eprintln!("Lexing error: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
println!("Tokens:");
|
||||
for token in &tokens {
|
||||
println!(" {:?}", token.token_type);
|
||||
}
|
||||
println!();
|
||||
|
||||
// Parsing
|
||||
let mut parser = Parser::new(tokens);
|
||||
let ast = match parser.parse() {
|
||||
Ok(ast) => ast,
|
||||
Err(e) => {
|
||||
eprintln!("Parsing error: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
println!("AST:");
|
||||
println!("{:#?}", ast);
|
||||
|
||||
// Code Gen
|
||||
let mut generator = CodeGenerator::new(ast);
|
||||
let result = match generator.generate() {
|
||||
Ok(code) => code,
|
||||
Err(e) => {
|
||||
eprintln!("Parsing error: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
std::fs::write(output_file, &result).expect("Failed to write output");
|
||||
println!("Result written to {}", output_file);
|
||||
}
|
||||
@@ -1,610 +0,0 @@
|
||||
// ============================================================================
|
||||
// AST Node Types
|
||||
// ============================================================================
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use crate::lexer::{Token, TokenType};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Program {
|
||||
pub declarations: Vec<Declaration>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Declaration {
|
||||
Function {
|
||||
name: String,
|
||||
return_type: Type,
|
||||
params: Vec<Parameter>,
|
||||
body: Block,
|
||||
},
|
||||
Variable {
|
||||
name: String,
|
||||
init: Option<ConstExpr>,
|
||||
},
|
||||
Import {
|
||||
name: String,
|
||||
path: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Parameter {
|
||||
pub name: String,
|
||||
pub param_type: Type,
|
||||
}
|
||||
|
||||
/// A type as it appears in the AST.
#[derive(Debug, Clone)]
pub enum Type {
    Int,
    Long,
    Float,
    Double,
    Char,
    Void,
    /// Pointer to the boxed type.
    Ptr(Box<Type>),
    /// Array of the boxed element type; the `usize` is presumably the element
    /// count — TODO confirm against the parser.
    Array(Box<Type>, usize),
    /// A struct type, identified by name.
    Struct(String),
}
|
||||
|
||||
/// A sequence of statements, e.g. a function body or an `if` branch.
pub type Block = Vec<Statement>;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Statement {
|
||||
Block(Block),
|
||||
Assign {
|
||||
// left side
|
||||
name: String,
|
||||
declare_type: Option<Type>,
|
||||
|
||||
// right side
|
||||
value: Option<Box<Expression>>,
|
||||
},
|
||||
Expression {
|
||||
expr: Expression,
|
||||
},
|
||||
If {
|
||||
condition: Expression,
|
||||
then_stmt: Block,
|
||||
else_stmt: Block,
|
||||
},
|
||||
While {
|
||||
condition: Expression,
|
||||
body: Vec<Statement>,
|
||||
},
|
||||
Return {
|
||||
expr: Option<Expression>,
|
||||
},
|
||||
}
|
||||
|
||||
/// A constant value.
#[derive(Debug, Clone)]
pub enum ConstExpr {
    /// An integer constant.
    Number(i32),
    /// A string constant (stored without surrounding quotes).
    String(String),
}

/// Numbers print as plain decimal; strings print wrapped in double quotes,
/// with no escaping of their contents.
impl fmt::Display for ConstExpr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            ConstExpr::Number(n) => f.write_str(&n.to_string()),
            ConstExpr::String(s) => {
                f.write_str("\"")?;
                f.write_str(s)?;
                f.write_str("\"")
            }
        }
    }
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Expression {
|
||||
Empty,
|
||||
Binary {
|
||||
op: BinaryOperator,
|
||||
left: Box<Expression>,
|
||||
right: Box<Expression>,
|
||||
},
|
||||
Unary {
|
||||
op: UnaryOperator,
|
||||
operand: Box<Expression>,
|
||||
},
|
||||
Variable {
|
||||
name: String,
|
||||
expr_type: Option<Type>,
|
||||
},
|
||||
Number {
|
||||
value: i32,
|
||||
},
|
||||
Call {
|
||||
name: String,
|
||||
args: Vec<Expression>,
|
||||
},
|
||||
}
|
||||
|
||||
/// A binary arithmetic or comparison operator.
#[derive(Debug, Clone, PartialEq)]
pub enum BinaryOperator {
    Add,
    Sub,
    Mul,
    Div,
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,
}

/// Prints the operator as its source-code symbol.
impl fmt::Display for BinaryOperator {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            BinaryOperator::Add => "+",
            BinaryOperator::Sub => "-",
            BinaryOperator::Mul => "*",
            BinaryOperator::Div => "/",
            BinaryOperator::Eq => "==",
            BinaryOperator::Ne => "!=",
            BinaryOperator::Lt => "<",
            BinaryOperator::Gt => ">",
            BinaryOperator::Le => "<=",
            BinaryOperator::Ge => ">=",
        };
        f.write_str(symbol)
    }
}
|
||||
|
||||
/// Prefix operators of the supported C subset.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOperator {
    Plus,
    Minus,
}

impl fmt::Display for UnaryOperator {
    /// Writes the operator's C source spelling (`+` or `-`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            UnaryOperator::Plus => "+",
            UnaryOperator::Minus => "-",
        })
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Parser
|
||||
// ============================================================================
|
||||
|
||||
pub struct Parser {
|
||||
tokens: Vec<Token>,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new(tokens: Vec<Token>) -> Self {
|
||||
Self { tokens, pos: 0 }
|
||||
}
|
||||
|
||||
fn error(&self, msg: &str) -> String {
|
||||
let token = self.current();
|
||||
format!(
|
||||
"Parser error at line {}, col {}: {}",
|
||||
token.line, token.col, msg
|
||||
)
|
||||
}
|
||||
|
||||
fn current(&self) -> &Token {
|
||||
self.tokens
|
||||
.get(self.pos)
|
||||
.unwrap_or_else(|| self.tokens.last().unwrap())
|
||||
}
|
||||
|
||||
fn peek(&self, offset: usize) -> &Token {
|
||||
self.tokens
|
||||
.get(self.pos + offset)
|
||||
.unwrap_or_else(|| self.tokens.last().unwrap())
|
||||
}
|
||||
|
||||
fn advance(&mut self) -> &Token {
|
||||
if self.pos < self.tokens.len() - 1 {
|
||||
self.pos += 1;
|
||||
}
|
||||
self.current()
|
||||
}
|
||||
|
||||
fn expect(&mut self, expected: TokenType) -> Result<Token, String> {
|
||||
let token = self.current().clone();
|
||||
if std::mem::discriminant(&token.token_type) != std::mem::discriminant(&expected)
|
||||
{
|
||||
return Err(self.error(&format!(
|
||||
"Expected {:?}, got {:?}",
|
||||
expected, token.token_type
|
||||
)));
|
||||
}
|
||||
self.advance();
|
||||
Ok(token)
|
||||
}
|
||||
|
||||
pub fn parse(&mut self) -> Result<Program, String> {
|
||||
let mut declarations = Vec::new();
|
||||
|
||||
while !matches!(self.current().token_type, TokenType::Eof) {
|
||||
declarations.push(self.parse_declaration()?);
|
||||
}
|
||||
|
||||
Ok(Program { declarations })
|
||||
}
|
||||
|
||||
fn parse_declaration(&mut self) -> Result<Declaration, String> {
|
||||
// check for an import
|
||||
if let TokenType::Include = self.current().token_type {
|
||||
self.advance();
|
||||
|
||||
let name =
|
||||
if let TokenType::Identifier(id) = self.current().clone().token_type {
|
||||
Some(id)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
.ok_or(String::from("Expected identifier"))?;
|
||||
|
||||
self.advance();
|
||||
self.expect(TokenType::Colon)?;
|
||||
|
||||
let path = if let TokenType::String(id) = self.current().clone().token_type {
|
||||
Some(id)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
.ok_or(String::from("Expected string literal"))?;
|
||||
|
||||
self.advance();
|
||||
return Ok(Declaration::Import { name, path });
|
||||
}
|
||||
|
||||
self.expect(TokenType::Int)?;
|
||||
|
||||
let name = match &self.current().token_type {
|
||||
TokenType::Identifier(s) => s.clone(),
|
||||
_ => return Err(self.error("Expected identifier")),
|
||||
};
|
||||
self.advance();
|
||||
|
||||
match &self.current().token_type {
|
||||
TokenType::LParen => {
|
||||
// Function declaration
|
||||
self.advance();
|
||||
let mut params = Vec::<Parameter>::new();
|
||||
|
||||
if !matches!(self.current().token_type, TokenType::RParen) {
|
||||
self.expect(TokenType::Int)?;
|
||||
|
||||
match &self.current().token_type {
|
||||
TokenType::Identifier(s) => {
|
||||
params.push(Parameter {
|
||||
name: s.clone(),
|
||||
param_type: Type::Int,
|
||||
});
|
||||
self.advance();
|
||||
}
|
||||
_ => return Err(self.error("Expected parameter name")),
|
||||
}
|
||||
|
||||
while matches!(self.current().token_type, TokenType::Comma) {
|
||||
self.advance();
|
||||
self.expect(TokenType::Int)?;
|
||||
|
||||
match &self.current().token_type {
|
||||
TokenType::Identifier(s) => {
|
||||
params.push(Parameter {
|
||||
name: s.clone(),
|
||||
param_type: Type::Int,
|
||||
});
|
||||
self.advance();
|
||||
}
|
||||
_ => return Err(self.error("Expected parameter name")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.expect(TokenType::RParen)?;
|
||||
let body = self.parse_block()?;
|
||||
|
||||
Ok(Declaration::Function {
|
||||
name,
|
||||
params,
|
||||
body,
|
||||
return_type: Type::Int,
|
||||
})
|
||||
}
|
||||
_ => {
|
||||
// Variable declaration
|
||||
let init = if matches!(self.current().token_type, TokenType::Assign) {
|
||||
self.advance();
|
||||
|
||||
if let TokenType::Number(n) = self.current().token_type {
|
||||
self.advance();
|
||||
Some(ConstExpr::Number(n))
|
||||
} else {
|
||||
return Err(self
|
||||
.error("Expected constant in global variable declaration"));
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
self.expect(TokenType::Semicolon)?;
|
||||
Ok(Declaration::Variable { name, init })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_block(&mut self) -> Result<Block, String> {
|
||||
self.expect(TokenType::LBrace)?;
|
||||
let mut statements = Vec::new();
|
||||
|
||||
while !matches!(self.current().token_type, TokenType::RBrace) {
|
||||
statements.push(self.parse_statement()?);
|
||||
}
|
||||
|
||||
self.expect(TokenType::RBrace)?;
|
||||
Ok(statements)
|
||||
}
|
||||
|
||||
fn parse_statement(&mut self) -> Result<Statement, String> {
|
||||
match &self.current().token_type {
|
||||
TokenType::LBrace => Ok(Statement::Block(self.parse_block()?)),
|
||||
TokenType::If => self.parse_if_stmt(),
|
||||
TokenType::While => self.parse_while_stmt(),
|
||||
TokenType::Return => self.parse_return_stmt(),
|
||||
TokenType::Identifier(name) => {
|
||||
let name = name.clone();
|
||||
|
||||
// peek ahead for open paren (func call expr)
|
||||
if matches!(self.peek(1).token_type, TokenType::LParen) {
|
||||
let expr = self.parse_expression()?; // a function call expr
|
||||
self.expect(TokenType::Semicolon)?;
|
||||
return Ok(Statement::Expression { expr });
|
||||
}
|
||||
|
||||
self.advance(); // advance past identifier
|
||||
|
||||
// assignment expression
|
||||
if matches!(self.current().token_type, TokenType::Assign) {
|
||||
self.advance();
|
||||
let expr = self.parse_expression()?;
|
||||
|
||||
self.expect(TokenType::Semicolon)?;
|
||||
Ok(Statement::Assign {
|
||||
name,
|
||||
value: Some(Box::new(expr)),
|
||||
declare_type: None,
|
||||
})
|
||||
}
|
||||
// var expression
|
||||
else {
|
||||
self.expect(TokenType::Semicolon)?;
|
||||
Ok(Statement::Expression {
|
||||
expr: Expression::Variable {
|
||||
name,
|
||||
expr_type: None,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
TokenType::Int => {
|
||||
// Local variable declaration
|
||||
self.advance();
|
||||
let name = match &self.current().token_type {
|
||||
TokenType::Identifier(s) => s.clone(),
|
||||
_ => return Err(self.error("Expected variable name")),
|
||||
};
|
||||
self.advance();
|
||||
|
||||
let init = if matches!(self.current().token_type, TokenType::Assign) {
|
||||
self.advance();
|
||||
Some(self.parse_expression()?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
self.expect(TokenType::Semicolon)?;
|
||||
|
||||
// Convert to assignment expression statement
|
||||
let expr = if let Some(init_expr) = init {
|
||||
Statement::Assign {
|
||||
name,
|
||||
value: Some(Box::new(init_expr)),
|
||||
declare_type: Some(Type::Int),
|
||||
}
|
||||
} else {
|
||||
Statement::Assign {
|
||||
name,
|
||||
value: None,
|
||||
declare_type: Some(Type::Int),
|
||||
}
|
||||
};
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
_ => {
|
||||
let expr = if matches!(self.current().token_type, TokenType::Semicolon) {
|
||||
Expression::Empty
|
||||
} else {
|
||||
self.parse_expression()?
|
||||
};
|
||||
|
||||
self.expect(TokenType::Semicolon)?;
|
||||
Ok(Statement::Expression { expr })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_if_stmt(&mut self) -> Result<Statement, String> {
|
||||
self.expect(TokenType::If)?;
|
||||
self.expect(TokenType::LParen)?;
|
||||
let condition = self.parse_expression()?;
|
||||
self.expect(TokenType::RParen)?;
|
||||
let then_stmt = self.parse_block()?;
|
||||
|
||||
let else_stmt = if matches!(self.current().token_type, TokenType::Else) {
|
||||
self.advance();
|
||||
self.parse_block()?
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
Ok(Statement::If {
|
||||
condition,
|
||||
then_stmt,
|
||||
else_stmt,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_while_stmt(&mut self) -> Result<Statement, String> {
|
||||
self.expect(TokenType::While)?;
|
||||
self.expect(TokenType::LParen)?;
|
||||
let condition = self.parse_expression()?;
|
||||
self.expect(TokenType::RParen)?;
|
||||
let body = self.parse_block()?;
|
||||
|
||||
Ok(Statement::While { condition, body })
|
||||
}
|
||||
|
||||
fn parse_return_stmt(&mut self) -> Result<Statement, String> {
|
||||
self.expect(TokenType::Return)?;
|
||||
|
||||
let expr = if matches!(self.current().token_type, TokenType::Semicolon) {
|
||||
None
|
||||
} else {
|
||||
Some(self.parse_expression()?)
|
||||
};
|
||||
|
||||
self.expect(TokenType::Semicolon)?;
|
||||
Ok(Statement::Return { expr })
|
||||
}
|
||||
|
||||
fn parse_expression(&mut self) -> Result<Expression, String> {
|
||||
self.parse_comparison()
|
||||
}
|
||||
|
||||
fn parse_comparison(&mut self) -> Result<Expression, String> {
|
||||
let mut expr = self.parse_additive()?;
|
||||
|
||||
while let Some(op) = match &self.current().token_type {
|
||||
TokenType::Eq => Some(BinaryOperator::Eq),
|
||||
TokenType::Ne => Some(BinaryOperator::Ne),
|
||||
TokenType::Lt => Some(BinaryOperator::Lt),
|
||||
TokenType::Gt => Some(BinaryOperator::Gt),
|
||||
TokenType::Le => Some(BinaryOperator::Le),
|
||||
TokenType::Ge => Some(BinaryOperator::Ge),
|
||||
_ => None,
|
||||
} {
|
||||
self.advance();
|
||||
let right = Box::new(self.parse_additive()?);
|
||||
expr = Expression::Binary {
|
||||
op,
|
||||
left: Box::new(expr),
|
||||
right,
|
||||
};
|
||||
}
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
fn parse_additive(&mut self) -> Result<Expression, String> {
|
||||
let mut expr = self.parse_multiplicative()?;
|
||||
|
||||
while let Some(op) = match &self.current().token_type {
|
||||
TokenType::Plus => Some(BinaryOperator::Add),
|
||||
TokenType::Minus => Some(BinaryOperator::Sub),
|
||||
_ => None,
|
||||
} {
|
||||
self.advance();
|
||||
let right = Box::new(self.parse_multiplicative()?);
|
||||
expr = Expression::Binary {
|
||||
op,
|
||||
left: Box::new(expr),
|
||||
right,
|
||||
};
|
||||
}
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
fn parse_multiplicative(&mut self) -> Result<Expression, String> {
|
||||
let mut expr = self.parse_unary()?;
|
||||
|
||||
while let Some(op) = match &self.current().token_type {
|
||||
TokenType::Star => Some(BinaryOperator::Mul),
|
||||
TokenType::Slash => Some(BinaryOperator::Div),
|
||||
_ => None,
|
||||
} {
|
||||
self.advance();
|
||||
let right = Box::new(self.parse_unary()?);
|
||||
expr = Expression::Binary {
|
||||
op,
|
||||
left: Box::new(expr),
|
||||
right,
|
||||
};
|
||||
}
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
fn parse_unary(&mut self) -> Result<Expression, String> {
|
||||
let op = match &self.current().token_type {
|
||||
TokenType::Plus => Some(UnaryOperator::Plus),
|
||||
TokenType::Minus => Some(UnaryOperator::Minus),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
if let Some(op) = op {
|
||||
self.advance();
|
||||
let operand = Box::new(self.parse_unary()?);
|
||||
return Ok(Expression::Unary { op, operand });
|
||||
}
|
||||
|
||||
self.parse_primary()
|
||||
}
|
||||
|
||||
fn parse_primary(&mut self) -> Result<Expression, String> {
|
||||
match &self.current().token_type.clone() {
|
||||
TokenType::Number(n) => {
|
||||
let value = *n;
|
||||
self.advance();
|
||||
Ok(Expression::Number { value })
|
||||
}
|
||||
TokenType::Identifier(name) => {
|
||||
let name = name.clone();
|
||||
self.advance();
|
||||
|
||||
if matches!(self.current().token_type, TokenType::LParen) {
|
||||
// Function call
|
||||
self.advance();
|
||||
let mut args = Vec::new();
|
||||
|
||||
if !matches!(self.current().token_type, TokenType::RParen) {
|
||||
args.push(self.parse_expression()?);
|
||||
|
||||
while matches!(self.current().token_type, TokenType::Comma) {
|
||||
self.advance();
|
||||
args.push(self.parse_expression()?);
|
||||
}
|
||||
}
|
||||
|
||||
self.expect(TokenType::RParen)?;
|
||||
Ok(Expression::Call { name, args })
|
||||
} else {
|
||||
Ok(Expression::Variable {
|
||||
name,
|
||||
expr_type: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
TokenType::LParen => {
|
||||
self.advance();
|
||||
let expr = self.parse_expression()?;
|
||||
self.expect(TokenType::RParen)?;
|
||||
Ok(expr)
|
||||
}
|
||||
_ => Err(self.error(&format!(
|
||||
"Unexpected token: {:?}",
|
||||
self.current().token_type
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,344 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Register allocator for DSA assembly generation.
///
/// Manages the general-purpose registers (`rg0`-`rgf`) and spills variables
/// to `bpr`-relative stack slots when no register is free.
#[derive(Debug)]
pub struct RegisterAllocator {
    /// General-purpose registers, in allocation-preference order.
    available_registers: Vec<String>,

    /// Maps variable names to their current location (register or stack offset).
    variable_locations: HashMap<String, Location>,

    /// Maps registers to the variable they currently hold.
    register_contents: HashMap<String, String>,

    /// Offset (relative to `bpr`) of the next free stack slot.
    /// Starts at -4 and decreases by 4 per slot.
    stack_offset: i32,

    /// Registers currently handed out, as temporaries or as variable homes.
    in_use: HashMap<String, bool>,
}

/// Where a variable currently lives.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Location {
    /// In the named register.
    Register(String),
    /// On the stack, at this offset from `bpr`.
    Stack(i32),
}

impl Default for RegisterAllocator {
    fn default() -> Self {
        Self::new()
    }
}

impl RegisterAllocator {
    /// Creates an allocator with all 16 registers free and no stack slots used.
    pub fn new() -> Self {
        let available_registers = [
            "rg0", "rg1", "rg2", "rg3", "rg4", "rg5", "rg6", "rg7", "rg8", "rg9", "rga",
            "rgb", "rgc", "rgd", "rge", "rgf",
        ]
        .into_iter()
        .map(String::from)
        .collect();

        RegisterAllocator {
            available_registers,
            variable_locations: HashMap::new(),
            register_contents: HashMap::new(),
            stack_offset: -4,
            in_use: HashMap::new(),
        }
    }

    /// Returns whether `reg` is currently marked as handed out.
    fn is_in_use(&self, reg: &str) -> bool {
        self.in_use.get(reg).copied().unwrap_or(false)
    }

    /// Allocates a register for expression evaluation.
    ///
    /// Returns the register name plus any assembly needed to free it up
    /// (non-empty only when a variable had to be spilled).
    ///
    /// # Errors
    /// Fails when every register is in use and none of them holds a
    /// spillable variable.
    pub fn alloc_temp(&mut self) -> Result<(String, Vec<String>), String> {
        if let Some(reg) = self.find_free_register() {
            self.in_use.insert(reg.clone(), true);
            return Ok((reg, Vec::new()));
        }

        // Everything is taken: evict the first register that holds a variable.
        let victim = self
            .available_registers
            .iter()
            .find(|reg| self.register_contents.contains_key(*reg))
            .cloned()
            .ok_or_else(|| "No registers available and nothing to spill".to_string())?;

        let code = self.spill_register(&victim)?;
        self.in_use.insert(victim.clone(), true);
        Ok((victim, code))
    }

    /// Releases a temporary register.
    ///
    /// Registers that hold a variable are left untouched; those are only
    /// released through [`RegisterAllocator::free_var`] or by spilling.
    pub fn free_temp(&mut self, reg: &str) {
        if self.register_contents.contains_key(reg) {
            return;
        }
        self.in_use.insert(reg.to_string(), false);
    }

    /// Returns a register holding `var_name`, allocating one if needed.
    ///
    /// If the variable lives on the stack, the returned code reloads it; if it
    /// is new, a register is reserved for it. The code may also contain a
    /// spill of another variable.
    pub fn alloc_var(&mut self, var_name: &str) -> Result<(String, Vec<String>), String> {
        let previous = self.variable_locations.get(var_name).cloned();
        if let Some(Location::Register(reg)) = previous {
            return Ok((reg, Vec::new()));
        }

        let (reg, mut code) = self.alloc_temp()?;
        if let Some(Location::Stack(offset)) = previous {
            // Variable was spilled earlier: bring it back into the register.
            code.push(format!("\tldw bpr, {}, {}", reg, offset));
        }

        self.variable_locations
            .insert(var_name.to_string(), Location::Register(reg.clone()));
        self.register_contents
            .insert(reg.clone(), var_name.to_string());

        Ok((reg, code))
    }

    /// Returns the current location of a variable, if it has one.
    pub fn get_var_location(&self, var_name: &str) -> Option<&Location> {
        self.variable_locations.get(var_name)
    }

    /// Loads a variable into a register; identical to
    /// [`RegisterAllocator::alloc_var`].
    pub fn load_var(&mut self, var_name: &str) -> Result<(String, Vec<String>), String> {
        self.alloc_var(var_name)
    }

    /// Stores the value in `source_reg` into `var_name`.
    ///
    /// An existing variable is written where it lives. A new variable gets a
    /// free register if one exists, otherwise a fresh stack slot. Returns the
    /// assembly performing the store (empty when the value is already in place).
    pub fn store_var(&mut self, var_name: &str, source_reg: &str) -> Vec<String> {
        match self.variable_locations.get(var_name).cloned() {
            Some(Location::Register(dest_reg)) if dest_reg == source_reg => Vec::new(),
            Some(Location::Register(dest_reg)) => {
                vec![format!("\tmov {}, {}", source_reg, dest_reg)]
            }
            Some(Location::Stack(offset)) => {
                vec![format!("\tstw {}, bpr, {}", source_reg, offset)]
            }
            None => self.bind_new_var(var_name, source_reg),
        }
    }

    /// Gives a not-yet-known variable a home and copies `source_reg` into it.
    fn bind_new_var(&mut self, var_name: &str, source_reg: &str) -> Vec<String> {
        let mut code = Vec::new();

        if let Some(free_reg) = self.find_free_register() {
            if free_reg != source_reg {
                code.push(format!("\tmov {}, {}", source_reg, free_reg));
            }
            self.variable_locations
                .insert(var_name.to_string(), Location::Register(free_reg.clone()));
            self.register_contents
                .insert(free_reg.clone(), var_name.to_string());
            self.in_use.insert(free_reg, true);
        } else {
            // No free register: the variable lives on the stack from the start.
            let slot = self.stack_offset;
            self.stack_offset -= 4;
            code.push(format!("\tstw {}, bpr, {}", source_reg, slot));
            self.variable_locations
                .insert(var_name.to_string(), Location::Stack(slot));
        }

        code
    }

    /// Spills the variable held in `reg` (if any) to a new stack slot and
    /// returns the assembly performing the spill.
    fn spill_register(&mut self, reg: &str) -> Result<Vec<String>, String> {
        Ok(self.spill_to_stack(reg))
    }

    /// Infallible core of [`RegisterAllocator::spill_register`].
    ///
    /// The register is marked free afterwards; callers that hand it out again
    /// re-mark it as in use. Registers that hold no variable are left as-is.
    fn spill_to_stack(&mut self, reg: &str) -> Vec<String> {
        let Some(var_name) = self.register_contents.remove(reg) else {
            return Vec::new();
        };

        let slot = self.stack_offset;
        self.stack_offset -= 4;
        self.variable_locations
            .insert(var_name, Location::Stack(slot));
        // Without this the register would stay "in use" with nothing in it
        // and could never be allocated or spilled again.
        self.in_use.insert(reg.to_string(), false);

        vec![format!("\tstw {}, bpr, {}", reg, slot)]
    }

    /// Finds the first register that is not currently in use.
    fn find_free_register(&self) -> Option<String> {
        self.available_registers
            .iter()
            .find(|reg| !self.is_in_use(reg))
            .cloned()
    }

    /// Registers that currently hold a variable, in register order.
    fn variable_registers(&self) -> Vec<String> {
        self.available_registers
            .iter()
            .filter(|reg| self.register_contents.contains_key(*reg))
            .cloned()
            .collect()
    }

    /// Spills every variable held in a register to the stack (useful before
    /// function calls) and frees those registers.
    ///
    /// Registers are processed in register order so the emitted code and the
    /// assigned stack slots are deterministic.
    pub fn spill_all(&mut self) -> Vec<String> {
        let mut code = Vec::new();
        for reg in self.variable_registers() {
            code.extend(self.spill_to_stack(&reg));
        }
        code
    }

    /// Returns the stack space tracked for locals: the negated offset of the
    /// next free slot.
    pub fn get_stack_size(&self) -> i32 {
        -self.stack_offset
    }

    /// Resets the allocator for a new function.
    pub fn reset(&mut self) {
        self.variable_locations.clear();
        self.register_contents.clear();
        self.stack_offset = -4;
        self.in_use.clear();
    }

    /// Forgets a variable, freeing its register if it lives in one.
    pub fn free_var(&mut self, var_name: &str) {
        if let Some(Location::Register(reg)) = self.variable_locations.remove(var_name) {
            self.register_contents.remove(&reg);
            self.in_use.insert(reg, false);
        }
    }

    /// Lists the registers that hold variables and are in use, in register
    /// order. These need to be saved before function calls.
    pub fn get_caller_saved_registers(&self) -> Vec<String> {
        self.variable_registers()
            .into_iter()
            .filter(|reg| self.is_in_use(reg))
            .collect()
    }

    /// Emits `push` instructions for every register returned by
    /// [`RegisterAllocator::get_caller_saved_registers`], in the same order,
    /// so that [`RegisterAllocator::restore_caller_saved`] pops them back
    /// correctly.
    pub fn save_caller_saved(&mut self) -> Vec<String> {
        self.get_caller_saved_registers()
            .iter()
            .map(|reg| format!("\tpush {}", reg))
            .collect()
    }

    /// Emits `pop` instructions for `saved_regs` in reverse (LIFO) order.
    pub fn restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec<String> {
        saved_regs
            .iter()
            .rev()
            .map(|reg| format!("\tpop {}", reg))
            .collect()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Temporaries come out in register order and a freed one is reused.
    #[test]
    fn test_basic_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg1, code1) = allocator.alloc_temp().unwrap();
        assert_eq!(code1.len(), 0); // No spill needed
        assert_eq!(reg1, "rg0");

        let (reg2, code2) = allocator.alloc_temp().unwrap();
        assert_eq!(code2.len(), 0);
        assert_eq!(reg2, "rg1");

        allocator.free_temp(&reg1);

        let (reg3, code3) = allocator.alloc_temp().unwrap();
        assert_eq!(code3.len(), 0);
        assert_eq!(reg3, "rg0"); // Reuses freed register
    }

    /// Asking for the same variable twice yields the same register.
    #[test]
    fn test_variable_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg, "rg0");

        // Requesting same variable again should return same register
        let (reg2, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg2, "rg0");
    }

    /// Once all 16 registers hold variables, the next allocation must spill.
    #[test]
    fn test_stack_allocation() {
        let mut allocator = RegisterAllocator::new();

        // Allocate all 16 registers
        for i in 0..16 {
            allocator.alloc_var(&format!("var{}", i)).unwrap();
        }

        // Next allocation should spill to stack
        let (_, code) = allocator.alloc_var("var16").unwrap();
        assert!(!code.is_empty()); // Should have spill code
    }
}
|
||||
@@ -1,756 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::hash::Hash;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::atomic::AtomicU32;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use chrono::{DateTime, Local};
|
||||
|
||||
use crate::registers::{Location, RegisterAllocator};
|
||||
use crate::{block, cmd, comment, dsa};
|
||||
|
||||
use crate::parser::{
|
||||
BinaryOperator, CompilerError, ConstExpr, Declaration, Dependency, Expression,
|
||||
Program, Statement, UnaryOperator, Variable,
|
||||
};
|
||||
|
||||
pub struct CodeGenerator {
|
||||
ast: Program,
|
||||
imports: HashMap<String, String>,
|
||||
globals: Vec<String>,
|
||||
functions: Vec<String>,
|
||||
symbols: Vec<String>,
|
||||
allocator: RegisterAllocator,
|
||||
}
|
||||
|
||||
/// Table of built-in method names.
///
/// Every entry is currently commented out, so the table is empty. The
/// commented pairs look like source-level name -> `print` library label
/// (NOTE(review): confirm intent before re-enabling).
static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    HashMap::from([
        // ("print", "print::print"),
        // ("println", "print::println"),
        // ("printnum", "print::print_num"),
        // ("print_space", "print::print_whitespace"),
        // ("print_newline", "print::print_newline"),
        // ("print_char", "print::print_byte"),
        // ("print_word", "print::print_word"),
        // ("print_hex", "print::print_hex_word"),
    ])
});
|
||||
|
||||
/// Renders one DSA include directive: `include <name>: "<path>"`.
fn import(name: &str, path: &str) -> String {
    format!("include {name}: \"{path}\"")
}
|
||||
|
||||
impl CodeGenerator {
|
||||
/// Instruction that ends a function by jumping to the shared `_ret` label.
const RET: &'static str = "\tjmp _ret";
|
||||
|
||||
pub fn new(ast: Program) -> Self {
|
||||
CodeGenerator {
|
||||
ast,
|
||||
imports: HashMap::new(),
|
||||
globals: Vec::new(),
|
||||
functions: Vec::new(),
|
||||
symbols: Vec::new(),
|
||||
allocator: RegisterAllocator::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn include(&mut self, name: &str, path: &str) {
|
||||
self.imports.insert(name.to_string(), path.to_string());
|
||||
}
|
||||
|
||||
fn is_global(&self, name: &str) -> bool {
|
||||
// Check if this variable is in the globals list
|
||||
self.globals
|
||||
.iter()
|
||||
.any(|g| g.contains(&format!("dw {}:", name)))
|
||||
}
|
||||
|
||||
pub fn generate(&mut self) -> Result<String, CompilerError> {
|
||||
// always include the print library for debugging!
|
||||
self.include("print", "./lib/io/print.dsa");
|
||||
|
||||
for block in self.ast.clone().declarations {
|
||||
match block {
|
||||
Declaration::Variable {
|
||||
var: Variable { name, .. },
|
||||
..
|
||||
} => self.symbols.push(name),
|
||||
Declaration::Function { name, .. } => self.symbols.push(name),
|
||||
Declaration::Dependency(Dependency { name, .. }) => {
|
||||
self.symbols.push(name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for block in self.ast.clone().declarations {
|
||||
self.generate_block(block.clone())?;
|
||||
}
|
||||
|
||||
self.generate_layout()
|
||||
}
|
||||
|
||||
fn generate_layout(&mut self) -> Result<String, CompilerError> {
|
||||
let datetime: DateTime<Local> = SystemTime::now().into();
|
||||
Ok(dsa![
|
||||
"",
|
||||
comment!("GENERATED BY DSC COMPILER"),
|
||||
comment!(format!(
|
||||
"Generated at {}",
|
||||
datetime.format("%Y-%m-%d %H:%M:%S")
|
||||
)),
|
||||
"",
|
||||
// imports
|
||||
comment!("Imports"),
|
||||
self.imports
|
||||
.iter()
|
||||
.map(|(k, v)| import(k, v))
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n"),
|
||||
"",
|
||||
// reserved memory
|
||||
comment!("Globals & Reserved Memory"),
|
||||
self.globals.join("\n"),
|
||||
"",
|
||||
// entry point
|
||||
comment!("Entry Point"),
|
||||
"dw stack: 0x10000",
|
||||
"db message: \"Process Exited with code:\"",
|
||||
block! [ "_init"
|
||||
dsa![ldw stack, bpr],
|
||||
dsa![mov bpr, spr],
|
||||
dsa![push zero],
|
||||
dsa![call main],
|
||||
dsa![call print::print_newline],
|
||||
dsa![lwi message, rg0],
|
||||
dsa![push rg0],
|
||||
dsa![call print::print],
|
||||
dsa![pop zero],
|
||||
dsa![call print::print_hex_word],
|
||||
dsa![pop zero],
|
||||
dsa![hlt]
|
||||
],
|
||||
"",
|
||||
comment!("Return"),
|
||||
block! [ "_ret"
|
||||
dsa![mov bpr, spr],
|
||||
dsa![pop bpr],
|
||||
dsa![return]
|
||||
],
|
||||
comment!("Compiled Code Starts..."),
|
||||
// block! [ "main"
|
||||
// dsa![push bpr],
|
||||
// dsa![mov spr, bpr],
|
||||
// dsa![lwi 67, rg1],
|
||||
// dsa![stw rg1, spr, 8],
|
||||
// dsa![mov bpr, spr],
|
||||
// dsa![pop bpr],
|
||||
// dsa![return]
|
||||
// ],
|
||||
self.functions.join("\n"),
|
||||
])
|
||||
}
|
||||
|
||||
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
|
||||
self.globals.push(format!(
|
||||
"dw {}: {}",
|
||||
name,
|
||||
init.unwrap_or(ConstExpr::Number(0))
|
||||
))
|
||||
}
|
||||
|
||||
fn generate_block(&mut self, block: Declaration) -> Result<(), CompilerError> {
|
||||
match block {
|
||||
Declaration::Variable { var, init, .. } => {
|
||||
self.generate_global(&var.name, init)
|
||||
}
|
||||
Declaration::Function {
|
||||
name,
|
||||
return_type,
|
||||
params,
|
||||
body,
|
||||
} => {
|
||||
let func = self.generate_function(&name, ¶ms, &body).join("\n");
|
||||
|
||||
self.functions.push(format!("{func}\n"));
|
||||
}
|
||||
Declaration::Dependency(Dependency { name, path }) => {
|
||||
self.imports.insert(name, path);
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Example: Generate code for a function
|
||||
fn generate_function(
|
||||
&mut self,
|
||||
name: &str,
|
||||
params: &[Variable],
|
||||
body: &[Statement],
|
||||
) -> Vec<String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
// Reset allocator for new function
|
||||
self.allocator.reset();
|
||||
|
||||
// Function prologue
|
||||
code.push(format!("{}:", name));
|
||||
code.push("\tpush bpr".to_string());
|
||||
code.push("\tmov spr, bpr".to_string());
|
||||
code.push(String::new());
|
||||
|
||||
// Allocate parameters to registers or stack locations
|
||||
for (i, param) in params.iter().enumerate() {
|
||||
let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8
|
||||
// Track that this parameter is at a stack location
|
||||
let (reg, load_code) = self.allocator.alloc_var(¶m.name).unwrap();
|
||||
code.extend(load_code);
|
||||
code.push(format!("\tldw bpr, {}, {}", reg, offset));
|
||||
}
|
||||
|
||||
// Generate code for function body
|
||||
for stmt in body {
|
||||
let stmt_code = self.generate_statement(stmt).unwrap();
|
||||
code.extend(stmt_code);
|
||||
}
|
||||
|
||||
// automatically return at function end
|
||||
if let Some(x) = code.last()
|
||||
&& x == Self::RET
|
||||
{
|
||||
} else {
|
||||
code.push(Self::RET.to_string());
|
||||
}
|
||||
|
||||
code
|
||||
}
|
||||
|
||||
// Example: Generate code for a statement
|
||||
fn generate_statement(
|
||||
&mut self,
|
||||
stmt: &Statement,
|
||||
) -> Result<Vec<String>, CompilerError> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
match stmt {
|
||||
Statement::Declaration { var, value } => {
|
||||
if let Some(expr) = value {
|
||||
// Evaluate expression
|
||||
let (result_reg, expr_code) = self.generate_expression(expr, true)?;
|
||||
code.extend(expr_code);
|
||||
|
||||
// Store result in variable
|
||||
let store_code = self.allocator.store_var(&var.name, &result_reg);
|
||||
code.extend(store_code);
|
||||
|
||||
// Free temporary register
|
||||
self.allocator.free_temp(&result_reg);
|
||||
} else {
|
||||
// Just declaring variable without initialization
|
||||
self.allocator.alloc_var(&var.name)?;
|
||||
}
|
||||
}
|
||||
|
||||
Statement::Break => unimplemented!(),
|
||||
Statement::Continue => unimplemented!(),
|
||||
|
||||
Statement::PtrWrite { ptr, value } => {
|
||||
let (result_reg, expr_code) = self.generate_expression(value, true)?;
|
||||
code.extend(expr_code);
|
||||
|
||||
let (ptr_reg, ptr_code) = self.generate_expression(ptr, true)?;
|
||||
code.extend(ptr_code);
|
||||
|
||||
code.push(format!("\tstw {}, {}", result_reg, ptr_reg));
|
||||
|
||||
self.allocator.free_temp(&result_reg);
|
||||
self.allocator.free_temp(&ptr_reg);
|
||||
}
|
||||
|
||||
Statement::Assign { varname, value } => {
|
||||
// Evaluate expression
|
||||
let (result_reg, expr_code) = self.generate_expression(value, true)?;
|
||||
code.extend(expr_code);
|
||||
|
||||
// Check if this is a global variable
|
||||
if self.is_global(varname) {
|
||||
// Store to global label
|
||||
code.push(format!("\tstw {}, {}", result_reg, varname));
|
||||
} else {
|
||||
// Store result in local variable
|
||||
let store_code = self.allocator.store_var(varname, &result_reg);
|
||||
code.extend(store_code);
|
||||
}
|
||||
|
||||
// Free temporary register
|
||||
self.allocator.free_temp(&result_reg);
|
||||
}
|
||||
|
||||
Statement::Return(expr) => {
|
||||
if let Some(e) = expr {
|
||||
let (result_reg, expr_code) = self.generate_expression(e, true)?;
|
||||
code.extend(expr_code);
|
||||
code.push(format!("\tstw {}, bpr, 8", result_reg));
|
||||
code.push(format!("\tjmp _ret"));
|
||||
self.allocator.free_temp(&result_reg);
|
||||
}
|
||||
}
|
||||
|
||||
Statement::If {
|
||||
condition,
|
||||
then_stmt,
|
||||
else_stmt,
|
||||
} => {
|
||||
// Generate condition
|
||||
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
|
||||
code.extend(cond_code);
|
||||
|
||||
// Compare with zero
|
||||
code.push(format!("\tcmp {}, zero", cond_reg));
|
||||
self.allocator.free_temp(&cond_reg);
|
||||
|
||||
// Generate unique labels
|
||||
let then_label = format!("_then_{}", self.get_unique_label());
|
||||
let else_label = format!("_else_{}", self.get_unique_label());
|
||||
let end_label = format!("_end_{}", self.get_unique_label());
|
||||
|
||||
// Jump to else if condition is false (equal to zero)
|
||||
code.push(format!("\tjeq {}", else_label));
|
||||
|
||||
// Then block
|
||||
code.push(format!("{}:", then_label));
|
||||
for s in then_stmt {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
if then_stmt.len() == 0 {
|
||||
code.push("\tnop".to_string());
|
||||
}
|
||||
|
||||
code.push(format!("\tjmp {}", end_label));
|
||||
|
||||
// Else block
|
||||
code.push(format!("{}:", else_label));
|
||||
for s in else_stmt {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
if else_stmt.len() == 0 {
|
||||
code.push("\tnop".to_string());
|
||||
}
|
||||
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
|
||||
Statement::While { condition, body } => {
|
||||
let loop_start = format!("_while_start_{}", self.get_unique_label());
|
||||
let loop_end = format!("_while_end_{}", self.get_unique_label());
|
||||
|
||||
code.push(format!("{}:", loop_start));
|
||||
|
||||
// Generate condition
|
||||
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
|
||||
code.extend(cond_code);
|
||||
|
||||
code.push(format!("\tcmp {}, zero", cond_reg));
|
||||
self.allocator.free_temp(&cond_reg);
|
||||
|
||||
code.push(format!("\tjeq {}", loop_end));
|
||||
|
||||
// Loop body
|
||||
for s in body {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
code.push(format!("\tjmp {}", loop_start));
|
||||
code.push(format!("{}:", loop_end));
|
||||
}
|
||||
|
||||
Statement::Loop(body) => {
|
||||
let loop_start = format!("_loop_start_{}", self.get_unique_label());
|
||||
|
||||
code.push(format!("{}:", loop_start));
|
||||
|
||||
for s in body {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
code.push(format!("\tjmp {}", loop_start));
|
||||
}
|
||||
|
||||
Statement::Expression { expr } => {
|
||||
let (result_reg, expr_code) = self.generate_expression(expr, false)?;
|
||||
code.extend(expr_code);
|
||||
self.allocator.free_temp(&result_reg);
|
||||
}
|
||||
|
||||
Statement::Block(statements) => {
|
||||
for s in statements {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(code)
|
||||
}
|
||||
|
||||
// Example: Generate code for an expression
|
||||
// Returns (register containing result, assembly code)
|
||||
fn generate_expression(
|
||||
&mut self,
|
||||
expr: &Expression,
|
||||
use_result: bool,
|
||||
) -> Result<(String, Vec<String>), CompilerError> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
// optimisation to prevent generating dead code!
|
||||
if expr.is_pure() && !use_result {
|
||||
return Ok((String::new(), code));
|
||||
}
|
||||
|
||||
match expr {
|
||||
Expression::StringLiteral(value) => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.extend(alloc_code);
|
||||
|
||||
// write string into memory
|
||||
let uuid = self.get_unique_label();
|
||||
code.push(format!("\tdb str_{uuid}: \"{value}\""));
|
||||
|
||||
// Load pointer to string
|
||||
code.push(format!("\tlwi str_{uuid}, {reg}"));
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::CharLiteral(value) => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.extend(alloc_code);
|
||||
|
||||
// Load immediate value
|
||||
code.push(format!("\tlli {}, {} // '{value}'", *value as u8, reg));
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::Number(value) => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.extend(alloc_code);
|
||||
|
||||
// Load immediate value
|
||||
code.push(format!("\tlli {}, {}", value & 0xFFFF, reg));
|
||||
if *value > 0xFFFF || *value < 0 {
|
||||
code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg));
|
||||
}
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::Variable { name, .. } => {
|
||||
if self.is_global(&name.name) {
|
||||
// Allocate a temporary register for the global
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.extend(alloc_code);
|
||||
|
||||
// Load from global label
|
||||
code.push(format!("\tldw {}, {}", name.name, reg));
|
||||
|
||||
Ok((reg, code))
|
||||
} else {
|
||||
// Local variable - use existing allocator logic
|
||||
let (reg, load_code) = self.allocator.load_var(&name.name)?;
|
||||
code.extend(load_code);
|
||||
Ok((reg, code))
|
||||
}
|
||||
}
|
||||
|
||||
Expression::Binary { op, left, right } => {
|
||||
// Evaluate left operand
|
||||
let (left_reg, left_code) = self.generate_expression(left, true)?;
|
||||
code.extend(left_code);
|
||||
|
||||
// Evaluate right operand
|
||||
let (right_reg, right_code) = self.generate_expression(right, true)?;
|
||||
code.extend(right_code);
|
||||
|
||||
// Allocate result register
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.extend(result_alloc);
|
||||
|
||||
// Generate operation
|
||||
match op {
|
||||
BinaryOperator::Add => {
|
||||
code.push(format!(
|
||||
"\tadd {}, {}, {}",
|
||||
left_reg, right_reg, result_reg
|
||||
));
|
||||
}
|
||||
BinaryOperator::Sub => {
|
||||
code.push(format!(
|
||||
"\tsub {}, {}, {}",
|
||||
left_reg, right_reg, result_reg
|
||||
));
|
||||
}
|
||||
BinaryOperator::Mul => {
|
||||
self.include("maths", "./lib/maths/core.dsa");
|
||||
// Call multiply function
|
||||
code.push(format!("\tpush {}", right_reg));
|
||||
code.push(format!("\tpush {}", left_reg));
|
||||
code.push("\tcall maths::multiply".to_string());
|
||||
code.push(format!("\tpop {}", result_reg));
|
||||
code.push("\tpop zero".to_string());
|
||||
}
|
||||
// Comparison operators - return 1 (true) or 0 (false)
|
||||
BinaryOperator::Eq => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Ne => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Lt => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Le => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Gt => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Ge => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
|
||||
// Free operand registers (allocator will protect variables)
|
||||
self.allocator.free_temp(&left_reg);
|
||||
self.allocator.free_temp(&right_reg);
|
||||
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Call { name, args } => {
|
||||
// first evaluate all the args we're going to need
|
||||
let mut arg_regs = Vec::new();
|
||||
for arg in args.iter().rev() {
|
||||
let (arg_reg, arg_code) = self.generate_expression(arg, true)?;
|
||||
code.extend(arg_code);
|
||||
arg_regs.push(arg_reg);
|
||||
}
|
||||
|
||||
// Save caller-saved registers and track which ones we saved
|
||||
// old method, inefficient.
|
||||
// let saved_regs = self.allocator.get_caller_saved_registers();
|
||||
// for reg in &saved_regs {
|
||||
// code.push(format!("\tpush {}", reg));
|
||||
// }
|
||||
|
||||
// Save caller-saved registers and track which ones we saved
|
||||
let saved_regs = self.allocator.get_caller_saved_registers();
|
||||
for reg in &saved_regs {
|
||||
// spill variables to stack
|
||||
code.extend(self.allocator.spill_register(reg).unwrap());
|
||||
}
|
||||
|
||||
// Evaluate and push arguments in reverse order
|
||||
for (i, arg_reg) in arg_regs.iter().enumerate() {
|
||||
code.push(format!(
|
||||
"\tpush {} // push arg {}",
|
||||
arg_reg,
|
||||
args.len() - 1 - i
|
||||
));
|
||||
}
|
||||
|
||||
// if GLOBAL_METHODS.contains_key(name.name.as_str()) {
|
||||
// code.push(format!("\tcall {}",
|
||||
// GLOBAL_METHODS[name.name.as_str()])); } else
|
||||
if self.symbols.contains(&name.name) {
|
||||
// Call local function
|
||||
code.push(format!("\tcall {}", name));
|
||||
} else if let Some(ns) = name.namespace.clone()
|
||||
&& self.imports.contains_key(&ns)
|
||||
{
|
||||
code.push(format!("\tcall {}", name));
|
||||
} else {
|
||||
return Err(CompilerError::Undefined(name.clone()));
|
||||
}
|
||||
|
||||
let result_reg: String;
|
||||
|
||||
if use_result {
|
||||
let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
result_reg = temp_result_reg;
|
||||
|
||||
code.extend(result_alloc);
|
||||
code.push(format!("\tpop {}", result_reg));
|
||||
|
||||
// Clean up arguments
|
||||
if args.len() > 1 {
|
||||
for _ in 0..(args.len() - 1) {
|
||||
code.push("\tpop zero".to_string());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result_reg = "zero".to_string();
|
||||
|
||||
// Clean up arguments
|
||||
if args.len() > 0 {
|
||||
for _ in 0..(args.len()) {
|
||||
code.push("\tpop zero".to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restore caller-saved registers in reverse order (LIFO)
|
||||
// for reg in saved_regs.iter().rev() {
|
||||
// code.push(format!("\tpop {}", reg));
|
||||
// }
|
||||
|
||||
// Free argument registers
|
||||
for reg in arg_regs {
|
||||
self.allocator.free_temp(®);
|
||||
}
|
||||
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Unary { op, operand } => {
|
||||
let (operand_reg, operand_code) =
|
||||
self.generate_expression(operand, true)?;
|
||||
code.extend(operand_code);
|
||||
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.extend(result_alloc);
|
||||
|
||||
match op {
|
||||
UnaryOperator::Minus => {
|
||||
// Negate: result = 0 - operand
|
||||
code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::Plus => {
|
||||
// Just move
|
||||
code.push(format!("\tmov {}, {}", operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::Dereference => {
|
||||
code.push(format!("\tldw {}, {}", operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::Reference => {
|
||||
code.extend(self.allocator.spill_register(&operand_reg)?);
|
||||
code.push(format!(
|
||||
"\tsubi bpr {} {}",
|
||||
-(4 + self.allocator.get_stack_offset()),
|
||||
result_reg
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
self.allocator.free_temp(&operand_reg);
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Empty => Ok(("zero".to_string(), code)),
|
||||
}
|
||||
}
|
||||
|
||||
// Helper for generating unique labels
|
||||
fn get_unique_label(&mut self) -> String {
|
||||
// You'd implement a counter here
|
||||
static COUNTER: AtomicU32 = AtomicU32::new(0);
|
||||
|
||||
let val = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
|
||||
(val + 1).to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Joins any number of arguments into one `String`, one argument per line.
///
/// Every argument must implement `Display`. Each one is written followed by
/// a `\n`, so the result of a non-empty call always ends with a newline. A
/// trailing comma is accepted; a call without arguments yields an empty string.
#[macro_export]
macro_rules! dsa {
    ($($arg:expr),* $(,)?) => {{
        // Only the trait's methods are needed, not its name.
        use ::std::fmt::Write as _;
        let mut out = ::std::string::String::new();
        $(
            // Appending to the same buffer avoids one temporary `String`
            // per argument.
            ::std::writeln!(out, "{}", $arg).expect("write to String failed");
        )*
        out
    }};
}
|
||||
|
||||
// ──────────────────────── cmd! ────────────────────────
// A tiny helper that turns its token stream into a `String` terminated by a
// newline. The tokens are stringified verbatim, so punctuation such as a
// trailing comma is kept - it is part of the syntax you want.
#[macro_export]
macro_rules! cmd {
    ($($tokens:tt)*) => {
        // `$tokens` was matched inside a repetition, so it has to be expanded
        // inside one as well; using it bare makes every invocation fail.
        ::std::string::String::from(concat!(stringify!($($tokens)*), "\n"))
    };
}
|
||||
|
||||
// ──────────────────────── block! ────────────────────────
// Usage:
//
//     let asm = block![ "name"
//         dsa![mov rg0, rg1],
//         dsa![add rg1, rg1]
//     ];
//
// `asm` is a `&'static str` containing:
//
//     name:
//         mov rg0, rg1
//         add rg1, rg1
//
// Each instruction line starts with a tab and ends with a newline. A comma
// after the last `dsa![...]` entry is accepted, and a block may have no
// instructions at all.
#[macro_export]
macro_rules! block {
    // The first token must be a string literal - that is the label. The
    // `dsa![...]` entries are matched syntactically; their tokens are
    // stringified here, the `dsa!` macro itself is never invoked.
    ($label:literal $(dsa![$($ins:tt)*]),* $(,)?) => {{
        // Build a single string at compile time.
        const CODE: &str = concat!(
            $label, ":\n",
            $("\t", stringify!($($ins)*), "\n",)*
        );
        CODE
    }};
}
|
||||
|
||||
/// Formats `$text` (anything implementing `Display`) as a `// ...` line
/// comment. No newline is appended.
#[macro_export]
macro_rules! comment {
    ($text:expr) => {
        ::std::format!("// {}", $text)
    };
}
|
||||
@@ -1,627 +0,0 @@
|
||||
use std::iter::Peekable;
|
||||
use std::str::Chars;
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum Token {
|
||||
// Keywords
|
||||
Fn,
|
||||
Let,
|
||||
If,
|
||||
Else,
|
||||
Loop,
|
||||
While,
|
||||
Break,
|
||||
Return,
|
||||
Continue,
|
||||
Include,
|
||||
Static,
|
||||
Const,
|
||||
|
||||
// Identifiers and literals
|
||||
Identifier(Name),
|
||||
String(String),
|
||||
Integer(u64),
|
||||
Char(char),
|
||||
|
||||
// Symbols
|
||||
LeftParen, // (
|
||||
RightParen, // )
|
||||
LeftBrace, // {
|
||||
RightBrace, // }
|
||||
Semicolon, // ;
|
||||
Colon, // :
|
||||
Comma, // ,
|
||||
|
||||
// Operators
|
||||
Plus, // +
|
||||
Minus, // -
|
||||
Star, // *
|
||||
Amphersand, // &
|
||||
Slash, // /
|
||||
Assign, // =
|
||||
EqualEqual, // ==
|
||||
Bang, // !
|
||||
BangEqual, // !=
|
||||
Less, // <
|
||||
LessEqual, // <=
|
||||
Greater, // >
|
||||
GreaterEqual, // >=
|
||||
RightArrow, // ->
|
||||
|
||||
// Special
|
||||
Eof,
|
||||
}
|
||||
|
||||
/// An identifier together with the namespace it was qualified with, if any.
#[derive(Clone, Debug, PartialEq)]
pub struct Name {
    /// The identifier itself (the part after `::` when namespaced).
    pub name: String,
    /// The qualifier in front of `::`, when present.
    pub namespace: Option<String>,
}

use std::fmt;

/// Prints `namespace::name` for qualified names and just `name` otherwise.
impl fmt::Display for Name {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.namespace {
            Some(ns) => write!(f, "{}::{}", ns, self.name),
            None => write!(f, "{}", self.name),
        }
    }
}
|
||||
|
||||
impl Token {
|
||||
pub fn tt(&self) -> &str {
|
||||
match self {
|
||||
Token::Const => "Const",
|
||||
Token::Static => "Static",
|
||||
Token::Include => "Include",
|
||||
Token::Fn => "Fn",
|
||||
Token::If => "If",
|
||||
Token::Let => "Let",
|
||||
Token::Else => "Else",
|
||||
Token::Loop => "Loop",
|
||||
Token::While => "While",
|
||||
Token::Break => "Break",
|
||||
Token::Return => "Return",
|
||||
Token::Continue => "Continue",
|
||||
Token::Identifier(_) => "Identifier",
|
||||
Token::String(_) => "String",
|
||||
Token::Integer(_) => "UnsignedInt",
|
||||
Token::Char(_) => "Char",
|
||||
Token::LeftParen => "LeftParen",
|
||||
Token::RightParen => "RightParen",
|
||||
Token::LeftBrace => "LeftBrace",
|
||||
Token::RightBrace => "RightBrace",
|
||||
Token::Semicolon => "Semicolon",
|
||||
Token::Colon => "Colon",
|
||||
Token::Comma => "Comma",
|
||||
Token::RightArrow => "RightArrow",
|
||||
Token::Plus => "Plus",
|
||||
Token::Minus => "Minus",
|
||||
Token::Star => "Star",
|
||||
Token::Amphersand => "Amphersand",
|
||||
Token::Slash => "Slash",
|
||||
Token::Assign => "Assign",
|
||||
Token::EqualEqual => "EqualEqual",
|
||||
Token::Bang => "Bang",
|
||||
Token::BangEqual => "BangEqual",
|
||||
Token::Less => "Less",
|
||||
Token::LessEqual => "LessEqual",
|
||||
Token::Greater => "Greater",
|
||||
Token::GreaterEqual => "GreaterEqual",
|
||||
Token::Eof => "Eof",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Tokenizer state over a borrowed source string.
#[derive(Debug)]
pub struct Lexer<'src> {
    /// Characters that come after `current`.
    chars: Peekable<Chars<'src>>,
    /// The character the lexer is positioned on; `None` once input is exhausted.
    current: Option<char>,
    /// Line number used in diagnostics; starts at 1.
    line: usize,
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new(input: &'a str) -> Self {
|
||||
let mut chars = input.chars().peekable();
|
||||
let current = chars.next();
|
||||
|
||||
Lexer {
|
||||
chars,
|
||||
current,
|
||||
line: 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn advance(&mut self) -> Option<char> {
|
||||
self.current = self.chars.next();
|
||||
self.current
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<&char> {
|
||||
self.chars.peek()
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) {
|
||||
while let Some(c) = self.current {
|
||||
if !c.is_whitespace() {
|
||||
break;
|
||||
}
|
||||
if c == '\n' {
|
||||
self.line += 1;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_line_comment(&mut self) {
|
||||
// Skip the two slashes
|
||||
self.advance(); // first /
|
||||
self.advance(); // second /
|
||||
|
||||
// Skip until newline or EOF
|
||||
while let Some(c) = self.current {
|
||||
if c == '\n' {
|
||||
self.line += 1;
|
||||
self.advance();
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_block_comment(&mut self) -> Result<(), String> {
|
||||
// Skip the /*
|
||||
self.advance(); // /
|
||||
self.advance(); // *
|
||||
|
||||
let start_line = self.line;
|
||||
|
||||
// Look for */
|
||||
while let Some(c) = self.current {
|
||||
if c == '\n' {
|
||||
self.line += 1;
|
||||
}
|
||||
|
||||
if c == '*' {
|
||||
if let Some(&next) = self.peek() {
|
||||
if next == '/' {
|
||||
self.advance(); // *
|
||||
self.advance(); // /
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Err(format!(
|
||||
"Unterminated block comment starting at line {}",
|
||||
start_line
|
||||
))
|
||||
}
|
||||
|
||||
fn skip_whitespace_and_comments(&mut self) {
|
||||
loop {
|
||||
self.skip_whitespace();
|
||||
|
||||
// Check for comments
|
||||
if let Some('/') = self.current {
|
||||
if let Some(&next) = self.peek() {
|
||||
match next {
|
||||
'/' => {
|
||||
self.skip_line_comment();
|
||||
continue;
|
||||
}
|
||||
'*' => {
|
||||
if let Err(e) = self.skip_block_comment() {
|
||||
eprintln!("Lexer error: {}", e);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fn read_identifier(&mut self) -> String {
|
||||
let mut ident = String::new();
|
||||
|
||||
// Include the current character if it's valid
|
||||
if let Some(c) = self.current {
|
||||
if c.is_alphabetic() || c == '_' {
|
||||
ident.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
// Read remaining characters
|
||||
while let Some(&c) = self.peek() {
|
||||
if c.is_alphanumeric() || c == '_' {
|
||||
self.advance();
|
||||
ident.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ident
|
||||
}
|
||||
|
||||
fn keyword_or_identifier(&mut self) -> Token {
|
||||
let first_ident = self.read_identifier();
|
||||
|
||||
// Check if it's a keyword first (keywords can't have namespaces)
|
||||
let keyword = match first_ident.as_str() {
|
||||
"fn" => Some(Token::Fn),
|
||||
"if" => Some(Token::If),
|
||||
"else" => Some(Token::Else),
|
||||
"while" => Some(Token::While),
|
||||
"loop" => Some(Token::Loop),
|
||||
"break" => Some(Token::Break),
|
||||
"return" => Some(Token::Return),
|
||||
"continue" => Some(Token::Continue),
|
||||
"include" => Some(Token::Include),
|
||||
"let" => Some(Token::Let),
|
||||
"const" => Some(Token::Const),
|
||||
"static" => Some(Token::Static),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
if let Some(kw) = keyword {
|
||||
return kw;
|
||||
}
|
||||
|
||||
// Not a keyword - check for namespace separator (::)
|
||||
// We need to peek TWO characters ahead without consuming anything
|
||||
if let Some(&':') = self.peek() {
|
||||
// We see one colon, but we need to check if there's another one after it
|
||||
// We can't peek two ahead directly, so we need a different approach
|
||||
|
||||
// Save the current position by using a temporary peekable iterator
|
||||
// Actually, we can't do that easily. Instead, let's just check:
|
||||
// If we see ':', temporarily advance and check the next char
|
||||
|
||||
// Create a temporary check
|
||||
let mut temp_chars = self.chars.clone();
|
||||
let first_peek = temp_chars.next(); // This is the ':' we already saw
|
||||
let second_peek = temp_chars.peek();
|
||||
|
||||
if let Some(&':') = second_peek {
|
||||
// It's :: - consume both colons
|
||||
self.advance(); // consume first :
|
||||
self.advance(); // consume second :
|
||||
|
||||
// Read the second identifier (the actual name)
|
||||
let second_ident = self.read_identifier();
|
||||
|
||||
// Return namespaced identifier
|
||||
return Token::Identifier(Name {
|
||||
namespace: Some(first_ident),
|
||||
name: second_ident,
|
||||
});
|
||||
}
|
||||
// else: It's a single colon (type annotation) - DON'T consume it
|
||||
// Just fall through and return the identifier
|
||||
}
|
||||
|
||||
// No namespace separator - just a regular identifier
|
||||
Token::Identifier(Name {
|
||||
namespace: None,
|
||||
name: first_ident,
|
||||
})
|
||||
}
|
||||
|
||||
fn read_number(&mut self) -> Result<u64, String> {
|
||||
let current = self.current.unwrap();
|
||||
|
||||
// Check for hex (0x) or binary (0b) prefix
|
||||
if current == '0' {
|
||||
if let Some(&next_char) = self.peek() {
|
||||
match next_char {
|
||||
'x' | 'X' => {
|
||||
self.advance(); // consume '0'
|
||||
self.advance(); // consume 'x'
|
||||
return self.read_hex_number();
|
||||
}
|
||||
'b' | 'B' => {
|
||||
self.advance(); // consume '0'
|
||||
self.advance(); // consume 'b'
|
||||
return self.read_binary_number();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Read decimal number
|
||||
self.read_decimal_number()
|
||||
}
|
||||
|
||||
fn read_decimal_number(&mut self) -> Result<u64, String> {
|
||||
let mut num_str = String::new();
|
||||
|
||||
if let Some(c) = self.current {
|
||||
num_str.push(c);
|
||||
}
|
||||
|
||||
while let Some(&c) = self.peek() {
|
||||
if c.is_ascii_digit() {
|
||||
self.advance();
|
||||
num_str.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
num_str
|
||||
.parse::<u64>()
|
||||
.map_err(|_| format!("Invalid decimal number: {}", num_str))
|
||||
}
|
||||
|
||||
fn read_hex_number(&mut self) -> Result<u64, String> {
|
||||
let mut num_str = String::new();
|
||||
|
||||
// Read current character if it's a hex digit
|
||||
if let Some(c) = self.current {
|
||||
if c.is_ascii_hexdigit() {
|
||||
num_str.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(&c) = self.peek() {
|
||||
if c.is_ascii_hexdigit() {
|
||||
self.advance();
|
||||
num_str.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if num_str.is_empty() {
|
||||
return Err("Invalid hexadecimal number: no digits after 0x".to_string());
|
||||
}
|
||||
|
||||
u64::from_str_radix(&num_str, 16)
|
||||
.map_err(|_| format!("Invalid hexadecimal number: {}", num_str))
|
||||
}
|
||||
|
||||
fn read_binary_number(&mut self) -> Result<u64, String> {
|
||||
let mut num_str = String::new();
|
||||
|
||||
// Read current character if it's a binary digit
|
||||
if let Some(c) = self.current {
|
||||
if c == '0' || c == '1' {
|
||||
num_str.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(&c) = self.peek() {
|
||||
if c == '0' || c == '1' {
|
||||
self.advance();
|
||||
num_str.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if num_str.is_empty() {
|
||||
return Err("Invalid binary number: no digits after 0b".to_string());
|
||||
}
|
||||
|
||||
u64::from_str_radix(&num_str, 2)
|
||||
.map_err(|_| format!("Invalid binary number: {}", num_str))
|
||||
}
|
||||
|
||||
fn read_string(&mut self) -> Result<String, String> {
|
||||
self.advance(); // Skip the opening quote
|
||||
let mut s = String::new();
|
||||
|
||||
while let Some(c) = self.current {
|
||||
if c == '"' {
|
||||
return Ok(s);
|
||||
}
|
||||
|
||||
// Handle escape sequences
|
||||
if c == '\\' {
|
||||
self.advance();
|
||||
if let Some(escaped) = self.current {
|
||||
let escaped_char = match escaped {
|
||||
'n' => '\n',
|
||||
't' => '\t',
|
||||
'r' => '\r',
|
||||
'\\' => '\\',
|
||||
'"' => '"',
|
||||
_ => escaped, // For now, just use the character as-is
|
||||
};
|
||||
s.push(escaped_char);
|
||||
} else {
|
||||
return Err("Unexpected end of string after escape".to_string());
|
||||
}
|
||||
} else {
|
||||
s.push(c);
|
||||
}
|
||||
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Err("Unterminated string literal".to_string())
|
||||
}
|
||||
|
||||
fn match_next(&mut self, expected: char) -> bool {
|
||||
match self.peek() {
|
||||
Some(&c) if c == expected => {
|
||||
self.advance();
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_single_char_token(&mut self, c: char) -> Option<Token> {
|
||||
match c {
|
||||
'(' => Some(Token::LeftParen),
|
||||
')' => Some(Token::RightParen),
|
||||
'{' => Some(Token::LeftBrace),
|
||||
'}' => Some(Token::RightBrace),
|
||||
';' => Some(Token::Semicolon),
|
||||
',' => Some(Token::Comma),
|
||||
'&' => Some(Token::Amphersand),
|
||||
'+' => Some(Token::Plus),
|
||||
'*' => Some(Token::Star),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_operator(&mut self, c: char) -> Option<Token> {
|
||||
match c {
|
||||
'-' => Some(if self.match_next('>') {
|
||||
Token::RightArrow
|
||||
} else {
|
||||
Token::Minus
|
||||
}),
|
||||
'!' => Some(if self.match_next('=') {
|
||||
Token::BangEqual
|
||||
} else {
|
||||
Token::Bang
|
||||
}),
|
||||
'=' => Some(if self.match_next('=') {
|
||||
Token::EqualEqual
|
||||
} else {
|
||||
Token::Assign
|
||||
}),
|
||||
'<' => Some(if self.match_next('=') {
|
||||
Token::LessEqual
|
||||
} else {
|
||||
Token::Less
|
||||
}),
|
||||
'>' => Some(if self.match_next('=') {
|
||||
Token::GreaterEqual
|
||||
} else {
|
||||
Token::Greater
|
||||
}),
|
||||
':' => {
|
||||
// Single colon (for type annotations)
|
||||
// Note: :: is handled in keyword_or_identifier for namespaces
|
||||
Some(Token::Colon)
|
||||
}
|
||||
'/' => {
|
||||
// Check if it's a comment or division
|
||||
if let Some(&next) = self.peek() {
|
||||
if next == '/' || next == '*' {
|
||||
// It's a comment, don't consume it here
|
||||
// Let skip_whitespace_and_comments handle it
|
||||
None
|
||||
} else {
|
||||
Some(Token::Slash)
|
||||
}
|
||||
} else {
|
||||
Some(Token::Slash)
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_token(&mut self) -> Token {
|
||||
self.skip_whitespace_and_comments();
|
||||
|
||||
let Some(c) = self.current else {
|
||||
return Token::Eof;
|
||||
};
|
||||
|
||||
// Try single-character tokens first
|
||||
if let Some(token) = self.scan_single_char_token(c) {
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// Try operators (may be multi-character)
|
||||
if let Some(token) = self.scan_operator(c) {
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// String literals
|
||||
if c == '"' {
|
||||
let token = match self.read_string() {
|
||||
Ok(s) => Token::String(s),
|
||||
Err(e) => {
|
||||
eprintln!("Lexer error on line {}: {}", self.line, e);
|
||||
// Skip to next quote or end
|
||||
while let Some(ch) = self.current {
|
||||
if ch == '"' || ch == '\n' {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
Token::String(String::new())
|
||||
}
|
||||
};
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// Identifiers and keywords (including namespaced identifiers)
|
||||
if c.is_alphabetic() || c == '_' {
|
||||
let token = self.keyword_or_identifier();
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// Numbers (decimal, hex, binary)
|
||||
if c.is_ascii_digit() {
|
||||
let token = match self.read_number() {
|
||||
Ok(num) => Token::Integer(num),
|
||||
Err(e) => {
|
||||
eprintln!("Lexer error on line {}: {}", self.line, e);
|
||||
// Skip invalid number
|
||||
while let Some(&ch) = self.peek() {
|
||||
if !ch.is_alphanumeric() {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
Token::Integer(0)
|
||||
}
|
||||
};
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// Unknown character - skip it
|
||||
eprintln!(
|
||||
"Lexer warning on line {}: Skipping unknown character '{}'",
|
||||
self.line, c
|
||||
);
|
||||
self.advance();
|
||||
self.next_token()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Lexer<'a> {
|
||||
type Item = Token;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.next_token() {
|
||||
Token::Eof => None,
|
||||
token => Some(token),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Placeholder that only keeps the test target non-empty; it does not
    /// exercise the lexer.
    #[test]
    fn test_basic() {
        // Placeholder test
        let placeholder_passes = true;
        assert!(placeholder_passes);
    }
}
|
||||
Reference in New Issue
Block a user