reoganised project

This commit is contained in:
2026-02-23 20:32:45 +00:00
parent 42bc666c11
commit 71b36dc6b5
96 changed files with 662 additions and 46 deletions
+955
View File
@@ -0,0 +1,955 @@
use std::collections::HashMap;
use std::sync::atomic::AtomicU32;
use std::time::SystemTime;
use chrono::{DateTime, Local};
use super::registers::RegisterAllocator;
use crate::backend::dsa::instruction::{InsBlock as IB, Instruction as I, Label};
use crate::backend::dsa::registers::Register;
use crate::model::{
AssignmentOperator, BinaryOperator, Call, CompilerError, ConstExpr, Declaration,
Dependency, Expression, Number, Program, Statement, TypeId, UnaryOperator, Variable,
};
pub struct CodeGenerator {
ast: Program,
imports: HashMap<String, I>,
globals: HashMap<String, I>,
functions: Vec<IB>,
symbols: Vec<String>,
allocator: RegisterAllocator,
}
impl CodeGenerator {
pub fn new(ast: Program) -> Self {
CodeGenerator {
ast,
imports: HashMap::new(),
globals: HashMap::new(),
functions: Vec::new(),
symbols: Vec::new(),
allocator: RegisterAllocator::new(),
}
}
pub fn include(&mut self, name: impl Into<String>, path: impl Into<String>) {
let name = name.into();
self.imports.insert(name.clone(), I::include(name, path));
}
fn is_global(&self, name: &str) -> bool {
// Check if this variable is in the globals list
self.globals.contains_key(name)
}
pub fn generate(&mut self) -> Result<String, CompilerError> {
// always include the print library for debugging!
self.include("print", "./lib/io/print.dsa");
for block in self.ast.clone().declarations {
match block {
Declaration::Variable {
var: Variable { name, .. },
..
} => self.symbols.push(name),
Declaration::Function { name, .. } => self.symbols.push(name),
Declaration::Dependency(Dependency { name, .. }) => {
self.symbols.push(name)
}
Declaration::Struct { .. } => {} /* we can't do any code generation for
* a struct yet. we may need to later
* once these become class-like
* objects with implementations */
}
}
for block in self.ast.clone().declarations {
self.generate_block(block.clone())?;
}
let assembly = self.generate_layout()?;
Ok(assembly
.iter()
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join("\n"))
}
fn generate_layout(&mut self) -> Result<IB, CompilerError> {
let datetime: DateTime<Local> = SystemTime::now().into();
let mut block = IB::new();
block.extend(vec![
I::global_comment(format!(
"GENERATED BY DSC COMPILER
Generated at {}",
datetime.format("%Y-%m-%d %H:%M:%S")
)),
I::Newline,
I::global_comment("Imports"),
]);
block.extend(self.imports.values().cloned().collect::<Vec<_>>());
block.extend(vec![
I::Newline,
I::global_comment("Globals & Reserved Memory"),
]);
block.extend(self.globals.values().cloned().collect::<Vec<_>>());
block.extend(vec![
I::Newline,
I::global_comment("Entry Point"),
I::db_word("stack", 0x10000),
I::db_string("message", "Process Exited with code:"),
// init function for stack setup.
I::label("_init"),
I::ldw_label("stack", Register::Bpr),
I::mov(Register::Bpr, Register::Spr),
I::push(Register::Zero),
I::call("main"),
I::call("print::print_newline"),
I::lwi_label("message", Register::Rg0),
I::push(Register::Rg0),
I::call("print::print"),
I::pop(Register::Zero),
I::call("print::print_hex_word"),
I::pop(Register::Zero),
I::Hlt,
I::Newline,
// default return block boilerplate
I::global_comment("Return"),
I::label("_ret"),
I::mov(Register::Bpr, Register::Spr),
I::pop(Register::Bpr),
I::Return,
]);
for function in self.functions.iter() {
block.extend(function.iter().cloned());
}
Ok(block)
}
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
let init = init.unwrap_or(ConstExpr::Number(0));
match init {
ConstExpr::Number(value) => {
self.globals
.insert(name.to_string(), I::db_word(name, value as u32));
}
ConstExpr::String(str) => {
self.globals
.insert(name.to_string(), I::db_string(name, str));
}
}
}
fn generate_block(&mut self, block: Declaration) -> Result<(), CompilerError> {
match block {
Declaration::Variable { var, init, .. } => {
self.generate_global(&var.name, init)
}
Declaration::Function {
name,
params,
body,
return_type,
} => {
let func = self.generate_function(&name, &params, &body, return_type);
self.functions.push(func);
}
Declaration::Dependency(Dependency { name, path }) => {
self.include(name, path);
}
Declaration::Struct { .. } => {} /* can't do any codegen for these yet,
* they're just types. */
};
Ok(())
}
// Example: Generate code for a function
fn generate_function(
&mut self,
name: &str,
params: &[Variable],
body: &[Statement],
return_type: TypeId,
) -> IB {
let mut code = IB::new();
// Reset allocator for new function
self.allocator.reset();
let fmtparams = params
.iter()
.map(|p| format!("{}: {}", p.name, p.type_id))
.collect::<Vec<String>>()
.join(", ");
code.extend(vec![
I::global_comment(format!("fn {name}({fmtparams}) -> {return_type}")),
I::label(name),
I::push(Register::Bpr),
I::mov(Register::Spr, Register::Bpr),
]);
// Allocate parameters to registers or stack locations
for (i, param) in params.iter().enumerate() {
let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8
// Track that this parameter is at a stack location
let (reg, load_code) = self.allocator.alloc_var(&param.name).unwrap();
code.append(load_code);
code.push(I::ldw_reg_offset(Register::Bpr, reg, offset));
}
// Generate code for function body
for stmt in body {
let stmt_code = self.generate_statement(stmt, &mut code).unwrap();
code.append(stmt_code);
}
// automatically return at function end
if let Some(x) = code.iter().last()
&& let I::Jmp { target: Label(val) } = x
&& val == "_ret"
{
} else {
code.push(I::jmp("_ret"));
}
code.insert(0, I::Newline);
code
}
// Example: Generate code for a statement
fn generate_statement(
&mut self,
stmt: &Statement,
func_body: &mut IB,
) -> Result<IB, CompilerError> {
let mut code = IB::new();
match stmt {
Statement::Declaration { var, value } => {
if let Some(expr) = value {
// Evaluate expression
let (result_reg, expr_code) =
self.generate_expression(expr, true, func_body)?;
code.append(expr_code);
// Store result in variable
let store_code = self.allocator.store_var(&var.name, &result_reg);
code.append(store_code);
// Free temporary register
self.allocator.free_temp(result_reg);
} else {
// Just declaring variable without initialization
self.allocator.alloc_var(&var.name)?;
}
}
Statement::Break => unimplemented!("need scope tracking first!"),
Statement::Continue => unimplemented!("need scope tracking first!"),
Statement::Defer(_func) => unimplemented!("we need scope tracking first!"),
Statement::PtrWrite { ptr, value } => {
let (result_reg, expr_code) =
self.generate_expression(value, true, func_body)?;
code.append(expr_code);
let (ptr_reg, ptr_code) =
self.generate_expression(ptr, true, func_body)?;
code.append(ptr_code);
code.push(I::stw_reg(result_reg, ptr_reg));
self.allocator.free_temp(result_reg);
self.allocator.free_temp(ptr_reg);
}
Statement::Assign {
varname,
value,
operator,
} => {
// Evaluate expression
let (result_reg, expr_code) =
self.generate_expression(value, true, func_body)?;
code.append(expr_code);
if *operator == AssignmentOperator::Assign {
// Check if this is a global variable
if self.is_global(varname) {
// Store to global label
code.push(I::stw_label(result_reg, varname.clone()))
} else {
// Store result in local variable
let store_code = self.allocator.store_var(varname, &result_reg);
code.append(store_code);
}
// Free temporary register
self.allocator.free_temp(result_reg);
return Ok(code);
}
// for more complex assignment cases we need an intermediate register.
let (temp_reg, temp_code) = self.allocator.alloc_temp()?;
code.append(temp_code);
// fetch the value of the variable
let var_reg = if self.is_global(varname) {
let instruction = I::ldw_label(varname.clone(), temp_reg);
code.push(instruction);
temp_reg
} else {
let (rg, block) = self.allocator.load_var(varname)?;
code.append(block);
rg
};
let assign_code = match operator {
AssignmentOperator::Assign => {
unreachable!("assignment was already checked earlier.")
}
AssignmentOperator::AddAssign => {
I::add(var_reg, result_reg, temp_reg)
}
AssignmentOperator::SubAssign => {
I::sub(var_reg, result_reg, temp_reg)
}
AssignmentOperator::MulAssign => {
return Err(CompilerError::Unimplemented(
"TODO: implement multiplication for assignment".to_string(),
));
}
AssignmentOperator::DivAssign => {
return Err(CompilerError::Unimplemented(
"TODO: write proper div function for DSA".to_string(),
));
}
AssignmentOperator::ModAssign => {
return Err(CompilerError::Unimplemented(
"TODO: write proper mod function for DSA".to_string(),
));
}
AssignmentOperator::AndAssign => {
I::and(var_reg, result_reg, temp_reg)
}
AssignmentOperator::OrAssign => I::or(var_reg, result_reg, temp_reg),
AssignmentOperator::XorAssign => {
I::xor(var_reg, result_reg, temp_reg)
}
AssignmentOperator::LeftShiftAssign => {
// this is only useful if we optimise out the register allocation
// inside value.
// if let Expression::Number { value, .. } = *value {
// I::shl(var_reg, value, temp_reg)
// }
I::shl(var_reg, result_reg, 0, temp_reg)
}
AssignmentOperator::RightShiftAssign => {
// this is only useful if we optimise out the register allocation
// if let Expression::Number { value, .. } = *value {
// I::shr(var_reg, value, temp_reg)
// }
I::shr(var_reg, result_reg, 0, temp_reg)
}
};
code.push(assign_code);
// Check if this is a global variable
if self.is_global(varname) {
// Store to global label
code.push(I::stw_label(temp_reg, varname.clone()))
} else {
// Store result in local variable
let store_code = self.allocator.store_var(varname, &temp_reg);
code.append(store_code);
}
self.allocator.free_temp(result_reg);
self.allocator.free_temp(temp_reg);
}
Statement::Return(expr) => {
if let Some(e) = expr {
let (result_reg, expr_code) =
self.generate_expression(e, true, func_body)?;
code.append(expr_code);
code.push(I::stw_reg_offset(result_reg, Register::Bpr, 8));
code.push(I::jmp("_ret"));
self.allocator.free_temp(result_reg);
}
}
Statement::If {
condition,
then_stmt,
else_stmt,
} => {
// Generate condition
let (cond_reg, cond_code) =
self.generate_expression(condition, true, func_body)?;
code.append(cond_code);
// Compare with zero
code.push(I::cmp(cond_reg, Register::Zero));
self.allocator.free_temp(cond_reg);
// Generate unique labels
let then_label = format!("_then_{}", self.get_unique_label());
let else_label = format!("_else_{}", self.get_unique_label());
let end_label = format!("_end_{}", self.get_unique_label());
// Jump to else if condition is false (equal to zero)
code.push(I::jeq(else_label.clone()));
// Then block
code.push(I::label(then_label));
for s in then_stmt {
code.append(self.generate_statement(s, func_body)?);
}
if then_stmt.is_empty() {
code.push(I::Nop);
}
code.push(I::jmp(end_label.clone()));
// Else block
code.push(I::label(else_label));
for s in else_stmt {
code.append(self.generate_statement(s, func_body)?);
}
if else_stmt.is_empty() {
code.push(I::Nop);
}
code.push(I::label(end_label));
}
Statement::While { condition, body } => {
let loop_start = format!("_while_start_{}", self.get_unique_label());
let loop_end = format!("_while_end_{}", self.get_unique_label());
code.push(I::label(&loop_start));
// Generate condition
let (cond_reg, cond_code) =
self.generate_expression(condition, true, func_body)?;
code.append(cond_code);
code.push(I::cmp(cond_reg, Register::Zero));
self.allocator.free_temp(cond_reg);
code.push(I::jeq(loop_end.clone()));
// Loop body
for s in body {
code.append(self.generate_statement(s, func_body)?);
}
code.push(I::jmp(loop_start));
code.push(I::label(loop_end));
}
Statement::Loop(body) => {
let loop_start = format!("_loop_start_{}", self.get_unique_label());
code.push(I::label(&loop_start));
for s in body {
code.append(self.generate_statement(s, func_body)?);
}
code.push(I::jmp(loop_start));
}
Statement::Expression { expr } => {
let (result_reg, expr_code) =
self.generate_expression(expr, false, func_body)?;
code.append(expr_code);
self.allocator.free_temp(result_reg);
}
Statement::Block(statements) => {
for s in statements {
code.append(self.generate_statement(s, func_body)?);
}
}
}
Ok(code)
}
// Example: Generate code for an expression
// Returns (register containing result, assembly code)
fn generate_expression(
&mut self,
expr: &Expression,
use_result: bool,
func_body: &mut IB,
) -> Result<(Register, IB), CompilerError> {
let mut code = IB::new();
match expr {
Expression::Empty => Ok((Register::Null, code)),
Expression::Number(n) => match n {
Number::Signed(value, _) => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.append(alloc_code);
// Load immediate value
code.push(I::lwi(*value as u32, reg));
Ok((reg, code))
}
Number::Unsigned(value, _) => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.append(alloc_code);
// Load immediate value
code.push(I::lwi(*value as u32, reg));
Ok((reg, code))
}
},
Expression::CharLiteral(value) => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.append(alloc_code);
// Load immediate value
code.push(I::comment(format!("char literal '{value}'")));
code.push(I::lwi(*value as u32, reg));
Ok((reg, code))
}
Expression::StringLiteral(value) => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.append(alloc_code);
// write string into memory
let uuid = self.get_unique_label();
func_body.insert(0, I::db_string(format!("str_{uuid}"), value));
// Load pointer to string
code.push(I::lwi_label(format!("str_{uuid}"), reg));
Ok((reg, code))
}
Expression::ArrayLiteral { elements, type_id } => todo!(),
Expression::StructLiteral {
name,
fields,
type_id,
} => todo!(),
Expression::Variable { name, .. } => {
if self.is_global(&name.name) {
// Allocate a temporary register for the global
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.append(alloc_code);
// Load from global label
code.push(I::ldw_label(name.name.clone(), reg));
Ok((reg, code))
} else {
// Local variable - use existing allocator logic
let (reg, load_code) = self.allocator.load_var(&name.name)?;
code.append(load_code);
Ok((reg, code))
}
}
Expression::Binary {
op, left, right, ..
} => {
// Evaluate left operand
let (left_reg, left_code) =
self.generate_expression(left, true, func_body)?;
code.append(left_code);
// Evaluate right operand
let (right_reg, right_code) =
self.generate_expression(right, true, func_body)?;
code.append(right_code);
// Allocate result register
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
code.append(result_alloc);
// Generate operation
match op {
BinaryOperator::Add => {
code.push(I::add(left_reg, right_reg, result_reg));
}
BinaryOperator::Sub => {
code.push(I::sub(left_reg, right_reg, result_reg));
}
BinaryOperator::Mul => {
self.include("maths", "./lib/maths/core.dsa");
// Call multiply function
code.push(I::push(right_reg));
code.push(I::push(left_reg));
code.push(I::call("maths::multiply"));
code.push(I::pop(result_reg));
code.push(I::pop(Register::Zero));
}
BinaryOperator::Div => {
return Err(CompilerError::Unimplemented(
"TODO: write proper div function for DSA".to_string(),
));
// self.include("maths", "./lib/maths/core.dsa");
// // Call divide function
// code.push(format!("\tpush {}", right_reg));
// code.push(format!("\tpush {}", left_reg));
// code.push("\tcall maths::divide".to_string());
// code.push(format!("\tpop {}", result_reg));
// code.push("\tpop zero".to_string());
}
BinaryOperator::Mod => {
return Err(CompilerError::Unimplemented(
"TODO: write proper mod function for DSA".to_string(),
));
// self.include("maths", "./lib/maths/core.dsa");
// // Call modulo function
// code.push(format!("\tpush {}", right_reg));
// code.push(format!("\tpush {}", left_reg));
// code.push("\tcall maths::modulo".to_string());
// code.push(format!("\tpop {}", result_reg));
// code.push("\tpop zero".to_string());
}
BinaryOperator::BitwiseAnd => {
code.push(I::and(left_reg, right_reg, result_reg));
}
BinaryOperator::BitwiseOr => {
code.push(I::or(left_reg, right_reg, result_reg));
}
BinaryOperator::BitwiseXor => {
code.push(I::xor(left_reg, right_reg, result_reg));
}
BinaryOperator::LogicalAnd => {
return Err(CompilerError::Unimplemented(
"assembler/ISA does not yet support logical and!".to_string(),
));
}
BinaryOperator::LogicalOr => {
return Err(CompilerError::Unimplemented(
"assembler/ISA does not yet support logical or!".to_string(),
));
}
BinaryOperator::LeftShift => {
code.push(I::shl(left_reg, right_reg, 0, result_reg));
}
BinaryOperator::RightShift => {
code.push(I::shr(left_reg, right_reg, 0, result_reg));
}
// Comparison operators - return 1 (true) or 0 (false)
BinaryOperator::Equal => {
code.push(I::cmp(left_reg, right_reg));
code.push(I::lwi(1, result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(I::jeq(end_label.clone()));
code.push(I::lwi(0, result_reg));
code.push(I::label(end_label));
}
BinaryOperator::NotEqual => {
code.push(I::cmp(left_reg, right_reg));
code.push(I::lwi(1, result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(I::Jne {
target: Label(end_label.clone()),
});
code.push(I::lwi(0, result_reg));
code.push(I::label(&end_label));
}
BinaryOperator::LessThan => {
code.push(I::cmp(left_reg, right_reg));
code.push(I::lwi(1, result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(I::Jlt {
target: Label(end_label.clone()),
});
code.push(I::lwi(0, result_reg));
code.push(I::label(&end_label));
}
BinaryOperator::LessOrEqual => {
code.push(I::cmp(left_reg, right_reg));
code.push(I::lwi(1, result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(I::Jle {
target: Label(end_label.clone()),
});
code.push(I::lwi(0, result_reg));
code.push(I::label(&end_label));
}
BinaryOperator::GreaterThan => {
code.push(I::cmp(left_reg, right_reg));
code.push(I::lwi(1, result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(I::Jgt {
target: Label(end_label.clone()),
});
code.push(I::lwi(0, result_reg));
code.push(I::label(&end_label));
}
BinaryOperator::GreaterOrEqual => {
code.push(I::cmp(left_reg, right_reg));
code.push(I::lwi(1, result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(I::Jge {
target: Label(end_label.clone()),
});
code.push(I::lwi(0, result_reg));
code.push(I::label(&end_label));
} // _ => unimplemented!(),
}
// Free operand registers (allocator will protect variables)
self.allocator.free_temp(left_reg);
self.allocator.free_temp(right_reg);
Ok((result_reg, code))
}
Expression::UnaryPostfix { op, operand, .. } => {
let (operand_reg, operand_code) =
self.generate_expression(operand, true, func_body)?;
code.append(operand_code);
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
code.append(result_alloc);
match op {
UnaryOperator::Increment => {
// postfix increment - return old value
code.push(I::mov(operand_reg, result_reg));
}
UnaryOperator::Decrement => {
// postfix decrement - return old value
code.push(I::mov(operand_reg, result_reg));
}
_ => {
return Err(CompilerError::Generic(format!(
"{op} is prefix only!"
)));
}
}
self.allocator.free_temp(operand_reg);
Ok((result_reg, code))
}
Expression::Unary { op, operand, .. } => {
let (operand_reg, operand_code) =
self.generate_expression(operand, true, func_body)?;
code.append(operand_code);
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
code.append(result_alloc);
match op {
UnaryOperator::Minus => {
// Negate: result = 0 - operand
code.push(I::sub(Register::Zero, operand_reg, result_reg));
}
UnaryOperator::Plus => {
// Just move
code.push(I::mov(operand_reg, result_reg));
}
UnaryOperator::Dereference => {
code.push(I::ldw_reg(operand_reg, result_reg));
}
UnaryOperator::AddressOf => {
// ensure the referenced variable is on the stack and return its
// address.
let (offset, alloc_code) =
self.allocator.free_register(&operand_reg)?;
code.push(alloc_code);
code.push(I::iadd_dest(
Register::Spr,
offset - self.allocator.get_stack_offset(),
result_reg,
));
}
UnaryOperator::SizeOf => {
if let Ok(id) = operand.type_id() {
let size = id.size();
code.push(I::lwi(size as u32, result_reg));
}
}
UnaryOperator::Increment => {
// prefix increment
code.push(I::mov(operand_reg, result_reg));
code.push(I::iadd_dest(operand_reg, 1, result_reg));
}
UnaryOperator::Decrement => {
// prefix decrement
code.push(I::mov(operand_reg, result_reg));
code.push(I::iadd_dest(operand_reg, -1, result_reg));
}
UnaryOperator::BitwiseNot => {
code.push(I::not(operand_reg, result_reg));
}
UnaryOperator::LogicalNot => {
return Err(CompilerError::Unimplemented(
"Assembler/ISA does not yet support logical not".to_string(),
));
}
_ => {
return Err(CompilerError::Generic(format!(
"{op} is postfix only!"
)));
}
}
self.allocator.free_temp(operand_reg);
Ok((result_reg, code))
}
Expression::Call {
func: Call { name, args },
..
} => {
// first evaluate all the args we're going to need
let mut arg_regs = Vec::new();
for arg in args.iter().rev() {
let (arg_reg, arg_code) =
self.generate_expression(arg, true, func_body)?;
code.append(arg_code);
arg_regs.push(arg_reg);
}
// Save caller-saved registers and track which ones we saved
let saved_regs = self.allocator.get_caller_saved_registers();
for reg in &saved_regs {
// spill variables to stack
code.push(self.allocator.free_register(reg).unwrap().1);
}
// Evaluate and push arguments in reverse order
for (i, arg_reg) in arg_regs.iter().enumerate() {
code.push(I::comment(format!("push arg {}", args.len() - 1 - i)));
code.push(I::push(*arg_reg));
}
if self.symbols.contains(&name.name) {
// Call local function
code.push(I::call(name.to_string()));
} else if let Some(ns) = name.namespace.clone()
&& self.imports.contains_key(&ns)
{
code.push(I::call(name.to_string()));
} else {
return Err(CompilerError::Undefined(name.clone()));
}
let result_reg: Register;
if use_result {
let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?;
result_reg = temp_result_reg;
code.append(result_alloc);
code.push(I::pop(result_reg));
// Clean up arguments
if args.len() > 1 {
for _ in 0..(args.len() - 1) {
code.push(I::pop(Register::Zero));
}
}
} else {
result_reg = Register::Zero;
// Clean up arguments
if args.len() > 0 {
for _ in 0..(args.len()) {
code.push(I::pop(Register::Zero));
}
}
}
// Free argument registers
for reg in arg_regs {
self.allocator.free_temp(reg);
}
Ok((result_reg, code))
}
Expression::IndexAccess {
expr,
index,
type_id,
} => {
let (expr_reg, expr_alloc) =
self.generate_expression(expr, true, func_body)?;
code.append(expr_alloc);
let (index_reg, index_alloc) =
self.generate_expression(index, true, func_body)?;
code.append(index_alloc);
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
code.append(result_alloc);
// add the expr pointer to the index to get the final address.
code.push(I::add(expr_reg, index_reg, result_reg));
// load the value at the address.
code.push(I::ldw_reg(result_reg, result_reg));
self.allocator.free_temp(expr_reg);
self.allocator.free_temp(index_reg);
Ok((result_reg, code))
}
Expression::MemberAccess {
expr,
field_name,
type_id,
} => Err(CompilerError::Unimplemented(
"Structs are not yet implemented!".to_string(),
)),
Expression::TypeCast {
expr,
target_type,
type_id,
} => {
let (expr_reg, expr_code) =
self.generate_expression(expr, true, func_body)?;
// not sure if we actually need to do anything here.
// for now we just return the previous expression.
Ok((expr_reg, expr_code))
}
}
}
// Helper for generating unique labels
fn get_unique_label(&mut self) -> String {
// You'd implement a counter here
static COUNTER: AtomicU32 = AtomicU32::new(0);
let val = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
(val + 1).to_string()
}
}
@@ -0,0 +1,797 @@
use std::fmt;
use crate::backend::dsa::registers::Register;
pub struct InsBlock {
instructions: Vec<Instruction>,
}
impl InsBlock {
pub fn new() -> Self {
Self {
instructions: vec![],
}
}
pub fn insert(&mut self, index: usize, instr: Instruction) {
self.instructions.insert(index, instr);
}
pub fn push(&mut self, instr: Instruction) {
self.instructions.push(instr);
}
pub fn append(&mut self, mut other: Self) {
self.instructions.append(&mut other.instructions);
}
pub fn extend(&mut self, instrs: impl IntoIterator<Item = Instruction>) {
self.instructions.extend(instrs);
}
pub fn is_empty(&self) -> bool {
self.instructions.is_empty()
}
pub fn len(&self) -> usize {
self.instructions.len()
}
pub fn iter(&self) -> impl Iterator<Item = &Instruction> {
self.instructions.iter()
}
}
impl From<Vec<Instruction>> for InsBlock {
fn from(instructions: Vec<Instruction>) -> Self {
Self { instructions }
}
}
impl From<Instruction> for InsBlock {
fn from(instr: Instruction) -> Self {
Self {
instructions: vec![instr],
}
}
}
#[derive(Debug, Clone)]
pub enum Instruction {
// Labels and comments
Label(Label),
Comment {
text: String,
top_level: bool,
},
Newline,
// Data Directives
Db {
label: String,
data: Vec<u8>,
},
Dh {
label: String,
data: Vec<u16>,
},
Dw {
label: String,
data: Vec<u32>,
},
DString {
// alias for db.
label: String,
data: String,
},
Resx {
label: String,
size: u32,
},
// Include
Include {
name: String,
path: String,
},
// Data movement
Mov {
src: Register,
dest: Register,
},
Movs {
src: Register,
dest: Register,
},
// Memory operations
Ldb {
src: MemOperand,
dest: Register,
},
Ldh {
src: MemOperand,
dest: Register,
},
Ldw {
src: MemOperand,
dest: Register,
},
Stb {
src: Register,
dest: MemOperand,
},
Sth {
src: Register,
dest: MemOperand,
},
Stw {
src: Register,
dest: MemOperand,
},
// Immediate loads
Lli {
imm: Imm,
dest: Register,
},
Lui {
imm: Imm,
dest: Register,
},
Lwi {
imm: Imm,
dest: Register,
},
LwiLabel {
label: String,
dest: Register,
},
// Arithmetic
Add {
src1: Register,
src2: Register,
dest: Register,
},
Sub {
src1: Register,
src2: Register,
dest: Register,
},
IAdd {
src: Register,
imm: Imm,
dest: Option<Register>,
},
ISub {
src: Register,
imm: Imm,
dest: Option<Register>,
},
Inc {
reg: Register,
},
Dec {
reg: Register,
},
// Bitwise
And {
src1: Register,
src2: Register,
dest: Register,
},
Or {
src1: Register,
src2: Register,
dest: Register,
},
Xor {
src1: Register,
src2: Register,
dest: Register,
},
Not {
src: Register,
dest: Register,
},
Nand {
src1: Register,
src2: Register,
dest: Register,
},
Nor {
src1: Register,
src2: Register,
dest: Register,
},
Xnor {
src1: Register,
src2: Register,
dest: Register,
},
// Shifts
Shl {
src1: Register,
r_shamt: Register,
i_shamt: u16,
dest: Register,
},
Shr {
src1: Register,
r_shamt: Register,
i_shamt: u16,
dest: Register,
},
// Comparison
Cmp {
reg1: Register,
reg2: Register,
},
// Jumps
Jmp {
target: Label,
},
Jeq {
target: Label,
},
Jne {
target: Label,
},
Jgt {
target: Label,
},
Jge {
target: Label,
},
Jlt {
target: Label,
},
Jle {
target: Label,
},
// Stack
Push {
reg: Register,
},
Pop {
reg: Register,
},
// Function calls
Call {
target: String,
}, // namespace::function
Return,
// System
Hlt,
Nop,
Int {
code: u8,
},
}
pub enum DataDirective {
U8(Vec<u8>),
U16(Vec<u16>),
U32(Vec<u32>),
String(String),
Char(char),
}
impl fmt::Display for Instruction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Label(l) => write!(f, "{}:", l),
Self::Newline => write!(f, ""), /* empty string as newlines are inserted */
// automatically.
Self::Comment { text, top_level } => write!(
f,
"{}",
text.lines()
.map(|line| format!(
"{}// {}",
if *top_level { "" } else { " " },
line.trim(),
))
.collect::<Vec<String>>()
.join("\n")
),
Self::Include { name, path } => write!(f, "include {name}: \"{}\"", path),
Self::Db { label, data } => write!(
f,
"db {}: {}",
label,
data.iter()
.map(|&b| format!("{:#04X}", b))
.collect::<Vec<String>>()
.join(", ")
),
Self::Dh { label, data } => write!(
f,
"dh {}: {}",
label,
data.iter()
.map(|&b| format!("{:#06X}", b))
.collect::<Vec<String>>()
.join(", ")
),
Self::Dw { label, data } => write!(
f,
"dw {}: {}",
label,
data.iter()
.map(|&b| format!("{:#08X}", b))
.collect::<Vec<String>>()
.join(", ")
),
Self::DString { label, data } => write!(f, "db {}: \"{}\"", label, data),
Self::Resx { label, size } => write!(f, "resx {}: {}", label, size),
Self::Mov { src, dest } => write!(f, " mov {}, {}", src, dest),
Self::Movs { src, dest } => write!(f, " movs {}, {}", src, dest),
Self::Ldb { src: addr, dest } => {
let (reg, offset) = reg_and_offset(addr);
write!(f, " ldb {}, {}, {}", reg, dest, offset)
}
Self::Ldh { src: addr, dest } => {
let (reg, offset) = reg_and_offset(addr);
write!(f, " ldh {}, {}, {}", reg, dest, offset)
}
Self::Ldw { src, dest } => {
let (reg, offset) = reg_and_offset(src);
write!(f, " ldw {}, {}, {}", reg, dest, offset)
}
// Self::Ldbs { addr, dest } => {
// write!(f, " ldbs {}, {}", format_mem_operand(addr), dest)
// }
// Self::Ldhs { addr, dest } => {
// write!(f, " ldhs {}, {}", format_mem_operand(addr), dest)
// }
// Self::Ldws { addr, dest } => {
// write!(f, " ldws {}, {}", format_mem_operand(addr), dest)
// }
Self::Stb { src, dest: addr } => {
let (reg, offset) = reg_and_offset(addr);
write!(f, " stb {}, {}, {}", src, reg, offset)
}
Self::Sth { src, dest: addr } => {
let (reg, offset) = reg_and_offset(addr);
write!(f, " sth {}, {}, {}", src, reg, offset)
}
Self::Stw { src, dest: addr } => {
let (reg, offset) = reg_and_offset(addr);
write!(f, " stw {}, {}, {}", src, reg, offset)
}
Self::Lli { imm, dest } => write!(f, " lli {}, {}", imm, dest),
Self::Lui { imm, dest } => write!(f, " lui {}, {}", imm, dest),
Self::Lwi { imm, dest } => write!(f, " lwi {}, {}", imm, dest),
Self::LwiLabel { label, dest } => write!(f, " lwi {}, {}", label, dest),
// arithmetic
Self::Add { src1, src2, dest } => {
write!(f, " add {}, {}, {}", src1, src2, dest)
}
Self::Sub { src1, src2, dest } => {
write!(f, " sub {}, {}, {}", src1, src2, dest)
}
Self::And { src1, src2, dest } => {
write!(f, " and {}, {}, {}", src1, src2, dest)
}
Self::Or { src1, src2, dest } => {
write!(f, " or {}, {}, {}", src1, src2, dest)
}
Self::Nand { src1, src2, dest } => {
write!(f, " nand {}, {}, {}", src1, src2, dest)
}
Self::Xor { src1, src2, dest } => {
write!(f, " xor {}, {}, {}", src1, src2, dest)
}
Self::Nor { src1, src2, dest } => {
write!(f, " nor {}, {}, {}", src1, src2, dest)
}
Self::Not { src, dest } => {
write!(f, " not {} {}", src, dest)
}
Self::Xnor { src1, src2, dest } => {
write!(f, " xnor {}, {}, {}", src1, src2, dest)
}
Self::IAdd { src, imm, dest } => {
if let Some(d) = dest {
write!(f, " addi {}, {}, {}", src, imm, d)
} else {
write!(f, " addi {}, {}", src, imm)
}
}
Self::ISub { src, imm, dest } => {
if let Some(d) = dest {
write!(f, " subi {}, {}, {}", src, imm, d)
} else {
write!(f, " subi {}, {}", src, imm)
}
}
// shift instructions
Self::Shl {
src1,
r_shamt,
i_shamt,
dest,
} => {
write!(f, " shl {}, {}, {}, {}", src1, r_shamt, i_shamt, dest)
}
Self::Shr {
src1,
r_shamt,
i_shamt,
dest,
} => {
write!(f, " shl {}, {}, {}, {}", src1, r_shamt, i_shamt, dest)
}
// increment instructions
Self::Inc { reg } => write!(f, " inc {}", reg),
Self::Dec { reg } => write!(f, " dec {}", reg),
Self::Cmp { reg1, reg2 } => write!(f, " cmp {}, {}", reg1, reg2),
// jump instructions
Self::Jmp { target } => write!(f, " jmp {}", target),
Self::Jeq { target } => write!(f, " jeq {}", target),
Self::Jne { target } => write!(f, " jne {}", target),
Self::Jgt { target } => write!(f, " jgt {}", target),
Self::Jge { target } => write!(f, " jge {}", target),
Self::Jlt { target } => write!(f, " jlt {}", target),
Self::Jle { target } => write!(f, " jle {}", target),
// stack pseudoinstructions
Self::Push { reg } => write!(f, " push {}", reg),
Self::Pop { reg } => write!(f, " pop {}", reg),
// call & return pseudoinstructions
Self::Call { target } => write!(f, " call {}", target),
Self::Return => write!(f, " return"),
// misc instructions
Self::Int { code } => write!(f, " int {}", code),
Self::Hlt => write!(f, " hlt"),
Self::Nop => write!(f, " nop"),
}
}
}
impl Instruction {
// data directives
pub fn db_string(label: impl Into<String>, data: impl Into<String>) -> Self {
Self::DString {
label: label.into(),
data: data.into(),
}
}
pub fn db_word(label: impl Into<String>, data: u32) -> Self {
Self::Dw {
label: label.into(),
data: vec![data],
}
}
pub fn db_bytes(label: impl Into<String>, data: &[u8]) -> Self {
Self::Db {
label: label.into(),
data: data.to_vec(),
}
}
// Movement
pub fn mov<R1, R2>(src: R1, dest: R2) -> Self
where
R1: Into<Register>,
R2: Into<Register>,
{
Self::Mov {
src: src.into(),
dest: dest.into(),
}
}
// Memory loads
pub fn ldw_reg<R>(base: R, dest: Register) -> Self
where
R: Into<Register>,
{
Self::Ldw {
src: MemOperand::RegIndirect(base.into()),
dest,
}
}
pub fn ldw_reg_offset<R>(base: R, dest: Register, offset: i32) -> Self
where
R: Into<Register>,
{
Self::Ldw {
src: MemOperand::RegOffset(base.into(), offset),
dest,
}
}
pub fn ldw_label(label: impl Into<Label>, dest: Register) -> Self {
Self::Ldw {
src: MemOperand::Label(label.into()),
dest,
}
}
// Memory stores
pub fn stw_reg<R>(src: Register, base: R) -> Self
where
R: Into<Register>,
{
Self::Stw {
src,
dest: MemOperand::RegIndirect(base.into()),
}
}
pub fn stw_reg_offset<R>(src: Register, base: R, offset: i32) -> Self
where
R: Into<Register>,
{
Self::Stw {
src,
dest: MemOperand::RegOffset(base.into(), offset),
}
}
pub fn stw_label(src: Register, label: impl Into<Label>) -> Self {
Self::Stw {
src,
dest: MemOperand::Label(label.into()),
}
}
// Arithmetic
pub fn add(src1: Register, src2: Register, dest: Register) -> Self {
Self::Add { src1, src2, dest }
}
pub fn sub(src1: Register, src2: Register, dest: Register) -> Self {
Self::Sub { src1, src2, dest }
}
pub fn and(src1: Register, src2: Register, dest: Register) -> Self {
Self::And { src1, src2, dest }
}
pub fn or(src1: Register, src2: Register, dest: Register) -> Self {
Self::Or { src1, src2, dest }
}
pub fn xor(src1: Register, src2: Register, dest: Register) -> Self {
Self::Xor { src1, src2, dest }
}
pub fn not(src: Register, dest: Register) -> Self {
Self::Not { src, dest }
}
pub fn shl(src1: Register, r_shamt: Register, i_shamt: u16, dest: Register) -> Self {
Self::Shl {
src1,
r_shamt,
i_shamt,
dest,
}
}
pub fn shr(src1: Register, r_shamt: Register, i_shamt: u16, dest: Register) -> Self {
Self::Shr {
src1,
r_shamt,
i_shamt,
dest,
}
}
pub fn iadd(src: Register, value: i64) -> Self {
let imm = Imm(value.unsigned_abs() as u32);
if value < 0 {
Self::ISub {
src,
imm,
dest: None,
}
} else {
Self::IAdd {
src,
imm,
dest: None,
}
}
}
pub fn iadd_dest(src: Register, value: i32, dest: Register) -> Self {
let imm = Imm(value.unsigned_abs());
if value < 0 {
Self::ISub {
src,
imm,
dest: Some(dest),
}
} else {
Self::IAdd {
src,
imm,
dest: Some(dest),
}
}
}
pub fn inc(reg: Register) -> Self {
Self::Inc { reg }
}
pub fn dec(reg: Register) -> Self {
Self::Dec { reg }
}
// Immediate loads
pub fn lwi(value: u32, dest: Register) -> Self {
if value > 0xFFFF {
Self::Lwi {
imm: Imm(value),
dest,
}
} else {
Self::Lli {
imm: Imm(value),
dest,
}
}
}
pub fn lwi_label(label: impl Into<String>, dest: Register) -> Self {
Self::LwiLabel {
label: label.into(),
dest,
}
}
// Control flow
pub fn label(name: impl Into<String>) -> Self {
Self::Label(Label(name.into()))
}
pub fn jmp(target: impl Into<Label>) -> Self {
Self::Jmp {
target: target.into(),
}
}
pub fn jeq(target: impl Into<Label>) -> Self {
Self::Jeq {
target: target.into(),
}
}
pub fn cmp(reg1: Register, reg2: Register) -> Self {
Self::Cmp { reg1, reg2 }
}
// Stack
pub fn push(reg: Register) -> Self {
Self::Push { reg }
}
pub fn pop(reg: Register) -> Self {
Self::Pop { reg }
}
// Functions
pub fn call(target: impl Into<String>) -> Self {
Self::Call {
target: target.into(),
}
}
pub fn int(code: u8) -> Self {
Self::Int { code }
}
pub fn ret() -> Self {
Self::Return
}
// Utilities
pub fn comment(text: impl Into<String>) -> Self {
Self::Comment {
text: text.into(),
top_level: false,
}
}
pub fn global_comment(text: impl Into<String>) -> Self {
Self::Comment {
text: text.into(),
top_level: true,
}
}
pub fn include(name: impl Into<String>, path: impl Into<String>) -> Self {
Self::Include {
name: name.into(),
path: path.into(),
}
}
}
// Convenience trait for Label conversion
impl From<String> for Label {
fn from(s: String) -> Self {
Label(s)
}
}
impl From<&str> for Label {
fn from(s: &str) -> Self {
Label(s.to_string())
}
}
fn reg_and_offset(op: &MemOperand) -> (String, i32) {
match op {
MemOperand::RegIndirect(reg) => (reg.to_string(), 0),
MemOperand::RegOffset(reg, offset) => (reg.to_string(), *offset),
MemOperand::Label(label) => (label.to_string(), 0),
MemOperand::LabelOffset(label, offset) => (label.to_string(), *offset),
}
}
/// Memory operand for loads/stores
#[derive(Debug, Clone)]
pub enum MemOperand {
/// Register indirect: [reg]
RegIndirect(Register),
/// Register with offset: [reg + offset]
RegOffset(Register, i32),
/// Label: [label]
Label(Label),
/// Label with offset: [label + offset]
LabelOffset(Label, i32),
}
/// Immediate value (16-bit or 32-bit)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Imm(pub u32);
impl fmt::Display for Imm {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0)
}
}
/// Label reference
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Label(pub String);
impl fmt::Display for Label {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0)
}
}
+11
View File
@@ -0,0 +1,11 @@
use crate::model::{CompilerError, Program};
mod codegen;
mod instruction;
mod registers;
mod scope;
pub fn generate_code(ast: &Program) -> Result<String, CompilerError> {
let mut codegen = codegen::CodeGenerator::new(ast.clone());
codegen.generate()
}
+560
View File
@@ -0,0 +1,560 @@
use std::{collections::HashMap, fmt};
use crate::{
backend::dsa::instruction::{InsBlock, Instruction},
model::CompilerError,
};
/// Register allocator for DSA assembly generation
/// Manages general-purpose registers (rg0-rgf) and handles stack spilling
pub struct RegisterAllocator {
/// Available general-purpose registers
/// Maps variable names to their current location (register or stack offset)
variable_locations: HashMap<String, Location>,
/// Maps registers to the variables they currently hold
register_contents: HashMap<Register, String>,
/// Current stack offset for local variables (relative to bpr)
/// Starts at -4 (going downward from base pointer)
stack_offset: i32,
/// Track which registers are currently in use
in_use: Vec<(Register, bool)>,
}
#[derive(Debug, Clone)]
pub struct Location {
register: Option<Register>,
stack: Option<i32>,
}
impl Location {
pub fn stack(offset: i32) -> Self {
Location {
register: None,
stack: Some(offset),
}
}
pub fn register(register: Register) -> Self {
Location {
register: Some(register),
stack: None,
}
}
}
impl RegisterAllocator {
pub fn new() -> Self {
// Initialize with available GP registers (rg0-rgf = 16 registers)
let in_use = vec![
Register::Rg0,
Register::Rg1,
Register::Rg2,
Register::Rg3,
Register::Rg4,
Register::Rg5,
Register::Rg6,
Register::Rg7,
Register::Rg8,
Register::Rg9,
Register::Rga,
Register::Rgb,
Register::Rgc,
Register::Rgd,
Register::Rge,
Register::Rgf,
]
.iter()
.map(|&reg| (reg, false))
.collect();
RegisterAllocator {
// available_registers: registers,
variable_locations: HashMap::new(),
register_contents: HashMap::new(),
stack_offset: -4, // Start at -4 (first local below saved bpr)
in_use,
}
}
/// Allocate a temporary register for expression evaluation
/// Returns the register name and optionally assembly code to save it
pub fn alloc_temp(&mut self) -> Result<(Register, InsBlock), CompilerError> {
// Try to find an unused register
// println!("finding! {:#?}", self.in_use);
if let Some(reg) = self.find_free_register() {
self.in_use[reg as usize].1 = true;
return Ok((reg, InsBlock::new()));
}
// All registers in use - need to spill one
// Choose the first register with a variable we can spill
// Find a register to spill
// let reg_to_spill = self
// .available_registers
// .iter()
// .find(|reg| self.register_contents.contains_key(*reg))
// .cloned();
// if let Some(reg) = reg_to_spill {
// // Spill this variable to stack
// let spill_code = self.spill_register(&reg)?;
// code.extend(spill_code);
// self.in_use.insert(reg.clone(), true);
// return Ok((reg, code));
// }
todo!("an efficient stack spilling algorithm. needs scope awareness.");
Err(CompilerError::Generic(
"All registers are used up yet there are no variables to spill to the stack"
.to_string(),
))
}
// fn set_in_use(&mut self, reg: Register, in_use: bool) {
// self.in_use[reg as usize].1 = in_use;
// }
/// Free a temporary register after use
/// NOTE: This will NOT free registers that contain variables!
/// Variables persist throughout their scope and must not be freed
pub fn free_temp(&mut self, reg: Register) {
// Check if this register contains a variable
if self.register_contents.contains_key(&reg) {
// This register holds a variable - don't free it!
// Variables are only freed when they go out of scope via free_var()
return;
}
// This is a true temporary - safe to free
if !matches!(reg, Register::Zero | Register::Null) {
self.in_use[reg as usize].1 = false;
}
}
pub fn free_var(&mut self, var: &str) {
// Check if this variable is in a register
if let Some(location) = self.variable_locations.get(var).cloned() {
if let Some(reg) = location.register
&& !matches!(reg, Register::Zero | Register::Null)
{
self.register_contents.remove(&reg);
self.in_use[reg as usize].1 = false;
}
self.variable_locations.remove(var);
}
}
/// Allocate a register for a named variable
/// Returns the register and any necessary assembly code
pub fn alloc_var(
&mut self,
var_name: &str,
) -> Result<(Register, InsBlock), CompilerError> {
if let Some(mut location) = self.variable_locations.get(var_name).cloned() {
// if the var is in a register we can use it already.
if let Some(reg) = location.register {
return Ok((reg, InsBlock::new()));
}
// if the variable is on the stack only, we need to get it in a register.
if let Some(offset) = location.stack {
// Variable was pushed, need to calculate actual position and update its
// location.
let (reg, mut code) = self.alloc_temp()?;
// acknowledge var is now in a reg as well.
location.register = Some(reg);
// Load from bpr + offset (offset is negative)
// code.push(format!("\tsubi bpr {} {}", -(offset + 4), reg));
code.push(Instruction::ldw_reg_offset(
Register::Spr,
reg,
offset - self.stack_offset,
));
// Update location to register
self.variable_locations
.insert(var_name.to_string(), location);
self.register_contents.insert(reg, var_name.to_string());
return Ok((reg, code));
}
}
// Variable doesn't have a location yet, allocate a new register
let (reg, code) = self.alloc_temp()?;
self.variable_locations
.insert(var_name.to_string(), Location::register(reg));
self.register_contents.insert(reg, var_name.to_string());
Ok((reg, code))
}
/// Get the current location of a variable
pub fn _get_var_location(&self, var_name: &str) -> Option<&Location> {
self.variable_locations.get(var_name)
}
/// Load a variable into a register (allocating if necessary)
/// Returns the register and assembly code to load it
pub fn load_var(
&mut self,
var_name: &str,
) -> Result<(Register, InsBlock), CompilerError> {
self.alloc_var(var_name)
}
/// Store a value from a register into a variable
/// Updates tracking and returns any necessary assembly code
pub fn store_var(&mut self, var_name: &str, source_reg: &Register) -> InsBlock {
let mut block = InsBlock::new();
// Check if variable already has a location
if let Some(location) = self.variable_locations.get(var_name) {
// if the variable exists in a register we write to that.
match location.register {
Some(reg) if reg == *source_reg => {
block.push(Instruction::mov(*source_reg, reg));
return block;
}
_ => (),
}
// if the variable exists on the stack but not a register we write here.
if let Some(offset) = location.stack {
block.push(Instruction::stw_reg_offset(
*source_reg,
Register::Spr,
offset - self.stack_offset,
));
return block;
}
}
// Variable doesn't exist yet, we can just use the same reg.
// if we can avoid a move, absolutely do that.
// if this is true then there's no permanent variable here so it's safe to use.
if !self.register_contents.contains_key(source_reg) {
self.variable_locations
.insert(var_name.to_string(), Location::register(*source_reg));
self.register_contents
.insert(*source_reg, var_name.to_string());
self.in_use[*source_reg as usize].1 = true;
return block;
}
// if current register isn't free, (eg is another variable) we assign somewhere
// else.
if let Some(free_reg) = self.find_free_register() {
self.variable_locations
.insert(var_name.to_string(), Location::register(free_reg));
self.register_contents
.insert(free_reg, var_name.to_string());
self.in_use[free_reg as usize].1 = true;
block.push(Instruction::mov(*source_reg, free_reg));
return block;
}
// No free registers - allocate on stack
// code.push(format!("\tstw {}, bpr, {}", source_reg, self.stack_offset));
// self.variable_locations
// .insert(var_name.to_string(), Location::Stack(self.stack_offset));
// self.stack_offset -= 4; // Move to next stack slot
//
todo!("an efficient stack spilling algorithm. needs scope awareness.");
}
/// spill a register to the stack (WITHOUT FREEING)
/// DO NOT USE this if it's for a pointer!!!!
pub fn _spill_register(&mut self, reg: &Register) -> Result<InsBlock, CompilerError> {
let mut code = InsBlock::new();
// check if the variable is declared.
if let Some(var_name) = self.register_contents.get(reg).cloned()
&& let Some(location) = self.variable_locations.get_mut(&var_name)
{
// check if var is on the stack
if let Some(offset) = location.stack {
code.push(Instruction::stw_reg_offset(
*reg,
Register::Spr,
offset - self.stack_offset,
));
return Ok(code);
}
// Track that we pushed one word
self.stack_offset -= 4;
// if the variable is not on the stack:
// push register to stack (spr decrements automatically)
let offset = self.stack_offset;
code.push(Instruction::push(*reg));
// Update variable location - it's now at current spr
// Note: We track offset from bpr for consistency
location.stack = Some(offset);
Ok(code)
} else {
Err(CompilerError::Generic(format!(
"Register {} does not contain a variable to spill!",
reg
)))
}
}
/// free a register by spilling it to the stack.
/// Returns assembly code to perform the spill
pub fn free_register(
&mut self,
reg: &Register,
) -> Result<(i32, Instruction), CompilerError> {
// check if the variable is declared.
if let Some(var_name) = self.register_contents.get(reg).cloned()
&& let Some(location) = self.variable_locations.get_mut(&var_name)
{
// check if var name is on the stack
if let Some(offset) = location.stack {
// store current register value in stack location
let code = Instruction::stw_reg_offset(
*reg,
Register::Spr,
offset - self.stack_offset,
);
// free the register.
location.register = None;
self.register_contents.remove(reg);
return Ok((offset, code));
}
// Track that we pushed one word
self.stack_offset -= 4;
let offset = self.stack_offset;
let code = Instruction::push(*reg);
// Update variable location
// Note: We track offset from bpr for consistency
location.stack = Some(offset);
location.register = None;
self.register_contents.remove(reg);
Ok((offset, code))
} else {
Err(CompilerError::Generic(format!(
"Register {} does not contain a variable to spill!",
reg
)))
}
}
/// Find a free register (not currently in use)
fn find_free_register(&self) -> Option<Register> {
self.in_use
.iter()
.filter(|(_, in_use)| !*in_use)
.map(|(reg, _)| *reg)
.next()
}
/// Spill all registers to stack (useful before function calls)
pub fn _spill_all(&mut self) -> InsBlock {
let mut code = InsBlock::new();
let regs_to_spill: Vec<Register> =
self.register_contents.keys().cloned().collect();
for reg in regs_to_spill {
if let Ok(spill_code) = self.free_register(&reg) {
code.push(spill_code.1);
}
}
code
}
/// Get the total stack offset
pub fn get_stack_offset(&self) -> i32 {
self.stack_offset
}
/// Get the total stack space needed for local variables
pub fn _get_stack_size(&self) -> i32 {
-self.stack_offset // Convert negative offset to positive size
}
/// Reset allocator for a new function
pub fn reset(&mut self) {
self.variable_locations.clear();
self.register_contents.clear();
self.stack_offset = -4;
self.in_use = vec![
Register::Rg0,
Register::Rg1,
Register::Rg2,
Register::Rg3,
Register::Rg4,
Register::Rg5,
Register::Rg6,
Register::Rg7,
Register::Rg8,
Register::Rg9,
Register::Rga,
Register::Rgb,
Register::Rgc,
Register::Rgd,
Register::Rge,
Register::Rgf,
]
.iter()
.map(|&reg| (reg, false))
.collect();
}
/// Get list of registers that contain variables and are in use
/// These need to be saved before function calls
pub fn get_caller_saved_registers(&self) -> Vec<Register> {
self.register_contents
.iter()
.filter(|(reg, _)| {
self.in_use
.get(**reg as usize)
.unwrap_or(&(Register::Null, false))
.1
})
.map(|(reg, _)| *reg)
.collect()
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Register {
// general purpose
Rg0 = 0,
Rg1 = 1,
Rg2 = 2,
Rg3 = 3,
Rg4 = 4,
Rg5 = 5,
Rg6 = 6,
Rg7 = 7,
Rg8 = 8,
Rg9 = 9,
Rga = 10,
Rgb = 11,
Rgc = 12,
Rgd = 13,
Rge = 14,
Rgf = 15,
// special
Bpr,
Spr,
Ret,
Acc,
// read only
Pcx,
Zero,
// null
Null,
}
impl Register {
pub fn get_gp() -> [Register; 16] {
[
Register::Rg0,
Register::Rg1,
Register::Rg2,
Register::Rg3,
Register::Rg4,
Register::Rg5,
Register::Rg6,
Register::Rg7,
Register::Rg8,
Register::Rg9,
Register::Rga,
Register::Rgb,
Register::Rgc,
Register::Rgd,
Register::Rge,
Register::Rgf,
]
}
pub fn is_gp(&self) -> bool {
(*self as u8) < 16
}
pub fn from_index(idx: usize) -> Register {
match idx {
0 => Register::Rg0,
1 => Register::Rg1,
2 => Register::Rg2,
3 => Register::Rg3,
4 => Register::Rg4,
5 => Register::Rg5,
6 => Register::Rg6,
7 => Register::Rg7,
8 => Register::Rg8,
9 => Register::Rg9,
10 => Register::Rga,
11 => Register::Rgb,
12 => Register::Rgc,
13 => Register::Rgd,
14 => Register::Rge,
15 => Register::Rgf,
_ => unreachable!("this function shouldn't ever be called with idx>15"),
}
}
}
impl fmt::Display for Register {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Rg0 => write!(f, "rg0"),
Self::Rg1 => write!(f, "rg1"),
Self::Rg2 => write!(f, "rg2"),
Self::Rg3 => write!(f, "rg3"),
Self::Rg4 => write!(f, "rg4"),
Self::Rg5 => write!(f, "rg5"),
Self::Rg6 => write!(f, "rg6"),
Self::Rg7 => write!(f, "rg7"),
Self::Rg8 => write!(f, "rg8"),
Self::Rg9 => write!(f, "rg9"),
Self::Rga => write!(f, "rga"),
Self::Rgb => write!(f, "rgb"),
Self::Rgc => write!(f, "rgc"),
Self::Rgd => write!(f, "rgd"),
Self::Rge => write!(f, "rge"),
Self::Rgf => write!(f, "rgf"),
Self::Acc => write!(f, "acc"),
Self::Ret => write!(f, "ret"),
Self::Bpr => write!(f, "bpr"),
Self::Spr => write!(f, "spr"),
Self::Zero => write!(f, "zero"),
Self::Pcx => write!(f, "pcx"),
Self::Null => write!(f, "null"),
}
}
}
+394
View File
@@ -0,0 +1,394 @@
use std::{cell::RefCell, collections::HashMap, ops::Deref, rc::Rc};
use crate::{
backend::dsa::{
instruction::{InsBlock, Instruction},
registers::{Register, RegisterAllocator},
},
error,
model::{CompilerError, Name, TypeId},
};
/// scope object
pub struct Scope<'a> {
/// outer scope, for a function this will be the global scope.
parent: Option<&'a mut Scope<'a>>,
alloc: Rc<RefCell<Allocator>>,
/// is the scope a function body or just a loop?
/// depending on the type, ending a scope will have different behaviour
r#type: ScopeType,
/// variables
variables: HashMap<String, Variable>,
entry_stack_offset: i32,
}
impl<'a> Scope<'a> {
pub fn new() -> Scope<'a> {
let alloc = Rc::new(RefCell::new(Allocator::new()));
let entry_stack_offset = alloc.borrow().get_stack_offset();
Self {
alloc,
entry_stack_offset,
parent: None,
r#type: ScopeType::Function,
variables: HashMap::new(),
}
}
pub fn new_from(parent: &'a mut Scope<'a>, r#type: ScopeType) -> Scope<'a> {
let alloc = Rc::clone(&parent.alloc);
let entry_stack_offset = alloc.borrow().get_stack_offset();
Self {
alloc,
entry_stack_offset,
parent: Some(parent),
r#type,
variables: HashMap::new(),
}
}
pub fn create_var(
&mut self,
name: String,
r#type: TypeId,
) -> Result<(), CompilerError> {
let mut var = Variable::new(name, r#type.clone());
if r#type.size() > 4 {
let slot = self.alloc.borrow_mut().allocate_stack_slot(r#type.size());
var.stack_slot = Some(slot);
} else {
let reg = self.alloc.borrow_mut().allocate_var()?;
var.register = Some(reg);
}
self.variables.insert(var.name.clone(), var);
Ok(())
}
pub fn alloc_temp(&mut self) -> Result<TempReg, CompilerError> {
self.alloc.borrow_mut().allocate_temp()
}
pub fn free_temp(&mut self, temp: &TempReg) {
self.alloc.borrow_mut().free_temp(temp)
}
pub fn free_var(&mut self, reg: &AssignedReg) {
self.alloc.borrow_mut().free_var(reg);
}
pub fn close(&mut self) {
// tell the allocator that this scope is closing
// this reverts the stack offset to what it was before this scope was created.
self.alloc.clone().borrow_mut().destroy_scope(self);
for var in self.variables.clone().values() {
if let Some(reg) = var.register {
self.free_var(&reg);
}
}
}
fn get_var(&mut self, var: &str) -> Option<&mut Variable> {
self.variables.get_mut(var)
}
pub fn offset_read(
&mut self,
var: &str,
offset: i32,
) -> Result<(TempReg, Instruction), CompilerError> {
if let Some(var) = self.get_var(var) {
let slot = var.stack_slot.ok_or_else(|| {
error("Attempt to read from a var without a stack slot!")
})?;
return self.alloc.borrow_mut().offset_read(&slot, offset);
}
Err(CompilerError::Undefined(Name::new(var, None)))
}
pub fn offset_write(
&mut self,
reg: &TempReg,
var: &str,
offset: i32,
) -> Result<Instruction, CompilerError> {
if let Some(var) = self.get_var(var) {
let slot = var.stack_slot.ok_or_else(|| {
error("Attempt to write to a var without a stack slot!")
})?;
return Ok(self.alloc.borrow_mut().offset_write(reg, &slot, offset));
}
Err(CompilerError::Undefined(Name::new(var, None)))
}
pub fn load_var(
&mut self,
var: &str,
) -> Result<(AssignedReg, Instruction), CompilerError> {
if let Some(v) = self.get_var(var).cloned()
&& let Some(slot) = v.stack_slot
{
let res = self.alloc.borrow_mut().load_var(&slot)?;
self.get_var(var).unwrap().register = Some(res.0);
return Ok(res);
}
panic!("e")
}
pub fn spill_var(&mut self, var: &str) -> Result<Instruction, CompilerError> {
if let Some(v) = self.get_var(var).cloned()
&& let Some(rg) = v.register
{
let mut slot = v.stack_slot;
let res = self.alloc.borrow_mut().spill_var(&rg, &mut slot);
self.get_var(var).unwrap().stack_slot = slot;
return res;
}
Err(CompilerError::Undefined(Name::new(var, None)))
}
}
impl Drop for Scope<'_> {
fn drop(&mut self) {
self.close()
}
}
#[derive(PartialEq, Copy, Clone, Debug)]
pub enum ScopeType {
Function,
IfBlock,
LoopBlock,
}
#[derive(Clone)]
pub struct Variable {
pub name: String,
/// the type of the variable.
r#type: TypeId,
/// size taken up in bytes.
/// if size > 4, value must be stored on the stack.
pub size: usize,
pub stack_slot: Option<StackSlot>,
pub register: Option<AssignedReg>,
}
impl Variable {
pub fn new(name: String, r#type: TypeId) -> Self {
Self {
name,
size: r#type.size(),
r#type,
stack_slot: None,
register: None,
}
}
}
pub struct Allocator {
stack_offset: i32,
in_use: [(Register, bool); 16],
}
impl Allocator {
pub fn new() -> Self {
let mut in_use = [(Register::Null, false); 16];
in_use.copy_from_slice(&Register::get_gp().map(|r| (r, false))[0..16]);
Self {
stack_offset: 0,
in_use,
}
}
pub fn get_stack_offset(&self) -> i32 {
self.stack_offset
}
pub fn destroy_scope(&mut self, scope: &mut Scope) {
self.stack_offset = scope.entry_stack_offset;
for var in scope.variables.drain() {
if let Some(assigned) = var.1.register {
self.free_var(&assigned);
}
}
}
// what we need:
// - create var in register from temporary register. free temp and use it.
//
// - create var on stack from struct/array literal. return stack offset to write to.
//
// - spill var from register to stack. return stack offset to write to.
//
// - read/write var from stack+offset into register to use while preserving the stack
// slot.
//
// - read / write bytes from the stack+offset in a larger variable into a register.
pub fn offset_read(
&mut self,
slot: &StackSlot,
offset: i32,
) -> Result<(TempReg, Instruction), CompilerError> {
let register = self.allocate_temp()?;
// instruction: reg = *(&var + offset)
Ok((
register.clone(),
Instruction::ldw_reg_offset(
Register::Spr,
*register,
(**slot + offset) - self.stack_offset,
),
))
}
pub fn offset_write(
&mut self,
reg: &TempReg,
slot: &StackSlot,
offset: i32,
) -> Instruction {
// instruction: *(&var + offset) = reg
Instruction::stw_reg_offset(
**reg,
Register::Spr,
(**slot + offset) - self.stack_offset,
)
}
pub fn load_var(
&mut self,
slot: &StackSlot,
) -> Result<(AssignedReg, Instruction), CompilerError> {
let reg = self.allocate_var()?;
Ok((
reg.clone(),
Instruction::ldw_reg_offset(Register::Spr, *reg, **slot - self.stack_offset),
))
}
pub fn spill_var(
&mut self,
reg: &AssignedReg,
slot: &mut Option<StackSlot>,
// var: &mut Variable,
) -> Result<Instruction, CompilerError> {
if let Some(slot) = &slot {
let block = Instruction::stw_reg_offset(
**reg,
Register::Spr,
**slot - self.stack_offset,
);
self.free_var(reg);
return Ok(block);
}
// var doesn't have a stack slot so we need to create one
let new_slot = self.allocate_stack_slot(4); // alloc 4 bytes for reg value.
let block = Instruction::push(**reg);
self.free_var(reg);
*slot = Some(new_slot);
Ok(block)
}
pub fn allocate_stack_slot(&mut self, size: usize) -> StackSlot {
self.stack_offset -= size as i32;
let offset = self.stack_offset;
StackSlot(offset)
}
pub fn allocate_var(&mut self) -> Result<AssignedReg, CompilerError> {
if let Some(reg) = self.find_free_register() {
Ok(AssignedReg(reg))
} else {
Err(CompilerError::Generic(
"No free registers available".to_string(),
))
}
}
pub fn allocate_temp(&mut self) -> Result<TempReg, CompilerError> {
// allocates a temporary register
if let Some(reg) = self.find_free_register() {
Ok(TempReg(reg))
} else {
todo!("an efficient stack spilling algorithm. needs scope awareness.");
}
}
pub fn free_temp(&mut self, temp: &TempReg) {
// frees a temporary register.
self.in_use[**temp as usize].1 = false;
}
pub fn free_var(&mut self, reg: &AssignedReg) {
// frees a register.
self.in_use[**reg as usize].1 = false;
}
// if we have register(s) free, return the first one.
fn find_free_register(&mut self) -> Option<Register> {
self.in_use.iter_mut().find_map(|(reg, used)| {
if !*used {
*used = true;
Some(*reg)
} else {
None
}
})
}
}
#[derive(Clone, Copy, Debug)]
pub struct TempReg(Register);
#[derive(Clone, Copy, Debug)]
pub struct AssignedReg(Register);
#[derive(Clone, Copy, Debug)]
pub struct StackSlot(i32);
impl Deref for TempReg {
type Target = Register;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Deref for AssignedReg {
type Target = Register;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Deref for StackSlot {
type Target = i32;
fn deref(&self) -> &Self::Target {
&self.0
}
}
+13
View File
@@ -0,0 +1,13 @@
use crate::model::{CompilerError, Program};
mod dsa;
pub fn compiler_backend(ext: &str, ast: &Program) -> Result<String, CompilerError> {
match ext {
"dsa" => Ok(dsa::generate_code(ast)?),
_ => Err(CompilerError::Generic(format!(
"File type {} not supported",
ext
))),
}
}
+336
View File
@@ -0,0 +1,336 @@
// ============================================================================
// Token Types
// ============================================================================
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
// Keywords
Int,
If,
Else,
While,
Return,
Include,
// Identifiers and literals
Identifier(String),
Number(i32),
String(String),
Char(char),
// Operators
Plus,
Minus,
Star,
Slash,
Assign,
Eq,
Ne,
Lt,
Gt,
Le,
Ge,
// Delimiters
LParen,
RParen,
LBrace,
RBrace,
Semicolon,
Comma,
Colon,
Namespace,
Eof,
}
#[allow(unused)]
pub enum Type {
Int32,
Int16,
Int8,
Uint32,
Uint16,
Uint8,
Char,
}
#[derive(Debug, Clone)]
pub struct Token {
pub token_type: TokenType,
pub line: usize,
pub col: usize,
}
impl Token {
pub fn new(token_type: TokenType, line: usize, col: usize) -> Self {
Self {
token_type,
line,
col,
}
}
}
// ============================================================================
// Lexer
// ============================================================================
pub struct Lexer {
source: Vec<char>,
pos: usize,
line: usize,
col: usize,
}
impl Lexer {
pub fn new(source: &str) -> Self {
Self {
source: source.chars().collect(),
pos: 0,
line: 1,
col: 1,
}
}
fn error(&self, msg: &str) -> String {
format!(
"Lexer error at line {}, col {}: {}",
self.line, self.col, msg
)
}
fn peek(&self, offset: usize) -> Option<char> {
self.source.get(self.pos + offset).copied()
}
fn advance(&mut self) -> Option<char> {
if self.pos >= self.source.len() {
return None;
}
let ch = self.source[self.pos];
self.pos += 1;
if ch == '\n' {
self.line += 1;
self.col = 1;
} else {
self.col += 1;
}
Some(ch)
}
fn skip_whitespace(&mut self) {
while let Some(ch) = self.peek(0) {
if ch.is_whitespace() {
self.advance();
} else {
break;
}
}
}
fn skip_comment(&mut self) {
if self.peek(0) == Some('/') && self.peek(1) == Some('/') {
while let Some(ch) = self.peek(0) {
if ch == '\n' {
break;
}
self.advance();
}
}
}
fn read_number(&mut self) -> i32 {
let mut num_str = String::new();
while let Some(ch) = self.peek(0) {
if ch.is_ascii_digit() {
num_str.push(ch);
self.advance();
} else {
break;
}
}
num_str.parse().unwrap_or(0)
}
fn read_identifier(&mut self) -> String {
let mut ident = String::new();
while let Some(ch) = self.peek(0) {
if ch.is_alphanumeric() || ch == '_' {
ident.push(ch);
self.advance();
} else {
break;
}
}
ident
}
fn read_string(&mut self) -> Result<String, String> {
let mut string = String::new();
self.advance(); // Consume the opening quote
while let Some(ch) = self.peek(0) {
if ch == '"' {
self.advance(); // Consume the closing quote
return Ok(string);
} else if ch == '\\' {
self.advance(); // Consume the backslash
if let Some(escaped_char) = self.peek(0) {
string.push(escaped_char);
self.advance();
}
} else {
string.push(ch);
self.advance();
}
}
Err(String::from("Unexpected EOF"))
}
fn read_char(&mut self) -> Result<char, String> {
self.advance(); // Consume the opening quote
if let Some(ch) = self.peek(0) {
self.advance();
if self.peek(0) == Some('\'') {
self.advance();
return Ok(ch);
} else {
Err(String::from("expected closing quote"))
}
} else {
Err(String::from("expected character"))
}
}
pub fn tokenize(&mut self) -> Result<Vec<Token>, String> {
let mut tokens = Vec::new();
loop {
self.skip_whitespace();
self.skip_comment();
if self.pos >= self.source.len() {
break;
}
let line = self.line;
let col = self.col;
let ch = self.peek(0).unwrap();
let token_type = if ch.is_ascii_digit() {
let num = self.read_number();
TokenType::Number(num)
} else if ch == '"' {
let string = self.read_string()?;
TokenType::String(string)
} else if ch == '\'' {
let char = self.read_char()?;
TokenType::Char(char)
} else if ch.is_alphabetic() || ch == '_' {
let ident = self.read_identifier();
match ident.as_str() {
"int" => TokenType::Int,
"if" => TokenType::If,
"else" => TokenType::Else,
"while" => TokenType::While,
"return" => TokenType::Return,
"include" => TokenType::Include,
_ => TokenType::Identifier(ident),
}
} else {
match ch {
':' if self.peek(1) == Some(':') => {
self.advance();
self.advance();
TokenType::Namespace
}
':' => {
self.advance();
TokenType::Colon
}
'=' if self.peek(1) == Some('=') => {
self.advance();
self.advance();
TokenType::Eq
}
'!' if self.peek(1) == Some('=') => {
self.advance();
self.advance();
TokenType::Ne
}
'<' if self.peek(1) == Some('=') => {
self.advance();
self.advance();
TokenType::Le
}
'>' if self.peek(1) == Some('=') => {
self.advance();
self.advance();
TokenType::Ge
}
'+' => {
self.advance();
TokenType::Plus
}
'-' => {
self.advance();
TokenType::Minus
}
'*' => {
self.advance();
TokenType::Star
}
'/' => {
self.advance();
TokenType::Slash
}
'=' => {
self.advance();
TokenType::Assign
}
'<' => {
self.advance();
TokenType::Lt
}
'>' => {
self.advance();
TokenType::Gt
}
'(' => {
self.advance();
TokenType::LParen
}
')' => {
self.advance();
TokenType::RParen
}
'{' => {
self.advance();
TokenType::LBrace
}
'}' => {
self.advance();
TokenType::RBrace
}
';' => {
self.advance();
TokenType::Semicolon
}
',' => {
self.advance();
TokenType::Comma
}
_ => return Err(self.error(&format!("Unexpected character: {}", ch))),
}
};
tokens.push(Token::new(token_type, line, col));
}
tokens.push(Token::new(TokenType::Eof, self.line, self.col));
Ok(tokens)
}
}
+25
View File
@@ -0,0 +1,25 @@
use common::logging::log;
use crate::model::{CompilerError, Program};
use parser::Parser;
pub mod lexer;
pub mod parser;
pub fn generate_ast(input: &str) -> Result<Program, CompilerError> {
log("Tokenising Input...");
let mut lexer = lexer::Lexer::new(&input);
let tokens = lexer.tokenize().map_err(|e| CompilerError::Generic(e))?;
// println!("{tokens:?}");
log(&format!("Parsing {} Tokens...", tokens.len()));
let mut parser = Parser::new(tokens);
let ast = match parser.parse() {
Ok(ast) => ast,
Err(e) => return Err(CompilerError::Generic(e)),
};
Ok(ast)
}
+482
View File
@@ -0,0 +1,482 @@
// ============================================================================
// AST Node Types
// ============================================================================
use crate::model::{
BinaryOperator, Block, ConstExpr, Declaration, Dependency, Expression, Name, Program,
Statement, TypeId, UnaryOperator, Variable,
};
use super::lexer::{Token, TokenType};
// ============================================================================
// Parser
// ============================================================================
pub struct Parser {
tokens: Vec<Token>,
pos: usize,
}
impl Parser {
pub fn new(tokens: Vec<Token>) -> Self {
Self { tokens, pos: 0 }
}
fn error(&self, msg: &str) -> String {
let token = self.current();
format!(
"Parser error at line {}, col {}: {}",
token.line, token.col, msg
)
}
fn current(&self) -> &Token {
self.tokens
.get(self.pos)
.unwrap_or_else(|| self.tokens.last().unwrap())
}
fn peek(&self, offset: usize) -> &Token {
self.tokens
.get(self.pos + offset)
.unwrap_or_else(|| self.tokens.last().unwrap())
}
fn advance(&mut self) -> &Token {
if self.pos < self.tokens.len() - 1 {
self.pos += 1;
}
self.current()
}
fn expect(&mut self, expected: TokenType) -> Result<Token, String> {
let token = self.current().clone();
if std::mem::discriminant(&token.token_type) != std::mem::discriminant(&expected)
{
return Err(self.error(&format!(
"Expected {:?}, got {:?}",
expected, token.token_type
)));
}
self.advance();
Ok(token)
}
pub fn parse(&mut self) -> Result<Program, String> {
let mut declarations = Vec::new();
while !matches!(self.current().token_type, TokenType::Eof) {
declarations.push(self.parse_declaration()?);
}
Ok(Program { declarations })
}
fn parse_declaration(&mut self) -> Result<Declaration, String> {
// check for an import
if let TokenType::Include = self.current().token_type {
self.advance();
let name =
if let TokenType::Identifier(id) = self.current().clone().token_type {
Some(id)
} else {
None
}
.ok_or(String::from("Expected identifier"))?;
self.advance();
self.expect(TokenType::Colon)?;
let path = if let TokenType::String(id) = self.current().clone().token_type {
Some(id)
} else {
None
}
.ok_or(String::from("Expected string literal"))?;
self.advance();
return Ok(Declaration::Dependency(Dependency { name, path }));
}
self.expect(TokenType::Int)?;
let name = match &self.current().token_type {
TokenType::Identifier(s) => s.clone(),
_ => return Err(self.error("Expected identifier")),
};
self.advance();
match &self.current().token_type {
TokenType::LParen => {
// Function declaration
self.advance();
let mut params = Vec::<Variable>::new();
if !matches!(self.current().token_type, TokenType::RParen) {
self.expect(TokenType::Int)?;
match &self.current().token_type {
TokenType::Identifier(s) => {
params.push(Variable {
name: s.clone(),
type_id: TypeId::U32,
});
self.advance();
}
_ => return Err(self.error("Expected parameter name")),
}
while matches!(self.current().token_type, TokenType::Comma) {
self.advance();
self.expect(TokenType::Int)?;
match &self.current().token_type {
TokenType::Identifier(s) => {
params.push(Variable {
name: s.clone(),
type_id: TypeId::U32,
});
self.advance();
}
_ => return Err(self.error("Expected parameter name")),
}
}
}
self.expect(TokenType::RParen)?;
let body = self.parse_block()?;
Ok(Declaration::Function {
name,
params,
body,
return_type: TypeId::U32,
})
}
_ => {
// Variable declaration
let init = if matches!(self.current().token_type, TokenType::Assign) {
self.advance();
if let TokenType::Number(n) = self.current().token_type {
self.advance();
Some(ConstExpr::Number(n))
} else {
return Err(self
.error("Expected constant in global variable declaration"));
}
} else {
None
};
self.expect(TokenType::Semicolon)?;
Ok(Declaration::Variable {
var: Variable {
name,
type_id: TypeId::U32,
},
init,
is_const: false,
})
}
}
}
fn parse_block(&mut self) -> Result<Block, String> {
self.expect(TokenType::LBrace)?;
let mut statements = Vec::new();
while !matches!(self.current().token_type, TokenType::RBrace) {
statements.push(self.parse_statement()?);
}
self.expect(TokenType::RBrace)?;
Ok(statements)
}
fn parse_statement(&mut self) -> Result<Statement, String> {
match &self.current().token_type {
TokenType::LBrace => Ok(Statement::Block(self.parse_block()?)),
TokenType::If => self.parse_if_stmt(),
TokenType::While => self.parse_while_stmt(),
TokenType::Return => self.parse_return_stmt(),
TokenType::Identifier(name) => {
let name = name.clone();
// peek ahead for open paren (func call expr)
if matches!(self.peek(1).token_type, TokenType::LParen) {
let expr = self.parse_expression()?; // a function call expr
self.expect(TokenType::Semicolon)?;
return Ok(Statement::Expression { expr });
}
self.advance(); // advance past identifier
// assignment expression
if matches!(self.current().token_type, TokenType::Assign) {
self.advance();
let expr = self.parse_expression()?;
self.expect(TokenType::Semicolon)?;
Ok(Statement::Assign {
varname: name,
value: expr,
})
}
// var expression
else {
self.expect(TokenType::Semicolon)?;
Ok(Statement::Expression {
expr: Expression::Variable {
name: Name {
name,
namespace: None,
},
expr_type: None,
},
})
}
}
TokenType::Int => {
// Local variable declaration
self.advance();
let name = match &self.current().token_type {
TokenType::Identifier(s) => s.clone(),
_ => return Err(self.error("Expected variable name")),
};
self.advance();
let init = if matches!(self.current().token_type, TokenType::Assign) {
self.advance();
Some(self.parse_expression()?)
} else {
None
};
self.expect(TokenType::Semicolon)?;
// Convert to assignment expression statement
let expr = if let Some(init_expr) = init {
Statement::Assign {
varname: name,
value: init_expr,
}
} else {
Statement::Assign {
varname: name,
value: Expression::Empty,
}
};
Ok(expr)
}
_ => {
let expr = if matches!(self.current().token_type, TokenType::Semicolon) {
Expression::Empty
} else {
self.parse_expression()?
};
self.expect(TokenType::Semicolon)?;
Ok(Statement::Expression { expr })
}
}
}
fn parse_if_stmt(&mut self) -> Result<Statement, String> {
self.expect(TokenType::If)?;
self.expect(TokenType::LParen)?;
let condition = self.parse_expression()?;
self.expect(TokenType::RParen)?;
let then_stmt = self.parse_block()?;
let else_stmt = if matches!(self.current().token_type, TokenType::Else) {
self.advance();
self.parse_block()?
} else {
Vec::new()
};
Ok(Statement::If {
condition,
then_stmt,
else_stmt,
})
}
fn parse_while_stmt(&mut self) -> Result<Statement, String> {
self.expect(TokenType::While)?;
self.expect(TokenType::LParen)?;
let condition = self.parse_expression()?;
self.expect(TokenType::RParen)?;
let body = self.parse_block()?;
Ok(Statement::While { condition, body })
}
fn parse_return_stmt(&mut self) -> Result<Statement, String> {
self.expect(TokenType::Return)?;
let expr = if matches!(self.current().token_type, TokenType::Semicolon) {
None
} else {
Some(self.parse_expression()?)
};
self.expect(TokenType::Semicolon)?;
Ok(Statement::Return(expr))
}
fn parse_expression(&mut self) -> Result<Expression, String> {
self.parse_comparison()
}
fn parse_comparison(&mut self) -> Result<Expression, String> {
let mut expr = self.parse_additive()?;
while let Some(op) = match &self.current().token_type {
TokenType::Eq => Some(BinaryOperator::Eq),
TokenType::Ne => Some(BinaryOperator::Ne),
TokenType::Lt => Some(BinaryOperator::Lt),
TokenType::Gt => Some(BinaryOperator::Gt),
TokenType::Le => Some(BinaryOperator::Le),
TokenType::Ge => Some(BinaryOperator::Ge),
_ => None,
} {
self.advance();
let right = Box::new(self.parse_additive()?);
expr = Expression::Binary {
op,
left: Box::new(expr),
right,
type_id: None,
};
}
Ok(expr)
}
fn parse_additive(&mut self) -> Result<Expression, String> {
let mut expr = self.parse_multiplicative()?;
while let Some(op) = match &self.current().token_type {
TokenType::Plus => Some(BinaryOperator::Add),
TokenType::Minus => Some(BinaryOperator::Sub),
_ => None,
} {
self.advance();
let right = Box::new(self.parse_multiplicative()?);
expr = Expression::Binary {
op,
left: Box::new(expr),
right,
type_id: None,
};
}
Ok(expr)
}
fn parse_multiplicative(&mut self) -> Result<Expression, String> {
let mut expr = self.parse_unary()?;
while let Some(op) = match &self.current().token_type {
TokenType::Star => Some(BinaryOperator::Mul),
TokenType::Slash => Some(BinaryOperator::Div),
_ => None,
} {
self.advance();
let right = Box::new(self.parse_unary()?);
expr = Expression::Binary {
op,
left: Box::new(expr),
right,
type_id: None,
};
}
Ok(expr)
}
fn parse_unary(&mut self) -> Result<Expression, String> {
let op = match &self.current().token_type {
TokenType::Plus => Some(UnaryOperator::Plus),
TokenType::Minus => Some(UnaryOperator::Minus),
_ => None,
};
if let Some(op) = op {
self.advance();
let operand = Box::new(self.parse_unary()?);
return Ok(Expression::Unary {
op,
operand,
type_id: None,
});
}
self.parse_primary()
}
fn parse_primary(&mut self) -> Result<Expression, String> {
match &self.current().token_type.clone() {
TokenType::Number(n) => {
let value = *n;
self.advance();
Ok(Expression::Number {
value: value as isize,
type_id: None,
})
}
TokenType::Identifier(name) => {
let name = name.clone();
self.advance();
if matches!(self.current().token_type, TokenType::LParen) {
// Function call
self.advance();
let mut args = Vec::new();
if !matches!(self.current().token_type, TokenType::RParen) {
args.push(self.parse_expression()?);
while matches!(self.current().token_type, TokenType::Comma) {
self.advance();
args.push(self.parse_expression()?);
}
}
self.expect(TokenType::RParen)?;
Ok(Expression::Call {
name: Name {
name,
namespace: None,
},
args,
type_id: None,
})
} else {
Ok(Expression::Variable {
name: Name {
name,
namespace: None,
},
expr_type: None,
})
}
}
TokenType::LParen => {
self.advance();
let expr = self.parse_expression()?;
self.expect(TokenType::RParen)?;
Ok(expr)
}
_ => Err(self.error(&format!(
"Unexpected token: {:?}",
self.current().token_type
))),
}
}
}
File diff suppressed because it is too large Load Diff
+38
View File
@@ -0,0 +1,38 @@
use crate::model::{CompilerError, Program};
use common::logging::Logger;
use parser::{ParseResult, Parser};
// use semantic_analyser::Analyser;
pub mod lexer;
pub mod parser;
// pub mod semantic_analyser;
pub fn generate_ast(input: &str, logger: &Logger) -> Result<Program, CompilerError> {
logger.info("Tokenising Input...");
let lexer = lexer::Lexer::new(&input);
let tokens = lexer.collect::<Vec<_>>();
// println!("{tokens:#?}");
logger.info(&format!("Parsing {} Tokens...", tokens.len()));
let mut parser = Parser::new(tokens);
let ast = match parser.parse() {
ParseResult::Accept(ast) => ast,
ParseResult::Reject(e) => return Err(e),
ParseResult::Deny => {
return Err(CompilerError::Generic("Parser used ::Deny".to_string()));
}
};
// println!("{ast:#?}");
logger.info("Analyzing AST...");
logger.info("Checking Type Information...");
// let mut analyser = Analyser::new();
// analyser.analyse(ast.clone()).unwrap();
logger.info("Type Checking Complete...");
Ok(ast)
}
+985
View File
@@ -0,0 +1,985 @@
use super::lexer::Token;
use crate::model::{
AssignmentOperator, BinaryOperator, Block, Call, CompilerError, ConstExpr,
Declaration, Dependency, Expression, Number, Program, Statement, TypeId,
UnaryOperator, Variable,
};
use crate::{expect_tt, expect_value};
use std::ops::{ControlFlow, FromResidual, Try};
#[derive(Debug, Clone)]
pub enum ParseResult<T, E> {
Accept(T),
Deny,
Reject(E),
}
pub struct Parser {
tokens: Vec<Token>,
idx: usize,
}
impl Parser {
pub fn new(tokens: Vec<Token>) -> Self {
Self { tokens, idx: 0 }
}
pub fn parse(&mut self) -> ParseResult<Program, CompilerError> {
let mut declarations = Vec::new();
while let ParseResult::Accept(_) = self.peek_next() {
declarations.push(self.parse_declaration()?);
}
ParseResult::Accept(Program { declarations })
}
fn parse_declaration(&mut self) -> ParseResult<Declaration, CompilerError> {
if expect_tt!(self.peek_next()?, Fn).accepted() {
return self.parse_func();
}
if expect_tt!(self.peek_next()?, Struct).accepted() {
return self.parse_struct();
}
if expect_tt!(self.peek_next()?, Include).accepted() {
// expect include keyword
let _ = self.next();
// expect namespace identifier
let name = expect_value!(self.next()?, Identifier)?;
// expect colon
let _ = expect_tt!(self.next()?, Colon)?;
// expect string literal (module path)
let path = expect_value!(self.next()?, String)?;
// expect semicolon
let _ = expect_tt!(self.next()?, Semicolon)?;
return ParseResult::Accept(Declaration::Dependency(Dependency {
name: name.name,
path,
}));
}
if expect_tt!(self.peek_next()?, Const, Static).accepted() {
let is_const = match self.next()? {
Token::Const => true,
Token::Static => false,
_ => {
return ParseResult::Reject(CompilerError::Generic(String::from(
"This can't happen!",
)));
}
};
let var = self.parse_var_decl()?;
let _ = expect_tt!(self.next()?, Assign)?;
let value = self.next()?;
let init = match value {
Token::String(x) => Some(ConstExpr::String(x)),
Token::SignedInt(x, _) => Some(ConstExpr::Number(x)),
Token::UnsignedInt(x, _) => Some(ConstExpr::Number(x as i32)),
_ => {
return ParseResult::Reject(CompilerError::UnexpectedToken(
value.tt().to_string(),
));
}
};
let _ = expect_tt!(self.next()?, Semicolon)?;
return ParseResult::Accept(Declaration::Variable {
var,
init,
is_const,
});
}
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
}
fn parse_struct(&mut self) -> ParseResult<Declaration, CompilerError> {
let _ = expect_tt!(self.next()?, Struct)?;
let name = expect_value!(self.next()?, Identifier)?;
let _ = expect_tt!(self.next()?, LeftBrace)?;
let mut fields = Vec::new();
while expect_tt!(self.peek_next()?, Identifier).accepted() {
let arg = self.parse_var_decl()?;
fields.push(arg);
if expect_tt!(self.peek_next()?, Comma).accepted() {
self.next()?;
} else {
break;
}
}
let _ = expect_tt!(self.next()?, RightBrace)?;
ParseResult::Accept(Declaration::Struct { name, fields })
}
fn parse_func(&mut self) -> ParseResult<Declaration, CompilerError> {
// expect function keyword
let _ = expect_tt!(self.next()?, Fn);
// expect function name
let name = expect_value!(self.next()?, Identifier)?;
// expect left paren
let _ = expect_tt!(self.next()?, LeftParen)?;
let mut params = Vec::new();
while expect_tt!(self.peek_next()?, Identifier).accepted() {
let arg = self.parse_var_decl()?;
params.push(arg);
if expect_tt!(self.peek_next()?, Comma).accepted() {
self.next()?;
} else {
break;
}
}
// expect right paren
let _ = expect_tt!(self.next()?, RightParen)?;
// see if we can parse the return type!
let mut return_type = TypeId::Void;
if expect_tt!(self.peek_next()?, RightArrow).accepted() {
let _ = self.next();
return_type = self.parse_type()?;
}
// expect vald block
if expect_tt!(self.peek_next()?, LeftBrace).accepted() {
ParseResult::Accept(Declaration::Function {
name: name.name,
params,
return_type,
body: self.parse_block()?,
})
} else {
ParseResult::Reject(CompilerError::UnexpectedToken(
self.peek_next()?.tt().to_string(),
))
}
}
fn parse_block(&mut self) -> ParseResult<Block, CompilerError> {
// expect left brace
let _ = expect_tt!(self.next()?, LeftBrace)?;
let mut block = Vec::new();
while !expect_tt!(self.peek_next()?, RightBrace).accepted() {
block.push(self.parse_statement()?);
}
// expect right brace
let _ = expect_tt!(self.next()?, RightBrace)?;
ParseResult::Accept(block)
}
fn parse_statement(&mut self) -> ParseResult<Statement, CompilerError> {
// handle if statements
if expect_tt!(self.peek_next()?, If).accepted() {
self.next()?;
let condition = self.parse_expression()?;
let then_stmt = self.parse_block()?;
if !expect_tt!(self.peek_next()?, Else).accepted() {
return ParseResult::Accept(Statement::If {
condition,
then_stmt,
else_stmt: vec![],
});
}
let _ = expect_tt!(self.next()?, Else)?;
let else_stmt = self.parse_block()?;
return ParseResult::Accept(Statement::If {
condition,
then_stmt,
else_stmt,
});
}
// handle while loops
if expect_tt!(self.peek_next()?, While).accepted() {
self.next()?;
// expect valid expression
let expr = self.parse_expression()?;
// expect valid block after
let block = self.parse_block()?;
// return result
return ParseResult::Accept(Statement::While {
condition: expr,
body: block,
});
}
// handle indefinite loops
if expect_tt!(self.peek_next()?, Loop).accepted() {
self.next()?;
// parse the inner block
return ParseResult::Accept(Statement::Loop(self.parse_block()?));
}
if expect_tt!(self.peek_next()?, Return).accepted() {
self.next()?;
// handle case where nothing is returned
if expect_tt!(self.peek_next()?, Semicolon).accepted() {
return ParseResult::Accept(Statement::Return(None));
}
let expr = self.parse_expression()?;
expect_tt!(self.next()?, Semicolon)?;
return ParseResult::Accept(Statement::Return(Some(expr)));
}
if expect_tt!(self.peek_next()?, Break).accepted() {
self.next()?;
// expect semicolon
expect_tt!(self.next()?, Semicolon)?;
// return result
return ParseResult::Accept(Statement::Break);
}
if expect_tt!(self.peek_next()?, Continue).accepted() {
self.next()?;
// expect semicolon
expect_tt!(self.next()?, Semicolon)?;
// return result
return ParseResult::Accept(Statement::Continue);
}
// handle writes to pointers!
if expect_tt!(self.peek_next()?, Star).accepted() {
self.next()?;
let left = if expect_tt!(self.peek_next()?, Identifier).accepted() {
let identifier = expect_value!(self.next()?, Identifier)?;
Expression::Variable {
name: identifier,
expr_type: None,
}
} else if expect_tt!(self.peek_next()?, LeftParen).accepted() {
self.next()?;
let expr = self.parse_expression()?;
let _ = expect_tt!(self.next()?, RightParen).accepted();
expr
} else {
return ParseResult::Reject(CompilerError::UnexpectedToken(
self.peek_next()?.tt().to_string(),
));
};
let _ = expect_tt!(self.next()?, Assign)?;
let right = self.parse_expression()?;
// expect semicolon
expect_tt!(self.next()?, Semicolon)?;
// return result
return ParseResult::Accept(Statement::PtrWrite {
ptr: left,
value: right,
});
}
// handle let statements (declarations)
if expect_tt!(self.peek_next()?, Let).accepted() {
self.next();
// expect variable name and type.
let name = self.parse_var_decl()?;
// handle uninitialised variable case
if expect_tt!(self.peek_next()?, Semicolon).accepted() {
self.next();
return ParseResult::Accept(Statement::Declaration {
var: name,
value: None,
});
}
// handle initialised case
// expect equals
let _ = expect_tt!(self.next()?, Assign)?;
// expect a valid expression
let expr = self.parse_expression()?;
let _ = expect_tt!(self.next()?, Semicolon);
// return statement
return ParseResult::Accept(Statement::Declaration {
var: name,
value: Some(expr),
});
}
// handle an in-place function call
if let ParseResult::Accept(name) = expect_value!(self.peek_next()?, Identifier)
&& let ParseResult::Accept(operator) = expect_tt!(
self.peek(1)?,
Assign,
PlusEqual,
MinusEqual,
StarEqual,
SlashEqual,
PercentEqual,
AndEqual,
OrEqual,
XorEqual,
ShlEqual,
ShrEqual
)
{
// consume name token
self.next()?;
// pattern match to find operator
let operator = match operator {
Token::Assign => AssignmentOperator::Assign,
Token::PlusEqual => AssignmentOperator::AddAssign,
Token::MinusEqual => AssignmentOperator::SubAssign,
Token::StarEqual => AssignmentOperator::MulAssign,
Token::SlashEqual => AssignmentOperator::DivAssign,
Token::PercentEqual => AssignmentOperator::ModAssign,
Token::AndEqual => AssignmentOperator::AndAssign,
Token::OrEqual => AssignmentOperator::OrAssign,
Token::XorEqual => AssignmentOperator::XorAssign,
Token::ShlEqual => AssignmentOperator::LeftShiftAssign,
Token::ShrEqual => AssignmentOperator::RightShiftAssign,
_ => {
return ParseResult::Reject(CompilerError::UnexpectedToken(
self.peek_next()?.tt().to_string(),
));
}
};
// consume operator token
self.next()?;
let value = self.parse_expression()?;
let _ = expect_tt!(self.next()?, Semicolon);
return ParseResult::Accept(Statement::Assign {
varname: name.name,
operator,
value,
});
}
// parse an expression and a semicolon
let expr = self.parse_expression()?;
let _ = expect_tt!(self.next()?, Semicolon)?;
ParseResult::Accept(Statement::Expression { expr })
}
fn parse_expression(&mut self) -> ParseResult<Expression, CompilerError> {
self.parse_logical_or()
}
fn parse_logical_or(&mut self) -> ParseResult<Expression, CompilerError> {
let left = self.parse_logical_and()?;
let op = match self.peek_next()? {
Token::LogicalOr => BinaryOperator::LogicalOr,
_ => return ParseResult::Accept(left),
};
self.next()?;
ParseResult::Accept(Expression::Binary {
op,
left: Box::new(left),
right: Box::new(self.parse_logical_or()?),
type_id: Some(TypeId::U32),
})
}
fn parse_logical_and(&mut self) -> ParseResult<Expression, CompilerError> {
let left = self.parse_bitwise_or()?;
let op = match self.peek_next()? {
Token::LogicalAnd => BinaryOperator::LogicalAnd,
_ => return ParseResult::Accept(left),
};
self.next()?;
ParseResult::Accept(Expression::Binary {
op,
left: Box::new(left),
right: Box::new(self.parse_logical_and()?),
type_id: Some(TypeId::U32),
})
}
fn parse_bitwise_or(&mut self) -> ParseResult<Expression, CompilerError> {
let left = self.parse_bitwise_xor()?;
let op = match self.peek_next()? {
Token::Pipe => BinaryOperator::BitwiseOr,
_ => return ParseResult::Accept(left),
};
self.next()?;
ParseResult::Accept(Expression::Binary {
op,
left: Box::new(left),
right: Box::new(self.parse_bitwise_or()?),
type_id: Some(TypeId::U32),
})
}
fn parse_bitwise_xor(&mut self) -> ParseResult<Expression, CompilerError> {
let left = self.parse_bitwise_and()?;
let op = match self.peek_next()? {
Token::Caret => BinaryOperator::BitwiseXor,
_ => return ParseResult::Accept(left),
};
self.next()?;
ParseResult::Accept(Expression::Binary {
op,
left: Box::new(left),
right: Box::new(self.parse_bitwise_xor()?),
type_id: Some(TypeId::U32),
})
}
fn parse_bitwise_and(&mut self) -> ParseResult<Expression, CompilerError> {
let left = self.parse_comparison()?;
let op = match self.peek_next()? {
Token::Ampersand => BinaryOperator::BitwiseAnd,
_ => return ParseResult::Accept(left),
};
self.next()?;
ParseResult::Accept(Expression::Binary {
op,
left: Box::new(left),
right: Box::new(self.parse_bitwise_and()?),
type_id: Some(TypeId::U32),
})
}
fn parse_comparison(&mut self) -> ParseResult<Expression, CompilerError> {
let left = self.parse_shift()?;
let op = match self.peek_next()? {
Token::EqualEqual => BinaryOperator::Equal,
Token::BangEqual => BinaryOperator::NotEqual,
Token::Less => BinaryOperator::LessThan,
Token::Greater => BinaryOperator::GreaterThan,
Token::LessEqual => BinaryOperator::LessOrEqual,
Token::GreaterEqual => BinaryOperator::GreaterOrEqual,
_ => return ParseResult::Accept(left),
};
self.next()?;
ParseResult::Accept(Expression::Binary {
op,
left: Box::new(left),
right: Box::new(self.parse_comparison()?),
type_id: Some(TypeId::Bool),
})
}
fn parse_shift(&mut self) -> ParseResult<Expression, CompilerError> {
let left = self.parse_additive()?;
let op = match self.peek_next()? {
Token::LeftShift => BinaryOperator::LeftShift,
Token::RightShift => BinaryOperator::RightShift,
_ => return ParseResult::Accept(left),
};
self.next()?;
ParseResult::Accept(Expression::Binary {
op,
left: Box::new(left),
right: Box::new(self.parse_shift()?),
type_id: Some(TypeId::U32),
})
}
fn parse_additive(&mut self) -> ParseResult<Expression, CompilerError> {
let left = self.parse_multiplicative()?;
let op = match self.peek_next()? {
Token::Plus => BinaryOperator::Add,
Token::Minus => BinaryOperator::Sub,
_ => return ParseResult::Accept(left),
};
self.next()?;
ParseResult::Accept(Expression::Binary {
op,
left: Box::new(left),
right: Box::new(self.parse_additive()?),
type_id: Some(TypeId::U32),
})
}
fn parse_multiplicative(&mut self) -> ParseResult<Expression, CompilerError> {
let left = self.parse_unary()?;
let op = match self.peek_next()? {
Token::Star => BinaryOperator::Mul,
Token::Slash => BinaryOperator::Div,
_ => return ParseResult::Accept(left),
};
self.next()?;
ParseResult::Accept(Expression::Binary {
op,
left: Box::new(left),
right: Box::new(self.parse_multiplicative()?),
type_id: None,
})
}
fn parse_unary(&mut self) -> ParseResult<Expression, CompilerError> {
let op = match self.peek_next()? {
// prefix inc/dec
Token::PlusPlus => UnaryOperator::Increment,
Token::MinusMinus => UnaryOperator::Decrement,
// arithmetic
Token::Plus => UnaryOperator::Plus,
Token::Minus => UnaryOperator::Minus,
// pointer
Token::Star => UnaryOperator::Dereference,
Token::Ampersand => UnaryOperator::AddressOf,
// boolean
Token::Bang => UnaryOperator::LogicalNot,
Token::Tilde => UnaryOperator::BitwiseNot,
Token::SizeOf => UnaryOperator::SizeOf,
_ => {
let expr = self.parse_primary()?;
return self.parse_postfix(expr);
}
};
self.next()?;
let operand = Box::new(self.parse_unary()?);
ParseResult::Accept(Expression::Unary {
op,
operand,
type_id: None,
})
}
fn parse_postfix(
&mut self,
mut expr: Expression,
) -> ParseResult<Expression, CompilerError> {
loop {
match self.peek_next()? {
// Type cast: expr as Type
Token::As => {
self.next()?; // consume 'as'
let target_type = self.parse_type()?;
expr = Expression::TypeCast {
expr: Box::new(expr),
target_type,
type_id: None,
};
}
// Postfix increment/decrement
Token::PlusPlus => {
self.next()?;
expr = Expression::UnaryPostfix {
op: UnaryOperator::Increment,
operand: Box::new(expr),
type_id: None,
};
}
Token::MinusMinus => {
self.next()?;
expr = Expression::UnaryPostfix {
op: UnaryOperator::Decrement,
operand: Box::new(expr),
type_id: None,
};
}
// Array indexing: expr[index]
Token::LeftBracket => {
self.next()?; // consume '['
let index = Box::new(self.parse_expression()?);
let _ = expect_tt!(self.next()?, RightBracket)?;
expr = Expression::IndexAccess {
expr: Box::new(expr),
index,
type_id: None,
};
}
// Function call: expr(args...)
Token::LeftParen => {
self.next()?; // consume '('
let mut args = Vec::new();
if !matches!(self.peek_next()?, Token::RightParen) {
loop {
args.push(self.parse_expression()?);
if !matches!(self.peek_next()?, Token::Comma) {
break;
}
self.next()?; // consume comma
}
}
let _ = expect_tt!(self.next()?, RightParen)?;
if let Expression::Variable { name, .. } = expr {
expr = Expression::Call {
func: Call { name, args },
type_id: None,
};
}
}
// Member access: expr.member (if you support structs)
Token::Dot => {
self.next()?;
let field_name = expect_value!(self.next()?, Identifier)?;
expr = Expression::MemberAccess {
expr: Box::new(expr),
field_name,
type_id: None,
};
}
// No more postfix operations
_ => break,
}
}
ParseResult::Accept(expr)
}
fn parse_primary(&mut self) -> ParseResult<Expression, CompilerError> {
match self.peek_next()? {
Token::UnsignedInt(value, type_id) => {
self.next()?;
ParseResult::Accept(Expression::Number(Number::Unsigned(value, type_id)))
}
Token::SignedInt(value, type_id) => {
self.next()?;
ParseResult::Accept(Expression::Number(Number::Signed(value, type_id)))
}
Token::String(value) => {
self.next()?;
ParseResult::Accept(Expression::StringLiteral(value))
}
Token::Char(value) => {
self.next()?;
ParseResult::Accept(Expression::CharLiteral(value))
}
Token::Identifier(name) => {
self.next()?;
// if the next token isn't the beginning of a struct literal this is just
// an identifier.
if !expect_tt!(self.peek_next()?, LeftBrace).accepted() {
return ParseResult::Accept(Expression::Variable {
name,
expr_type: None,
});
}
let _ = self.next()?;
let mut fields = Vec::new();
while !expect_tt!(self.peek_next()?, RightBrace).accepted() {
let name = expect_value!(self.next()?, Identifier)?;
let _ = expect_tt!(self.next()?, Colon)?;
let expr = self.parse_expression()?;
fields.push((name, expr));
if expect_tt!(self.peek_next()?, Comma).accepted() {
self.next()?;
} else {
break;
}
}
let _ = expect_tt!(self.next()?, RightBrace)?;
ParseResult::Accept(Expression::StructLiteral {
name,
fields,
type_id: None,
})
}
Token::LeftBracket => {
self.next()?; // consume '['
let mut elements = Vec::new();
if !matches!(self.peek_next()?, Token::RightBracket) {
loop {
elements.push(self.parse_expression()?);
if !matches!(self.peek_next()?, Token::Comma) {
break;
}
self.next()?; // consume comma
}
}
expect_tt!(self.next()?, RightBracket)?;
ParseResult::Accept(Expression::ArrayLiteral {
elements,
type_id: None,
})
}
Token::LeftParen => {
self.next()?;
let expr = self.parse_expression()?;
let _ = expect_tt!(self.next()?, RightParen)?;
ParseResult::Accept(expr)
}
_ => ParseResult::Reject(CompilerError::UnexpectedToken(
self.peek_next()?.tt().to_string(),
)),
}
}
fn parse_var_decl(&mut self) -> ParseResult<Variable, CompilerError> {
let name = expect_value!(self.next()?, Identifier)?;
let _ = expect_tt!(self.next()?, Colon)?;
let type_id = self.parse_type()?;
ParseResult::Accept(Variable {
name: name.name,
type_id,
})
}
fn parse_type(&mut self) -> ParseResult<TypeId, CompilerError> {
// parse primitive or named type
if expect_tt!(self.peek_next()?, Identifier).accepted() {
return self.parse_type_identifier();
}
// parse array type
if expect_tt!(self.peek_next()?, LeftBracket).accepted() {
let _ = self.next()?;
let internal_type = self.parse_type()?;
let _ = expect_tt!(self.next()?, Semicolon)?;
let size = expect_value!(self.next()?, UnsignedInt)?;
let _ = expect_tt!(self.next()?, RightBracket)?;
return ParseResult::Accept(TypeId::Array {
r#type: Box::new(internal_type),
size: size as usize,
});
}
// parse tuple type
if expect_tt!(self.peek_next()?, LeftParen).accepted() {
let _ = self.next()?;
let mut types = Vec::new();
while !expect_tt!(self.peek_next()?, RightParen).accepted() {
types.push(self.parse_type()?);
if !expect_tt!(self.peek_next()?, Comma).accepted() {
break;
}
let _ = self.next()?;
}
let _ = expect_tt!(self.next()?, RightParen)?;
return ParseResult::Accept(TypeId::Tuple(types));
}
ParseResult::Reject(CompilerError::Generic(format!(
"Parsing type but no valid type was detected: {:?}",
self.peek_next()?
)))
}
fn parse_type_identifier(&mut self) -> ParseResult<TypeId, CompilerError> {
// get the type name incl namespace
let name = expect_value!(self.next()?, Identifier)?;
let type_id = match name.name.as_str() {
"u32" => TypeId::U32,
"u16" => TypeId::U16,
"u8" => TypeId::U8,
"i32" => TypeId::I32,
"i16" => TypeId::I16,
"i8" => TypeId::I8,
"void" => TypeId::Void,
"char" => TypeId::Char,
"str" => TypeId::Ptr(Box::new(TypeId::Char)),
_ => {
let mut generics = Vec::new();
if expect_tt!(self.peek_next()?, Less).accepted() {
let _ = self.next()?;
// loop until we find the closing '>'
while !expect_tt!(self.peek_next()?, Greater).accepted() {
generics.push(self.parse_type()?);
if !expect_tt!(self.peek_next()?, Comma).accepted() {
break;
}
let _ = self.next()?;
}
let _ = expect_tt!(self.next()?, Greater)?;
}
TypeId::UnknownCustom { name, generics }
}
};
ParseResult::Accept(type_id)
}
fn next(&mut self) -> ParseResult<Token, CompilerError> {
if self.idx >= self.tokens.len() {
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
} else {
let token = self.tokens[self.idx].clone();
self.idx += 1;
ParseResult::Accept(token)
}
}
fn peek_next(&self) -> ParseResult<Token, CompilerError> {
if self.idx >= self.tokens.len() {
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
} else {
ParseResult::Accept(self.tokens[self.idx].clone())
}
}
fn peek(&self, offset: usize) -> ParseResult<Token, CompilerError> {
if self.idx + offset >= self.tokens.len() {
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
} else {
ParseResult::Accept(self.tokens[self.idx + offset].clone())
}
}
}
impl<T, E> ParseResult<T, E> {
pub fn accepted(&self) -> bool {
matches!(self, ParseResult::Accept(_))
}
}
pub enum ParseResultResidual<T> {
Deny,
Reject(T),
}
impl<T, E> Try for ParseResult<T, E> {
type Output = T;
type Residual = ParseResultResidual<E>;
fn from_output(output: T) -> Self {
ParseResult::Accept(output)
}
fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
match self {
ParseResult::Accept(v) => ControlFlow::Continue(v),
ParseResult::Deny => ControlFlow::Break(ParseResultResidual::Deny),
ParseResult::Reject(e) => ControlFlow::Break(ParseResultResidual::Reject(e)),
}
}
}
impl<T, E> FromResidual for ParseResult<T, E> {
fn from_residual(residual: ParseResultResidual<E>) -> Self {
match residual {
ParseResultResidual::Deny => ParseResult::Deny,
ParseResultResidual::Reject(e) => ParseResult::Reject(e),
}
}
}
#[macro_export]
macro_rules! expect_tt {
($token:expr, $($variant:ident),+) => {{
let token = $token.clone();
let tt = token.tt().to_string();
let mut vs = String::new();
$(
let s = stringify!($variant);
vs.push_str(s);
vs.push_str("|");
)+
match tt.as_str() {
$(
stringify!($variant) => ParseResult::Accept(token),
)+
_ => {
// let expected = format!("[{}]", vec![$(stringify!($variant)),+].join(" | "));
ParseResult::Reject(CompilerError::UnexpectedToken(tt))
}
}
}};
}
#[macro_export]
macro_rules! expect_value {
($expr:expr, $variant:ident) => {{
let tok = $expr;
match tok.clone() {
Token::$variant(first, ..) => ParseResult::Accept(first),
_ => {
ParseResult::Reject(CompilerError::UnexpectedToken(tok.tt().to_string()))
}
}
}};
}
@@ -0,0 +1,226 @@
use std::collections::HashMap;
use crate::model::{
BinaryOperator, // You'll need to add this to your imports
CompilerError,
Declaration,
Dependency,
Expression,
Program,
TypeId,
UnaryOperator,
};
pub struct Analyser {
symbol_table: HashMap<String, Declaration>,
}
const NUMERIC_TYPES: &[TypeId] = &[
TypeId::U32,
TypeId::I32,
TypeId::I16,
TypeId::U16,
TypeId::I8,
TypeId::U8,
];
impl Analyser {
pub fn new() -> Self {
Self {
symbol_table: HashMap::new(),
}
}
pub fn analyse(&mut self, ast: Program) -> Result<(), CompilerError> {
// build table of global symbols.
for dec in ast.declarations {
let name = match dec.clone() {
Declaration::Function { name, .. } => name,
Declaration::Variable { var, .. } => var.name,
Declaration::Dependency(Dependency { name, .. }) => name,
};
self.symbol_table.insert(name, dec);
}
Ok(())
}
fn match_type(
actual: TypeId,
expected: Option<TypeId>,
) -> Result<TypeId, CompilerError> {
match expected {
Some(id) => {
if id != actual {
Err(CompilerError::TypeMismatch(id, actual))
} else {
Ok(actual)
}
}
None => Ok(actual),
}
}
fn get_type(
&mut self, // Changed from &self to &mut self since we modify expr
expr: &mut Expression,
expected_type: Option<TypeId>,
) -> Result<TypeId, CompilerError> {
match expr {
// Correct IFF we're expecting a void type
Expression::Empty => Self::match_type(TypeId::Void, expected_type),
// Correct IFF we're expecting a char type
Expression::CharLiteral(_) => Self::match_type(TypeId::Char, expected_type),
// Correct IFF we're expecting a string slice type
Expression::StringLiteral(_) => {
Self::match_type(TypeId::Ptr(Box::new(TypeId::Char)), expected_type)
}
Expression::Variable { name, expr_type } => {
let actual = expr_type.clone().ok_or(CompilerError::UnknownType)?;
Self::match_type(actual, expected_type)
}
Expression::Number { value, type_id } => {
// If we already know the TypeId
if let Some(id) = type_id {
return Self::match_type(id.clone(), expected_type);
}
// If we're expecting a type id, check it's numeric.
// TODO: add checks to make sure it's valid for its size eg u8 cant be
// more than 255
if let Some(expected) = expected_type {
if NUMERIC_TYPES.contains(&expected) {
*type_id = Some(expected.clone());
return Ok(expected);
} else {
return Err(CompilerError::TypeMismatch(expected, TypeId::U32));
}
}
// Default to i32 if no type information is available
*type_id = Some(TypeId::I32);
Ok(TypeId::I32)
}
Expression::Binary {
op,
left,
right,
type_id,
} => {
// For binary operations, both operands should have compatible types
// and the result type depends on the operation
let left_type = self.get_type(left, None)?;
let right_type = self.get_type(right, Some(left_type.clone()))?;
// For numeric operations, result has the same type as operands
if NUMERIC_TYPES.contains(&left_type)
&& NUMERIC_TYPES.contains(&right_type)
{
*type_id = Some(left_type);
Self::match_type(left_type, expected_type)
} else {
Err(CompilerError::TypeMismatch(left_type, right_type))
}
}
Expression::Unary {
op,
operand,
type_id,
} => {
match op {
UnaryOperator::Plus | UnaryOperator::Minus => {
// Unary +/- require numeric operands
let inner_type = self.get_type(operand, None)?;
if NUMERIC_TYPES.contains(&inner_type) {
*type_id = Some(inner_type.clone());
Self::match_type(inner_type, expected_type)
} else {
Err(CompilerError::TypeMismatch(inner_type, TypeId::I32))
}
}
UnaryOperator::Dereference => {
// For dereference (*ptr), the operand must be a pointer
// and the result type is what the pointer points to
let inner_type = self.get_type(operand, None)?;
match inner_type {
TypeId::Ptr(inner) => {
let deref_type = *inner;
*type_id = Some(deref_type.clone());
Self::match_type(deref_type, expected_type)
}
_ => Err(CompilerError::Generic(format!(
"Cannot dereference non-pointer type: {:?}",
inner_type
))),
}
}
UnaryOperator::Reference => {
// For reference (&var), we need to determine what we're taking
// a reference to, then wrap it in a Ptr
// If expected_type is Ptr(T), then operand should have type T
let expected_inner = match expected_type.clone() {
Some(TypeId::Ptr(inner)) => Some(*inner),
_ => None,
};
let inner_type = self.get_type(operand, expected_inner)?;
let ref_type = TypeId::Ptr(Box::new(inner_type));
*type_id = Some(ref_type.clone());
Self::match_type(ref_type, expected_type)
}
}
}
Expression::Call {
name,
args,
type_id,
} => match self.symbol_table.get(&name.name) {
Some(Declaration::Function {
params,
return_type,
..
}) => {
// check that we've given the right number of arguments.
if args.len() != params.len() {
return Err(CompilerError::Generic(format!(
"Function {} expected {} arguments but received {}",
name.name,
params.len(),
args.len()
)));
}
for (arg, param) in args.iter_mut().zip(params.iter()) {
// check that the argument type matches the parameter type.
let provided_type = self.get_type(arg, Some(param.type_id))?;
if provided_type != param.type_id {
return Err(CompilerError::TypeMismatch(
param.type_id,
provided_type,
));
}
}
*type_id = Some(return_type.clone());
Self::match_type(return_type.clone(), expected_type)
}
_ => Err(CompilerError::Generic(format!(
"Function {} not found in symbol table",
name.name
))),
},
}
}
}
+21
View File
@@ -0,0 +1,21 @@
use common::logging::Logger;
use crate::model::{CompilerError, Program};
// mod c;
mod dsc;
pub fn compiler_frontend(
ext: &str,
data: &str,
logger: &Logger,
) -> Result<Program, CompilerError> {
match ext {
"dsc" => Ok(dsc::generate_ast(&data, &logger)?),
// "c" => Ok(c::generate_ast(&data)?),
_ => Err(CompilerError::Generic(format!(
"File type {} not supported",
ext
))),
}
}
+93
View File
@@ -0,0 +1,93 @@
#![feature(try_trait_v2)]
use std::path::{Path, PathBuf};
use common::{
build::{BuildError, Builder},
logging::LogReceiver,
};
use crate::{model::CompilerError, specialised::build_specialised};
mod backend;
mod frontend;
mod model;
mod specialised;
pub struct Compiler {
src_path: PathBuf,
result: Option<Result<String, BuildError>>,
logger: LogReceiver,
}
impl Compiler {
fn build(&mut self) -> Result<String, Box<dyn std::error::Error>> {
let input =
std::fs::read_to_string(&self.src_path).expect("Failed to read input file");
let input_ext = self
.src_path
.extension()
.and_then(|s| s.to_str())
.unwrap_or("");
// check if we're using a specialised compiler
if let Some(output) = build_specialised(input_ext, &input) {
return output.map_err(|err| format!("Compilation failed: {err:?}").into());
}
// Parse the input using the frontend, providing the file extension and data.
let ast =
match frontend::compiler_frontend(input_ext, &input, &self.logger.logger()) {
Ok(ast) => ast,
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
};
// println!("Parsed AST: {:#?}", ast);
// Generate the output using the backend with the parsed result.
let result = match backend::compiler_backend("dsa", &ast) {
Ok(result) => result,
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
};
Ok(result)
}
}
impl Builder for Compiler {
type Output = String;
fn new(src_path: impl Into<PathBuf>) -> Self {
Self {
src_path: src_path.into(),
result: None,
logger: LogReceiver::new(true),
}
}
fn start(&mut self) {
match self.build() {
Ok(x) => self.result = Some(Ok(x)),
Err(err) => self.result = Some(Err(err.into())),
}
}
fn poll(&mut self) -> Option<Result<Self::Output, BuildError>> {
self.result.take()
}
fn output(&mut self) -> Result<Self::Output, BuildError> {
self.result.clone().ok_or(BuildError::Generic(String::from(
"Compiler was never started",
)))?
}
fn logs(&self) -> Vec<String> {
todo!()
}
}
pub fn error(msg: impl Into<String>) -> CompilerError {
CompilerError::Generic(msg.into())
}
+33
View File
@@ -0,0 +1,33 @@
use std::path::{Path, PathBuf};
use common::{build::Builder, logging::info};
use compiler::Compiler;
fn main() {
// read from input file: syntax "c_compiler <src.c> [output.dsa]"
let args: Vec<String> = std::env::args().collect();
if args.len() < 2 {
eprintln!("Usage: c_compiler <src.dsc> [output.dsa]");
return;
}
let input_file = &args[1];
let output_file = if args.len() > 2 {
&args[2]
} else {
"output.dsa"
};
{
let mut builder = Compiler::new(PathBuf::from(input_file));
builder.start();
let result = builder.output().unwrap();
std::fs::write(output_file, &result).expect("Failed to write output");
info(&format!(
"Compilation Successful ✅ \n\tSource: {}\n\tOutput: {}\n",
input_file, output_file,
));
}
}
+503
View File
@@ -0,0 +1,503 @@
use core::fmt;
use common::build::BuildError;
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum CompilerError {
UnexpectedToken(String),
UnexpectedEndOfInput,
UnexpectedCharacter(char),
Undefined(Name),
InvalidSyntax(String),
Generic(String),
UnknownType,
TypeMismatch(TypeId, TypeId),
Unimplemented(String),
}
impl From<CompilerError> for BuildError {
fn from(err: CompilerError) -> Self {
BuildError::Generic(format!("{:?}", err))
}
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Name {
pub name: String,
pub namespace: Option<String>,
}
impl Name {
pub fn new(name: impl Into<String>, namespace: Option<String>) -> Self {
Self {
name: name.into(),
namespace,
}
}
}
#[derive(Debug, Clone)]
pub struct Program {
pub declarations: Vec<Declaration>,
}
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum Declaration {
Function {
name: String,
return_type: TypeId,
params: Vec<Variable>,
body: Block,
},
Variable {
var: Variable,
init: Option<ConstExpr>,
is_const: bool,
},
Dependency(Dependency),
Struct {
name: Name,
fields: Vec<Variable>,
},
}
#[derive(Debug, Clone)]
pub struct Dependency {
pub name: String,
pub path: String,
}
#[allow(unused)]
#[derive(Debug, Clone, PartialEq)]
pub enum TypeId {
U8,
U16,
U32,
I8,
I16,
I32,
Bool,
Char,
Void,
Ptr(Box<TypeId>),
Ref(Box<TypeId>),
Tuple(Vec<TypeId>),
Array {
r#type: Box<TypeId>,
size: usize,
},
UnknownCustom {
name: Name,
generics: Vec<TypeId>,
},
Struct {
name: Name,
fields: Vec<TypeId>,
generics: Vec<TypeId>,
},
}
impl TypeId {
pub fn size(&self) -> usize {
match self {
Self::U8 => 1,
Self::U16 => 2,
Self::U32 => 4,
Self::I8 => 1,
Self::I16 => 2,
Self::I32 => 4,
Self::Bool => 1,
Self::Char => 1,
Self::Void => 0,
Self::Ptr(t) => t.size(),
Self::Ref(t) => t.size(),
Self::Tuple(types) => types.iter().map(|t| t.size()).sum(),
Self::Array { r#type, size } => r#type.size() * size,
Self::UnknownCustom { .. } => 1, /* TODO: calculate type size during */
// semantic analysis
Self::Struct { fields, .. } => fields.iter().map(|t| t.size()).sum(),
}
}
}
impl fmt::Display for TypeId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::U8 => write!(f, "u8"),
Self::U16 => write!(f, "u16"),
Self::U32 => write!(f, "u32"),
Self::I8 => write!(f, "i8"),
Self::I16 => write!(f, "i16"),
Self::I32 => write!(f, "i32"),
Self::Bool => write!(f, "bool"),
Self::Char => write!(f, "char"),
Self::Void => write!(f, "void"),
Self::Ptr(t) => write!(f, "*{}", t),
Self::Ref(t) => write!(f, "&{}", t),
Self::Tuple(elems) => write!(
f,
"({})",
elems
.iter()
.map(|t| t.to_string())
.collect::<Vec<String>>()
.join(", ")
),
Self::Array { r#type, size } => write!(f, "[{}; {}]", r#type, size),
Self::UnknownCustom { name, generics } => {
write!(
f,
"{}<{}>",
name,
generics
.iter()
.map(|t| t.to_string())
.collect::<Vec<String>>()
.join(", ")
)
}
Self::Struct {
name,
fields,
generics,
} => write!(
f,
"struct<{}> {} {{{}}}",
generics
.iter()
.map(|t| t.to_string())
.collect::<Vec<String>>()
.join(", "),
name,
fields
.iter()
.map(|t| t.to_string())
.collect::<Vec<String>>()
.join(", ")
),
}
}
}
pub type Block = Vec<Statement>;
#[allow(unused)]
#[derive(Debug, Clone, PartialEq)]
pub struct Variable {
pub name: String,
pub type_id: TypeId,
}
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum Statement {
Block(Block),
Declaration {
var: Variable,
value: Option<Expression>,
},
Assign {
varname: String,
operator: AssignmentOperator,
value: Expression,
},
PtrWrite {
ptr: Expression,
value: Expression,
},
Expression {
expr: Expression,
},
If {
condition: Expression,
then_stmt: Block,
else_stmt: Block,
},
While {
condition: Expression,
body: Vec<Statement>,
},
Loop(Block),
Defer(Call),
Break,
Continue,
Return(Option<Expression>),
}
#[derive(Debug, Clone)]
pub enum ConstExpr {
Number(i32),
String(String),
}
impl fmt::Display for ConstExpr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
ConstExpr::Number(n) => write!(f, "{}", n),
ConstExpr::String(s) => write!(f, "\"{}\"", s),
}
}
}
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum Expression {
Empty,
Binary {
op: BinaryOperator,
left: Box<Expression>,
right: Box<Expression>,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
Unary {
op: UnaryOperator,
operand: Box<Expression>,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
UnaryPostfix {
op: UnaryOperator,
operand: Box<Expression>,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
Variable {
name: Name,
expr_type: Option<TypeId>,
},
TypeCast {
expr: Box<Expression>,
target_type: TypeId,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
IndexAccess {
expr: Box<Expression>,
index: Box<Expression>,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
MemberAccess {
expr: Box<Expression>,
field_name: Name,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
Call {
func: Call,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
Number(Number),
StringLiteral(String),
CharLiteral(char),
ArrayLiteral {
elements: Vec<Expression>,
type_id: Option<TypeId>,
},
StructLiteral {
name: Name,
fields: Vec<(Name, Expression)>,
type_id: Option<TypeId>,
},
}
#[derive(Debug, Clone)]
pub enum Number {
Signed(i32, Option<TypeId>),
Unsigned(u32, Option<TypeId>),
}
#[derive(Debug, Clone)]
pub struct Call {
pub name: Name,
pub args: Vec<Expression>,
}
impl Expression {
pub fn is_pure(&self) -> bool {
match self {
Expression::Number { .. } => true,
Expression::StringLiteral(_) => true,
Expression::CharLiteral(_) => true,
Expression::Call { .. } => false,
Expression::Binary { left, right, .. } => left.is_pure() && right.is_pure(),
Expression::Unary { operand, .. } => operand.is_pure(),
Expression::UnaryPostfix { operand, .. } => operand.is_pure(),
Expression::Empty => true,
Expression::Variable { .. } => true,
Expression::TypeCast { expr, .. } => expr.is_pure(),
Expression::IndexAccess { expr, index, .. } => {
expr.is_pure() && index.is_pure()
}
Expression::MemberAccess { expr, .. } => expr.is_pure(),
Expression::ArrayLiteral { elements, .. } => {
elements.iter().all(|element| element.is_pure())
}
Expression::StructLiteral { fields, .. } => {
fields.iter().all(|(_, expr)| expr.is_pure())
}
}
}
pub fn type_id(&self) -> Result<TypeId, CompilerError> {
match self {
Expression::Number(
Number::Signed(_, type_id) | Number::Unsigned(_, type_id),
) => type_id.clone().ok_or(CompilerError::UnknownType),
Expression::StringLiteral(_) => Ok(TypeId::Ptr(Box::new(TypeId::Char))),
Expression::CharLiteral(_) => Ok(TypeId::Char),
Expression::Call { type_id, .. } => {
type_id.clone().ok_or(CompilerError::UnknownType)
}
Expression::Binary { type_id, .. } => {
type_id.clone().ok_or(CompilerError::UnknownType)
}
Expression::Unary { type_id, .. } => {
type_id.clone().ok_or(CompilerError::UnknownType)
}
Expression::UnaryPostfix { type_id, .. } => {
type_id.clone().ok_or(CompilerError::UnknownType)
}
Expression::Empty => Ok(TypeId::Void),
Expression::Variable { expr_type, .. } => {
expr_type.clone().ok_or(CompilerError::UnknownType)
}
Expression::TypeCast { type_id, .. } => {
type_id.clone().ok_or(CompilerError::UnknownType)
}
Expression::IndexAccess { expr, .. } => expr.type_id(),
Expression::MemberAccess { expr, .. } => expr.type_id(),
Expression::ArrayLiteral { elements, .. } => {
let element_type = elements
.first()
.map_or(TypeId::Void, |e| e.type_id().unwrap_or(TypeId::Void));
Ok(TypeId::Array {
r#type: Box::new(element_type),
size: elements.len(),
})
}
Expression::StructLiteral { name, fields, .. } => {
let fields = fields
.iter()
.map(|(_, expr)| expr.type_id())
.collect::<Result<Vec<_>, _>>()?;
Ok(TypeId::Struct {
name: name.clone(),
fields,
generics: Vec::new(),
})
}
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub enum AssignmentOperator {
Assign,
AddAssign,
SubAssign,
MulAssign,
DivAssign,
ModAssign,
AndAssign,
OrAssign,
XorAssign,
LeftShiftAssign,
RightShiftAssign,
}
#[allow(unused)]
#[derive(Debug, Clone, PartialEq)]
pub enum BinaryOperator {
// arithmetic
Add,
Sub,
Mul,
Div,
Mod,
// comparison
Equal,
NotEqual,
LessThan,
GreaterThan,
LessOrEqual,
GreaterOrEqual,
// bitwise
BitwiseAnd,
BitwiseOr,
BitwiseXor,
// logical
LogicalAnd,
LogicalOr,
// shift
LeftShift,
RightShift,
}
impl fmt::Display for BinaryOperator {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Add => write!(f, "+"),
Self::Sub => write!(f, "-"),
Self::Mul => write!(f, "*"),
Self::Div => write!(f, "/"),
Self::Mod => write!(f, "%"),
Self::Equal => write!(f, "=="),
Self::NotEqual => write!(f, "!="),
Self::LessThan => write!(f, "<"),
Self::GreaterThan => write!(f, ">"),
Self::LessOrEqual => write!(f, "<="),
Self::GreaterOrEqual => write!(f, ">="),
Self::BitwiseAnd => write!(f, "&"),
Self::BitwiseOr => write!(f, "|"),
Self::BitwiseXor => write!(f, "^"),
Self::LogicalAnd => write!(f, "&&"),
Self::LogicalOr => write!(f, "||"),
Self::LeftShift => write!(f, "<<"),
Self::RightShift => write!(f, ">>"),
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub enum UnaryOperator {
Plus,
Minus,
AddressOf,
Dereference,
BitwiseNot,
LogicalNot,
Increment,
Decrement,
SizeOf,
}
impl fmt::Display for UnaryOperator {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Increment => write!(f, "++"),
Self::Decrement => write!(f, "--"),
Self::Plus => write!(f, "+"),
Self::Minus => write!(f, "-"),
Self::Dereference => write!(f, "*"),
Self::AddressOf => write!(f, "&"),
Self::BitwiseNot => write!(f, "~"),
Self::LogicalNot => write!(f, "!"),
Self::SizeOf => write!(f, "sizeof"),
}
}
}
+135
View File
@@ -0,0 +1,135 @@
#[must_use]
pub fn build(src: &str) -> String {
parse(src).join("\n")
}
#[must_use]
#[expect(clippy::too_many_lines)]
pub fn parse(src: &str) -> Vec<String> {
let stack = "0x10000";
let acc = "acc";
let rga = "rga";
let bpr = "bpr";
let spr = "spr";
let mut instrs = Vec::<String>::new();
// Define symbols
let print_start = "print";
let tokens = lex(src);
let mut idstack = Vec::<u32>::new();
// set up a stack
instrs.push(format!("\tlwi {}, {}", stack, bpr));
instrs.push(format!("\tmov {}, {}", bpr, spr));
// set up the data pointer
instrs.push(format!("{}: \t lwi 0x30000, {}", "main", rga));
for (id, tok) in tokens.iter().enumerate() {
match tok {
BfToken::Inc => {
instrs.push(format!("\tinc {}", acc));
}
BfToken::Dec => {
instrs.push(format!("\tdec {}", acc));
}
BfToken::IncPtr => {
instrs.push(format!("\tstw {}, {}, 0", acc, rga));
instrs.push(format!("\taddi {}, 4, {}", rga, rga));
instrs.push(format!("\tlwd {}, {}, 0", rga, acc));
}
BfToken::DecPtr => {
instrs.push(format!("\tstw {}, {}, 0", acc, rga));
instrs.push(format!("\tsubi {}, 4, {}", rga, rga));
instrs.push(format!("\tlwd {}, {}, 0", rga, acc));
}
BfToken::Out => {
instrs.push(format!("\tpush {}", acc));
instrs.push(format!("\tcall {}", print_start));
instrs.push(format!("\tpop zero"));
}
BfToken::In => {
instrs.push(format!("\tlwd 0x40000, {}, 0", acc));
}
BfToken::Forward => {
let loop_start = format!("loop_start_{}", id);
let loop_end = format!("loop_end_{}", id);
idstack.push(id as u32);
instrs.push(format!("\tcmp {}, zero", acc));
instrs.push(format!("\tjeq {}, zero", loop_end));
instrs.push(format!("{}: \tnop", loop_start));
}
BfToken::Back => {
if let Some(start_id) = idstack.pop() {
let loop_start = format!("loop_start_{}", start_id);
let loop_end = format!("loop_end_{}", start_id);
instrs.push(format!("\tcmp {}, zero", acc));
instrs.push(format!("\tjne {}, zero", loop_start));
instrs.push(format!("{}: \tnop", loop_end));
} else {
eprintln!("Warning: Unmatched ']' at position {}", id);
}
}
}
}
instrs.push("\thlt".to_string());
insert_lib(&mut instrs);
instrs
}
fn insert_lib(instrs: &mut Vec<String>) {
let bpr = "bpr";
let spr = "spr";
let rg0 = "rg0";
let rg1 = "rg1";
let print_start = "print";
let current = "current";
instrs.push(format!("\tdw {}, 0x20000", current));
instrs.push(format!("{}: \tpush {}", print_start, bpr));
instrs.push(format!("\tmov {}, {}", spr, bpr));
instrs.push(format!("\tlwd {}, {}, 8", bpr, rg0));
instrs.push(format!("\tlwd {}, {}, 0", current, rg1));
instrs.push(format!("\tstb {}, {}, 0", rg0, rg1));
instrs.push(format!("\taddi {}, 1, {}", rg1, rg1));
instrs.push(format!("\tstw {}, {}, 0", rg1, current));
instrs.push(format!("\tmov {}, {}", bpr, spr));
instrs.push(format!("\tpop {}", bpr));
instrs.push("\treturn".to_string());
}
enum BfToken {
Inc,
Dec,
IncPtr,
DecPtr,
Out,
In,
Forward,
Back,
}
fn lex(src: &str) -> Vec<BfToken> {
src.chars()
.filter_map(|c| match c {
'+' => Some(BfToken::Inc),
'-' => Some(BfToken::Dec),
'>' => Some(BfToken::IncPtr),
'<' => Some(BfToken::DecPtr),
'.' => Some(BfToken::Out),
',' => Some(BfToken::In),
'[' => Some(BfToken::Forward),
']' => Some(BfToken::Back),
_ => None,
})
.collect()
}
fn _create_symbol(id: u32) -> String {
format!("label_{}", id)
}
+13
View File
@@ -0,0 +1,13 @@
use crate::model::CompilerError;
pub mod brainf;
pub fn build_specialised(ext: &str, data: &str) -> Option<Result<String, CompilerError>> {
match ext {
"bf" => {
let res = brainf::build(data);
Some(Ok(res))
}
_ => None,
}
}