- C frontend broken for now

- If statements work properly now (hopefully)
- While loops still have issues with pushing vars to the stack; scoping needs
  to be implemented to fix this!

- refactored registers.rs and fixed faulty logic.
- made register allocation optimisations
This commit is contained in:
2026-02-08 00:14:18 +00:00
parent e9329eca95
commit 6699333b2c
9 changed files with 745 additions and 232 deletions
+88 -57
View File
@@ -5,11 +5,12 @@ use std::time::SystemTime;
use chrono::{DateTime, Local};
use super::registers::RegisterAllocator;
use crate::backend::dsa::registers::Register;
use crate::{block, comment, dsa};
use crate::model::{
BinaryOperator, CompilerError, ConstExpr, Declaration, Dependency, Expression,
Program, Statement, UnaryOperator, Variable,
BinaryOperator, Call, CompilerError, ConstExpr, Declaration, Dependency, Expression,
Program, Statement, TypeId, UnaryOperator, Variable,
};
pub struct CodeGenerator {
@@ -149,9 +150,14 @@ impl CodeGenerator {
self.generate_global(&var.name, init)
}
Declaration::Function {
name, params, body, ..
name,
params,
body,
return_type,
} => {
let func = self.generate_function(&name, &params, &body).join("\n");
let func = self
.generate_function(&name, &params, &body, return_type)
.join("\n");
self.functions.push(format!("{func}\n"));
}
@@ -169,12 +175,23 @@ impl CodeGenerator {
name: &str,
params: &[Variable],
body: &[Statement],
return_type: TypeId,
) -> Vec<String> {
let mut code = Vec::new();
// Reset allocator for new function
self.allocator.reset();
code.push(format!(
"// fn {name}({}) -> {}",
params
.iter()
.map(|p| format!("{}: {}", p.name, p.type_id))
.collect::<Vec<String>>()
.join(", "),
return_type
));
// Function prologue
code.push(format!("{}:", name));
code.push("\tpush bpr".to_string());
@@ -192,7 +209,7 @@ impl CodeGenerator {
// Generate code for function body
for stmt in body {
let stmt_code = self.generate_statement(stmt).unwrap();
let stmt_code = self.generate_statement(stmt, &mut code).unwrap();
code.extend(stmt_code);
}
@@ -211,6 +228,7 @@ impl CodeGenerator {
fn generate_statement(
&mut self,
stmt: &Statement,
func_body: &mut Vec<String>,
) -> Result<Vec<String>, CompilerError> {
let mut code = Vec::new();
@@ -218,7 +236,8 @@ impl CodeGenerator {
Statement::Declaration { var, value } => {
if let Some(expr) = value {
// Evaluate expression
let (result_reg, expr_code) = self.generate_expression(expr, true)?;
let (result_reg, expr_code) =
self.generate_expression(expr, true, func_body)?;
code.extend(expr_code);
// Store result in variable
@@ -233,14 +252,17 @@ impl CodeGenerator {
}
}
Statement::Break => unimplemented!(),
Statement::Continue => unimplemented!(),
Statement::Break => unimplemented!("need scope tracking first!"),
Statement::Continue => unimplemented!("need scope tracking first!"),
Statement::Defer(_func) => unimplemented!("we need scope tracking first!"),
Statement::PtrWrite { ptr, value } => {
let (result_reg, expr_code) = self.generate_expression(value, true)?;
let (result_reg, expr_code) =
self.generate_expression(value, true, func_body)?;
code.extend(expr_code);
let (ptr_reg, ptr_code) = self.generate_expression(ptr, true)?;
let (ptr_reg, ptr_code) =
self.generate_expression(ptr, true, func_body)?;
code.extend(ptr_code);
code.push(format!("\tstw {}, {}", result_reg, ptr_reg));
@@ -251,7 +273,8 @@ impl CodeGenerator {
Statement::Assign { varname, value } => {
// Evaluate expression
let (result_reg, expr_code) = self.generate_expression(value, true)?;
let (result_reg, expr_code) =
self.generate_expression(value, true, func_body)?;
code.extend(expr_code);
// Check if this is a global variable
@@ -270,7 +293,8 @@ impl CodeGenerator {
Statement::Return(expr) => {
if let Some(e) = expr {
let (result_reg, expr_code) = self.generate_expression(e, true)?;
let (result_reg, expr_code) =
self.generate_expression(e, true, func_body)?;
code.extend(expr_code);
code.push(format!("\tstw {}, bpr, 8", result_reg));
code.push(format!("\tjmp _ret"));
@@ -284,7 +308,8 @@ impl CodeGenerator {
else_stmt,
} => {
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
let (cond_reg, cond_code) =
self.generate_expression(condition, true, func_body)?;
code.extend(cond_code);
// Compare with zero
@@ -302,7 +327,7 @@ impl CodeGenerator {
// Then block
code.push(format!("{}:", then_label));
for s in then_stmt {
code.extend(self.generate_statement(s)?);
code.extend(self.generate_statement(s, func_body)?);
}
if then_stmt.len() == 0 {
@@ -314,7 +339,7 @@ impl CodeGenerator {
// Else block
code.push(format!("{}:", else_label));
for s in else_stmt {
code.extend(self.generate_statement(s)?);
code.extend(self.generate_statement(s, func_body)?);
}
if else_stmt.len() == 0 {
@@ -331,7 +356,8 @@ impl CodeGenerator {
code.push(format!("{}:", loop_start));
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
let (cond_reg, cond_code) =
self.generate_expression(condition, true, func_body)?;
code.extend(cond_code);
code.push(format!("\tcmp {}, zero", cond_reg));
@@ -341,7 +367,7 @@ impl CodeGenerator {
// Loop body
for s in body {
code.extend(self.generate_statement(s)?);
code.extend(self.generate_statement(s, func_body)?);
}
code.push(format!("\tjmp {}", loop_start));
@@ -354,21 +380,22 @@ impl CodeGenerator {
code.push(format!("{}:", loop_start));
for s in body {
code.extend(self.generate_statement(s)?);
code.extend(self.generate_statement(s, func_body)?);
}
code.push(format!("\tjmp {}", loop_start));
}
Statement::Expression { expr } => {
let (result_reg, expr_code) = self.generate_expression(expr, false)?;
let (result_reg, expr_code) =
self.generate_expression(expr, false, func_body)?;
code.extend(expr_code);
self.allocator.free_temp(&result_reg);
}
Statement::Block(statements) => {
for s in statements {
code.extend(self.generate_statement(s)?);
code.extend(self.generate_statement(s, func_body)?);
}
}
}
@@ -382,14 +409,10 @@ impl CodeGenerator {
&mut self,
expr: &Expression,
use_result: bool,
) -> Result<(String, Vec<String>), CompilerError> {
func_body: &mut Vec<String>,
) -> Result<(Register, Vec<String>), CompilerError> {
let mut code = Vec::new();
// optimisation to prevent generating dead code!
if expr.is_pure() && !use_result {
return Ok((String::new(), code));
}
match expr {
Expression::StringLiteral(value) => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
@@ -397,7 +420,7 @@ impl CodeGenerator {
// write string into memory
let uuid = self.get_unique_label();
code.push(format!("\tdb str_{uuid}: \"{value}\""));
func_body.insert(0, format!("db str_{uuid}: \"{value}\""));
// Load pointer to string
code.push(format!("\tlwi str_{uuid}, {reg}"));
@@ -415,7 +438,7 @@ impl CodeGenerator {
Ok((reg, code))
}
Expression::Number(value) => {
Expression::Number { value, .. } => {
let (reg, alloc_code) = self.allocator.alloc_temp()?;
code.extend(alloc_code);
@@ -446,13 +469,17 @@ impl CodeGenerator {
}
}
Expression::Binary { op, left, right } => {
Expression::Binary {
op, left, right, ..
} => {
// Evaluate left operand
let (left_reg, left_code) = self.generate_expression(left, true)?;
let (left_reg, left_code) =
self.generate_expression(left, true, func_body)?;
code.extend(left_code);
// Evaluate right operand
let (right_reg, right_code) = self.generate_expression(right, true)?;
let (right_reg, right_code) =
self.generate_expression(right, true, func_body)?;
code.extend(right_code);
// Allocate result register
@@ -485,50 +512,50 @@ impl CodeGenerator {
// Comparison operators - return 1 (true) or 0 (false)
BinaryOperator::Eq => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjeq {}", end_label));
code.push(format!("\tlli 0, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Ne => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjne {}", end_label));
code.push(format!("\tlli 0, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Lt => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjlt {}", end_label));
code.push(format!("\tlli 0, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Le => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjle {}", end_label));
code.push(format!("\tlli 0, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Gt => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjgt {}", end_label));
code.push(format!("\tlli 0, {}", result_reg));
code.push(format!("{}:", end_label));
}
BinaryOperator::Ge => {
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
code.push(format!("\tlli 0, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1
code.push(format!("\tlli 1, {}", result_reg));
let end_label = format!("_cmp_end_{}", self.get_unique_label());
code.push(format!("\tjge {}", end_label));
// Condition false: the result must be 0, exactly like the other comparison arms.
code.push(format!("\tlli 0, {}", result_reg));
code.push(format!("{}:", end_label));
}
_ => unimplemented!(),
@@ -541,11 +568,15 @@ impl CodeGenerator {
Ok((result_reg, code))
}
Expression::Call { name, args } => {
Expression::Call {
func: Call { name, args },
..
} => {
// first evaluate all the args we're going to need
let mut arg_regs = Vec::new();
for arg in args.iter().rev() {
let (arg_reg, arg_code) = self.generate_expression(arg, true)?;
let (arg_reg, arg_code) =
self.generate_expression(arg, true, func_body)?;
code.extend(arg_code);
arg_regs.push(arg_reg);
}
@@ -561,7 +592,7 @@ impl CodeGenerator {
let saved_regs = self.allocator.get_caller_saved_registers();
for reg in &saved_regs {
// spill variables to stack
code.extend(self.allocator.spill_register(reg).unwrap());
code.extend(self.allocator.free_register(reg).unwrap());
}
// Evaluate and push arguments in reverse order
@@ -587,7 +618,7 @@ impl CodeGenerator {
return Err(CompilerError::Undefined(name.clone()));
}
let result_reg: String;
let result_reg: Register;
if use_result {
let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?;
@@ -603,7 +634,7 @@ impl CodeGenerator {
}
}
} else {
result_reg = "zero".to_string();
result_reg = Register::Zero;
// Clean up arguments
if args.len() > 0 {
@@ -626,9 +657,9 @@ impl CodeGenerator {
Ok((result_reg, code))
}
Expression::Unary { op, operand } => {
Expression::Unary { op, operand, .. } => {
let (operand_reg, operand_code) =
self.generate_expression(operand, true)?;
self.generate_expression(operand, true, func_body)?;
code.extend(operand_code);
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
@@ -660,7 +691,7 @@ impl CodeGenerator {
Ok((result_reg, code))
}
Expression::Empty => Ok(("zero".to_string(), code)),
Expression::Empty => Ok((Register::Null, code)),
}
}
+315 -148
View File
@@ -1,4 +1,4 @@
use std::collections::HashMap;
use std::{collections::HashMap, fmt};
use crate::model::CompilerError;
@@ -6,78 +6,109 @@ use crate::model::CompilerError;
/// Manages general-purpose registers (rg0-rgf) and handles stack spilling
pub struct RegisterAllocator {
/// Available general-purpose registers
available_registers: Vec<String>,
/// Maps variable names to their current location (register or stack offset)
variable_locations: HashMap<String, Location>,
/// Maps registers to the variables they currently hold
register_contents: HashMap<String, String>,
register_contents: HashMap<Register, String>,
/// Current stack offset for local variables (relative to bpr)
/// Starts at -4 (going downward from base pointer)
stack_offset: i32,
/// Track which registers are currently in use
in_use: HashMap<String, bool>,
in_use: HashMap<Register, bool>,
}
#[derive(Debug, Clone)]
pub enum Location {
Register(String),
Stack(i32), // offset from bpr
pub struct Location {
register: Option<Register>,
stack: Option<i32>,
}
impl Location {
pub fn stack(offset: i32) -> Self {
Location {
register: None,
stack: Some(offset),
}
}
pub fn register(register: Register) -> Self {
Location {
register: Some(register),
stack: None,
}
}
}
impl RegisterAllocator {
pub fn new() -> Self {
// Initialize with available GP registers (rg0-rgf = 16 registers)
let registers = vec![
"rg0", "rg1", "rg2", "rg3", "rg4", "rg5", "rg6", "rg7", "rg8", "rg9", "rga",
"rgb", "rgc", "rgd", "rge", "rgf",
let in_use = vec![
Register::Rg0,
Register::Rg1,
Register::Rg2,
Register::Rg3,
Register::Rg4,
Register::Rg5,
Register::Rg6,
Register::Rg7,
Register::Rg8,
Register::Rg9,
Register::Rga,
Register::Rgb,
Register::Rgc,
Register::Rgd,
Register::Rge,
Register::Rgf,
]
.into_iter()
.map(String::from)
.iter()
.map(|&reg| (reg, false))
.collect();
RegisterAllocator {
available_registers: registers,
// available_registers: registers,
variable_locations: HashMap::new(),
register_contents: HashMap::new(),
stack_offset: -4, // Start at -4 (first local below saved bpr)
in_use: HashMap::new(),
in_use,
}
}
/// Allocate a temporary register for expression evaluation
/// Returns the register name and optionally assembly code to save it
pub fn alloc_temp(&mut self) -> Result<(String, Vec<String>), CompilerError> {
let mut code = Vec::new();
pub fn alloc_temp(&mut self) -> Result<(Register, Vec<String>), CompilerError> {
// Try to find an unused register
for reg in &self.available_registers {
if !self.in_use.get(reg).unwrap_or(&false) {
self.in_use.insert(reg.clone(), true);
return Ok((reg.clone(), code));
}
println!("finding! {:#?}", self.in_use);
if let Some(reg) = self.find_free_register() {
self.in_use.insert(reg, true);
return Ok((reg, Vec::new()));
}
// All registers in use - need to spill one
// Choose the first register with a variable we can spill
// Find a register to spill
let reg_to_spill = self
.available_registers
.iter()
.find(|reg| self.register_contents.contains_key(*reg))
.cloned();
if let Some(reg) = reg_to_spill {
// Spill this variable to stack
let spill_code = self.spill_register(&reg)?;
code.extend(spill_code);
// let reg_to_spill = self
// .available_registers
// .iter()
// .find(|reg| self.register_contents.contains_key(*reg))
// .cloned();
self.in_use.insert(reg.clone(), true);
return Ok((reg, code));
}
// if let Some(reg) = reg_to_spill {
// // Spill this variable to stack
// let spill_code = self.spill_register(&reg)?;
// code.extend(spill_code);
// self.in_use.insert(reg.clone(), true);
// return Ok((reg, code));
// }
todo!("an efficient stack spilling algorithm. needs scope awareness.");
Err(CompilerError::Generic(
"All registers are used up yet there are no variables to spill to the stack"
@@ -88,7 +119,7 @@ impl RegisterAllocator {
/// Free a temporary register after use
/// NOTE: This will NOT free registers that contain variables!
/// Variables persist throughout their scope and must not be freed
pub fn free_temp(&mut self, reg: &str) {
pub fn free_temp(&mut self, reg: &Register) {
// Check if this register contains a variable
if self.register_contents.contains_key(reg) {
// This register holds a variable - don't free it!
@@ -97,7 +128,19 @@ impl RegisterAllocator {
}
// This is a true temporary - safe to free
self.in_use.insert(reg.to_string(), false);
self.in_use.insert(*reg, false);
}
/// Forgets `var`: drops its location record and, if it was held in a
/// register, marks that register as no longer in use.
///
/// Variables without a recorded location are ignored.
pub fn free_var(&mut self, var: &str) {
    let Some(location) = self.variable_locations.remove(var) else {
        return;
    };

    if let Some(reg) = location.register {
        self.register_contents.remove(&reg);
        self.in_use.insert(reg, false);
    }
}
/// Allocate a register for a named variable
@@ -105,41 +148,46 @@ impl RegisterAllocator {
pub fn alloc_var(
&mut self,
var_name: &str,
) -> Result<(String, Vec<String>), CompilerError> {
if let Some(location) = self.variable_locations.get(var_name).cloned() {
match location {
Location::Register(reg) => {
return Ok((reg.clone(), Vec::new()));
}
Location::Stack(offset) => {
// Variable was pushed, need to calculate actual position
let (reg, mut code) = self.alloc_temp()?;
) -> Result<(Register, Vec<String>), CompilerError> {
if let Some(mut location) = self.variable_locations.get(var_name).cloned() {
// if the var is in a register we can use it already.
if let Some(reg) = location.register {
return Ok((reg, Vec::new()));
}
// Load from bpr + offset (offset is negative)
code.push(format!("\tsubi bpr {} {}", -(offset + 4), reg));
code.push(format!(
"\tldw {}, {} // bpr{}: {}",
reg,
reg,
offset - 4,
var_name
));
// if the variable is on the stack only, we need to get it in a register.
if let Some(offset) = location.stack {
// Variable was pushed, need to calculate actual position and update its
// location.
let (reg, mut code) = self.alloc_temp()?;
// Update location to register
self.variable_locations
.insert(var_name.to_string(), Location::Register(reg.clone()));
self.register_contents
.insert(reg.clone(), var_name.to_string());
// acknowledge var is now in a reg as well.
location.register = Some(reg);
return Ok((reg, code));
}
// Load from bpr + offset (offset is negative)
code.push(format!("\tsubi bpr {} {}", -(offset + 4), reg));
code.push(format!(
"\tldw {}, {} // bpr{}: {}",
reg,
reg,
offset - 4,
var_name
));
// Update location to register
self.variable_locations
.insert(var_name.to_string(), location);
self.register_contents
.insert(reg.clone(), var_name.to_string());
return Ok((reg, code));
}
}
// Variable doesn't have a location yet, allocate a new register
let (reg, code) = self.alloc_temp()?;
self.variable_locations
.insert(var_name.to_string(), Location::Register(reg.clone()));
.insert(var_name.to_string(), Location::register(reg));
self.register_contents
.insert(reg.clone(), var_name.to_string());
@@ -156,83 +204,89 @@ impl RegisterAllocator {
pub fn load_var(
&mut self,
var_name: &str,
) -> Result<(String, Vec<String>), CompilerError> {
) -> Result<(Register, Vec<String>), CompilerError> {
self.alloc_var(var_name)
}
/// Store a value from a register into a variable
/// Updates tracking and returns any necessary assembly code
pub fn store_var(&mut self, var_name: &str, source_reg: &str) -> Vec<String> {
let mut code = Vec::new();
pub fn store_var(&mut self, var_name: &str, source_reg: &Register) -> Vec<String> {
// Check if variable already has a location
if let Some(location) = self.variable_locations.get(var_name) {
match location {
Location::Register(dest_reg) => {
if dest_reg != source_reg {
code.push(format!(
"\tmov {}, {} // var {}",
source_reg, dest_reg, var_name
));
}
}
Location::Stack(offset) => {
code.push(format!(
"\tstw {}, bpr, {} // var {}",
source_reg, offset, var_name
));
// If the variable already lives in a register, the new value is written there.
if let Some(reg) = location.register {
    // Source and destination coincide: the value is already in place.
    if reg == *source_reg {
        return Vec::new();
    }
    // Otherwise copy the value into the variable's register.
    return vec![format!(
        "\tmov {}, {} // var {}",
        source_reg, reg, var_name
    )];
}
} else {
// Variable doesn't exist yet, we can just use the same reg.
// self.variable_locations.insert(
// var_name.to_string(),
// Location::Register(source_reg.to_string()),
// );
// self.register_contents
// .insert(source_reg.to_string(), var_name.to_string());
// self.in_use.insert(source_reg.to_string(), true);
let source_reg = source_reg.to_string();
// if we can avoid a move, absolutely do that.
if self.available_registers.contains(&source_reg) {
self.variable_locations
.insert(var_name.to_string(), Location::Register(source_reg.clone()));
self.register_contents
.insert(source_reg.clone(), var_name.to_string());
self.in_use.insert(source_reg, true);
} else if let Some(free_reg) = self.find_free_register() {
code.push(format!("\tmov {}, {}", source_reg, free_reg));
self.variable_locations
.insert(var_name.to_string(), Location::Register(free_reg.clone()));
self.register_contents
.insert(free_reg.clone(), var_name.to_string());
self.in_use.insert(free_reg, true);
} else {
// No free registers - allocate on stack
// code.push(format!("\tstw {}, bpr, {}", source_reg, self.stack_offset));
// self.variable_locations
// .insert(var_name.to_string(), Location::Stack(self.stack_offset));
// self.stack_offset -= 4; // Move to next stack slot
//
todo!(
"we should spill other registers and keep this variable on the stack as it's more recent!"
);
// if the variable exists on the stack but not a register we write here.
if let Some(offset) = location.stack {
return vec![format!(
"\tstw {}, bpr, {} // var {}",
source_reg, offset, var_name
)];
}
}
code
// Variable doesn't exist yet, we can just use the same reg.
// if we can avoid a move, absolutely do that.
// if this is true then there's no permanent variable here so it's safe to use.
if !self.register_contents.contains_key(source_reg) {
self.variable_locations
.insert(var_name.to_string(), Location::register(*source_reg));
self.register_contents
.insert(*source_reg, var_name.to_string());
self.in_use.insert(*source_reg, true);
return Vec::new();
}
// if current register isn't free, (eg is another variable) we assign somewhere
// else.
if let Some(free_reg) = self.find_free_register() {
self.variable_locations
.insert(var_name.to_string(), Location::register(free_reg));
self.register_contents
.insert(free_reg.clone(), var_name.to_string());
self.in_use.insert(free_reg, true);
return vec![format!("\tmov {}, {}", source_reg, free_reg)];
}
// No free registers - allocate on stack
// code.push(format!("\tstw {}, bpr, {}", source_reg, self.stack_offset));
// self.variable_locations
// .insert(var_name.to_string(), Location::Stack(self.stack_offset));
// self.stack_offset -= 4; // Move to next stack slot
//
todo!("an efficient stack spilling algorithm. needs scope awareness.");
}
/// Spill a register to the stack
/// Returns assembly code to perform the spill
pub fn spill_register(&mut self, reg: &str) -> Result<Vec<String>, CompilerError> {
/// spill a register to the stack (WITHOUT FREEING)
pub fn spill_register(
&mut self,
reg: &Register,
) -> Result<Vec<String>, CompilerError> {
let mut code = Vec::new();
if let Some(var_name) = self.register_contents.get(reg).cloned() {
// PUSH register to stack (spr decrements automatically)
// check if the variable is declared.
if let Some(var_name) = self.register_contents.get(reg).cloned()
&& let Some(location) = self.variable_locations.get_mut(&var_name)
{
// check if var is on the stack
if let Some(offset) = location.stack {
// ensure stack value is up to date with register value.
// `offset` is relative to bpr, so bpr is the base operand (same form as the
// stack write in store_var).
// NOTE(review): alloc_var reloads from `offset + 4`; confirm which slot convention is intended.
code.push(format!("\tstw {}, bpr, {}", reg, offset));
return Ok(code);
}
// if the variable is not on the stack:
// push register to stack (spr decrements automatically)
code.push(format!(
"\tpush {} // bpr{}: {}",
reg, self.stack_offset, var_name
@@ -240,37 +294,83 @@ impl RegisterAllocator {
// Track that we pushed one word
self.stack_offset -= 4;
// Update variable location - it's now at current spr
// Note: We track offset from bpr for consistency
self.variable_locations
.insert(var_name.clone(), Location::Stack(self.stack_offset));
// Remove from register tracking
self.register_contents.remove(reg);
location.stack = Some(self.stack_offset);
Ok(code)
} else {
Err(CompilerError::Generic(format!(
"Register {} does not contain a variable to spill!",
reg
)))
}
}
Ok(code)
/// Frees a register by moving the variable it holds out to the stack.
///
/// If the variable already has a stack slot, the register's current value is
/// stored back into that slot; otherwise the register is pushed and the new
/// slot is recorded. Either way the variable stops being tracked in `reg`.
///
/// Returns the assembly code that performs the spill.
///
/// # Errors
///
/// Returns `CompilerError::Generic` when `reg` does not hold a tracked
/// variable.
pub fn free_register(
    &mut self,
    reg: &Register,
) -> Result<Vec<String>, CompilerError> {
    let mut code = Vec::new();

    // Only a register that currently holds a declared variable can be spilled.
    if let Some(var_name) = self.register_contents.get(reg).cloned()
        && let Some(location) = self.variable_locations.get_mut(&var_name)
    {
        if let Some(offset) = location.stack {
            // The variable already has a slot: refresh it with the register's
            // value. `offset` is relative to bpr, so bpr is the base operand
            // (same form as the stack write in store_var).
            // NOTE(review): alloc_var reloads from `offset + 4`; confirm which
            // slot convention is intended.
            code.push(format!("\tstw {}, bpr, {}", reg, offset));
        } else {
            // No slot yet: push the register (spr decrements automatically).
            code.push(format!(
                "\tpush {} // bpr{}: {}",
                reg, self.stack_offset, var_name
            ));

            // Track that we pushed one word and remember where the variable
            // went (offsets are tracked from bpr for consistency).
            self.stack_offset -= 4;
            location.stack = Some(self.stack_offset);
        }

        // The variable no longer lives in this register.
        // NOTE(review): `in_use` is left untouched here, so the register still
        // counts as allocated until something like free_temp releases it.
        location.register = None;
        self.register_contents.remove(reg);

        Ok(code)
    } else {
        Err(CompilerError::Generic(format!(
            "Register {} does not contain a variable to spill!",
            reg
        )))
    }
}
/// Find a free register (not currently in use)
fn find_free_register(&self) -> Option<String> {
for reg in &self.available_registers {
if !self.in_use.get(reg).unwrap_or(&false) {
return Some(reg.clone());
}
}
None
fn find_free_register(&self) -> Option<Register> {
self.in_use
.iter()
.filter(|(_, in_use)| !**in_use)
.map(|(reg, _)| *reg)
.next()
}
/// Spill all registers to stack (useful before function calls)
pub fn _spill_all(&mut self) -> Vec<String> {
let mut code = Vec::new();
let regs_to_spill: Vec<String> = self.register_contents.keys().cloned().collect();
let regs_to_spill: Vec<Register> =
self.register_contents.keys().cloned().collect();
for reg in regs_to_spill {
if let Ok(spill_code) = self.spill_register(&reg) {
if let Ok(spill_code) = self.free_register(&reg) {
code.extend(spill_code);
}
}
@@ -293,23 +393,43 @@ impl RegisterAllocator {
self.variable_locations.clear();
self.register_contents.clear();
self.stack_offset = -4;
self.in_use.clear();
self.in_use = vec![
Register::Rg0,
Register::Rg1,
Register::Rg2,
Register::Rg3,
Register::Rg4,
Register::Rg5,
Register::Rg6,
Register::Rg7,
Register::Rg8,
Register::Rg9,
Register::Rga,
Register::Rgb,
Register::Rgc,
Register::Rgd,
Register::Rge,
Register::Rgf,
]
.iter()
.map(|&reg| (reg, false))
.collect();
}
/// Mark a variable as dead (no longer needed)
/// Frees its register if it's in one
pub fn _free_var(&mut self, var_name: &str) {
if let Some(Location::Register(reg)) = self.variable_locations.get(var_name) {
let reg = reg.clone();
self.register_contents.remove(&reg);
self.in_use.insert(reg, false);
}
self.variable_locations.remove(var_name);
}
// pub fn _free_var(&mut self, var_name: &str) {
// if let Some(Location::Register(reg)) = self.variable_locations.get(var_name) {
// let reg = reg.clone();
// self.register_contents.remove(&reg);
// self.in_use.insert(reg, false);
// }
// self.variable_locations.remove(var_name);
// }
/// Get list of registers that contain variables and are in use
/// These need to be saved before function calls
pub fn get_caller_saved_registers(&self) -> Vec<String> {
pub fn get_caller_saved_registers(&self) -> Vec<Register> {
self.register_contents
.iter()
.filter(|(reg, _)| *self.in_use.get(*reg).unwrap_or(&false))
@@ -346,3 +466,50 @@ impl RegisterAllocator {
code
}
}
/// A register that can be named in emitted assembly.
///
/// `Rg0` through `Rgf` are the sixteen general-purpose registers. `Zero` and
/// `Null` are additional names rendered as `zero` and `null`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Register {
    Rg0,
    Rg1,
    Rg2,
    Rg3,
    Rg4,
    Rg5,
    Rg6,
    Rg7,
    Rg8,
    Rg9,
    Rga,
    Rgb,
    Rgc,
    Rgd,
    Rge,
    Rgf,
    Zero,
    Null,
}

impl fmt::Display for Register {
    /// Writes the lowercase assembly name of the register.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mnemonic = match *self {
            Register::Rg0 => "rg0",
            Register::Rg1 => "rg1",
            Register::Rg2 => "rg2",
            Register::Rg3 => "rg3",
            Register::Rg4 => "rg4",
            Register::Rg5 => "rg5",
            Register::Rg6 => "rg6",
            Register::Rg7 => "rg7",
            Register::Rg8 => "rg8",
            Register::Rg9 => "rg9",
            Register::Rga => "rga",
            Register::Rgb => "rgb",
            Register::Rgc => "rgc",
            Register::Rgd => "rgd",
            Register::Rge => "rge",
            Register::Rgf => "rgf",
            Register::Zero => "zero",
            Register::Null => "null",
        };
        f.write_str(mnemonic)
    }
}
+13 -2
View File
@@ -351,6 +351,7 @@ impl Parser {
op,
left: Box::new(expr),
right,
type_id: None,
};
}
@@ -371,6 +372,7 @@ impl Parser {
op,
left: Box::new(expr),
right,
type_id: None,
};
}
@@ -391,6 +393,7 @@ impl Parser {
op,
left: Box::new(expr),
right,
type_id: None,
};
}
@@ -407,7 +410,11 @@ impl Parser {
if let Some(op) = op {
self.advance();
let operand = Box::new(self.parse_unary()?);
return Ok(Expression::Unary { op, operand });
return Ok(Expression::Unary {
op,
operand,
type_id: None,
});
}
self.parse_primary()
@@ -418,7 +425,10 @@ impl Parser {
TokenType::Number(n) => {
let value = *n;
self.advance();
Ok(Expression::Number(value as isize))
Ok(Expression::Number {
value: value as isize,
type_id: None,
})
}
TokenType::Identifier(name) => {
let name = name.clone();
@@ -445,6 +455,7 @@ impl Parser {
namespace: None,
},
args,
type_id: None,
})
} else {
Ok(Expression::Variable {
+4 -4
View File
@@ -2,11 +2,11 @@ use common::logging::log;
use crate::model::{CompilerError, Program};
use parser::{ParseResult, Parser};
use semantic_analyser::Analyser;
// use semantic_analyser::Analyser;
pub mod lexer;
pub mod parser;
pub mod semantic_analyser;
// pub mod semantic_analyser;
pub fn generate_ast(input: &str) -> Result<Program, CompilerError> {
log("Tokenising Input...");
@@ -30,8 +30,8 @@ pub fn generate_ast(input: &str) -> Result<Program, CompilerError> {
log("Analyzing AST...");
log("Checking Type Information...");
let analyser = Analyser::new();
analyser.analyse(ast.clone()).unwrap();
// let mut analyser = Analyser::new();
// analyser.analyse(ast.clone()).unwrap();
log("Type Checking Complete...");
Ok(ast)
+24 -7
View File
@@ -1,7 +1,7 @@
use super::lexer::Token;
use crate::model::{
BinaryOperator, Block, CompilerError, ConstExpr, Declaration, Dependency, Expression,
Program, Statement, TypeId, UnaryOperator, Variable,
BinaryOperator, Block, Call, CompilerError, ConstExpr, Declaration, Dependency,
Expression, Program, Statement, TypeId, UnaryOperator, Variable,
};
use crate::{expect_tt, expect_value};
use std::ops::{ControlFlow, FromResidual, Try};
@@ -353,7 +353,7 @@ impl Parser {
let mut expr = self.parse_additive()?;
while let Some(op) = match self.peek_next()? {
Token::EqualEqual => Some(BinaryOperator::Ne),
Token::EqualEqual => Some(BinaryOperator::Eq),
Token::BangEqual => Some(BinaryOperator::Ne),
Token::Less => Some(BinaryOperator::Lt),
Token::Greater => Some(BinaryOperator::Gt),
@@ -367,7 +367,8 @@ impl Parser {
op,
left: Box::new(expr),
right,
}
type_id: Some(TypeId::Bool),
};
}
ParseResult::Accept(expr)
@@ -387,6 +388,7 @@ impl Parser {
op,
left: Box::new(left),
right: Box::new(self.parse_additive()?),
type_id: Some(TypeId::U32),
})
}
@@ -404,6 +406,7 @@ impl Parser {
op,
left: Box::new(left),
right: Box::new(self.parse_multiplicative()?),
type_id: None,
})
}
@@ -418,14 +421,21 @@ impl Parser {
self.next()?;
let operand = Box::new(self.parse_unary()?);
ParseResult::Accept(Expression::Unary { op, operand })
ParseResult::Accept(Expression::Unary {
op,
operand,
type_id: None,
})
}
fn parse_primary(&mut self) -> ParseResult<Expression, CompilerError> {
match self.peek_next()? {
Token::Integer(value) => {
self.next()?;
ParseResult::Accept(Expression::Number(value as isize))
ParseResult::Accept(Expression::Number {
value: value as isize,
type_id: None,
})
}
Token::String(value) => {
self.next()?;
@@ -450,7 +460,14 @@ impl Parser {
let _ = expect_tt!(self.next()?, RightParen)?;
ParseResult::Accept(Expression::Call { name, args })
ParseResult::Accept(Expression::Call {
func: Call {
name: name.clone(),
args,
},
type_id: None,
})
} else {
ParseResult::Accept(Expression::Variable {
name,
+217 -4
View File
@@ -1,13 +1,226 @@
use crate::model::{CompilerError, Program};
use std::collections::HashMap;
pub struct Analyser;
use crate::model::{
BinaryOperator, // You'll need to add this to your imports
CompilerError,
Declaration,
Dependency,
Expression,
Program,
TypeId,
UnaryOperator,
};
/// Semantic analyser for a parsed program.
///
/// Owns the table of top-level symbols that `analyse` fills in and that
/// `get_type` consults when it resolves a call expression.
pub struct Analyser {
// Top-level declarations (functions, variables, dependencies) keyed by name.
symbol_table: HashMap<String, Declaration>,
}
/// Integer types the type checker treats as numeric: an untyped number
/// literal may adopt any of them, and the operands of binary operations and
/// of unary `+`/`-` must be one of them.
const NUMERIC_TYPES: &[TypeId] = &[
TypeId::U32,
TypeId::I32,
TypeId::I16,
TypeId::U16,
TypeId::I8,
TypeId::U8,
];
impl Analyser {
pub fn new() -> Self {
Self
Self {
symbol_table: HashMap::new(),
}
}
pub fn analyse(&self, _ast: Program) -> Result<(), CompilerError> {
/// Registers every top-level declaration of `ast` in the symbol table,
/// keyed by the declaration's name.
///
/// A later declaration with the same name replaces the earlier entry.
/// Currently always returns `Ok(())`.
pub fn analyse(&mut self, ast: Program) -> Result<(), CompilerError> {
    // build table of global symbols.
    for dec in ast.declarations {
        // Borrow the declaration and clone only its name; cloning the whole
        // declaration (function bodies included) just to read the name is
        // wasted work.
        let name = match &dec {
            Declaration::Function { name, .. } => name.clone(),
            Declaration::Variable { var, .. } => var.name.clone(),
            Declaration::Dependency(Dependency { name, .. }) => name.clone(),
        };

        self.symbol_table.insert(name, dec);
    }

    Ok(())
}
/// Checks `actual` against an optional expectation.
///
/// Returns `actual` when nothing is expected or when it equals the expected
/// type; otherwise fails with `CompilerError::TypeMismatch(expected, actual)`.
fn match_type(
    actual: TypeId,
    expected: Option<TypeId>,
) -> Result<TypeId, CompilerError> {
    match expected {
        Some(wanted) if wanted != actual => {
            Err(CompilerError::TypeMismatch(wanted, actual))
        }
        _ => Ok(actual),
    }
}
/// Infers the type of `expr`, records it on the node where the node has a
/// type slot, and checks it against `expected_type` when one is supplied.
///
/// # Errors
///
/// * `CompilerError::UnknownType` for a variable whose type was never set.
/// * `CompilerError::TypeMismatch(expected, actual)` when the inferred type
///   does not satisfy the expectation or an operand is not numeric.
/// * `CompilerError::Generic` for a dereference of a non-pointer, a call to
///   a name that is not a function in the symbol table, or a call with the
///   wrong number of arguments.
fn get_type(
    &mut self,
    expr: &mut Expression,
    expected_type: Option<TypeId>,
) -> Result<TypeId, CompilerError> {
    match expr {
        // Correct IFF we're expecting a void type
        Expression::Empty => Self::match_type(TypeId::Void, expected_type),

        // Correct IFF we're expecting a char type
        Expression::CharLiteral(_) => Self::match_type(TypeId::Char, expected_type),

        // Correct IFF we're expecting a pointer-to-char type
        Expression::StringLiteral(_) => {
            Self::match_type(TypeId::Ptr(Box::new(TypeId::Char)), expected_type)
        }

        Expression::Variable { expr_type, .. } => {
            let actual = expr_type.clone().ok_or(CompilerError::UnknownType)?;
            Self::match_type(actual, expected_type)
        }

        Expression::Number { type_id, .. } => {
            // If the literal already carries a type, just validate it.
            if let Some(known) = type_id.as_ref() {
                return Self::match_type(known.clone(), expected_type);
            }

            // TODO: add checks to make sure the value fits its type, e.g. a
            // u8 can't be more than 255.
            let literal_type = match expected_type {
                // The literal adopts whatever numeric type the context wants.
                Some(expected) if NUMERIC_TYPES.contains(&expected) => expected,
                // Report the literal's default type (i32) as the actual type.
                Some(expected) => {
                    return Err(CompilerError::TypeMismatch(expected, TypeId::I32));
                }
                // Default to i32 if no type information is available.
                None => TypeId::I32,
            };

            *type_id = Some(literal_type.clone());
            Ok(literal_type)
        }

        Expression::Binary {
            left,
            right,
            type_id,
            ..
        } => {
            // The parser tags comparison nodes as Bool before analysis;
            // remember that so the operand type does not overwrite it.
            let yields_bool = matches!(type_id, Some(TypeId::Bool));

            // The left operand is typed first and dictates what the right
            // operand must be.
            // NOTE(review): an untyped literal on the left therefore defaults
            // to i32 before the right operand is seen — confirm this is the
            // intended inference order.
            let left_type = self.get_type(left, None)?;
            let right_type = self.get_type(right, Some(left_type.clone()))?;

            if !(NUMERIC_TYPES.contains(&left_type) && NUMERIC_TYPES.contains(&right_type)) {
                return Err(CompilerError::TypeMismatch(left_type, right_type));
            }

            // Comparisons produce Bool; everything else has the operand type.
            let result_type = if yields_bool {
                TypeId::Bool
            } else {
                left_type
            };

            *type_id = Some(result_type.clone());
            Self::match_type(result_type, expected_type)
        }

        Expression::Unary {
            op,
            operand,
            type_id,
        } => {
            let result_type = match op {
                UnaryOperator::Plus | UnaryOperator::Minus => {
                    // Unary +/- require numeric operands
                    let inner_type = self.get_type(operand, None)?;
                    if !NUMERIC_TYPES.contains(&inner_type) {
                        // (expected, actual), matching `match_type`.
                        return Err(CompilerError::TypeMismatch(TypeId::I32, inner_type));
                    }
                    inner_type
                }

                UnaryOperator::Dereference => {
                    // For dereference (*ptr), the operand must be a pointer
                    // and the result type is what the pointer points to
                    match self.get_type(operand, None)? {
                        TypeId::Ptr(pointee) => *pointee,
                        inner_type => {
                            return Err(CompilerError::Generic(format!(
                                "Cannot dereference non-pointer type: {:?}",
                                inner_type
                            )));
                        }
                    }
                }

                UnaryOperator::Reference => {
                    // If a Ptr(T) is expected, the operand should have type T;
                    // otherwise the operand is inferred without a hint.
                    let expected_inner = match &expected_type {
                        Some(TypeId::Ptr(inner)) => Some((**inner).clone()),
                        _ => None,
                    };

                    let inner_type = self.get_type(operand, expected_inner)?;
                    TypeId::Ptr(Box::new(inner_type))
                }
            };

            *type_id = Some(result_type.clone());
            Self::match_type(result_type, expected_type)
        }

        Expression::Call { func, type_id } => {
            // Copy the signature out of the symbol table so the borrow of
            // `self` ends before the recursive `get_type` calls below.
            let (param_types, return_type) = match self.symbol_table.get(&func.name.name) {
                Some(Declaration::Function {
                    params,
                    return_type,
                    ..
                }) => (
                    params
                        .iter()
                        .map(|param| param.type_id.clone())
                        .collect::<Vec<TypeId>>(),
                    return_type.clone(),
                ),
                _ => {
                    return Err(CompilerError::Generic(format!(
                        "Function {} not found in symbol table",
                        func.name.name
                    )));
                }
            };

            // check that we've given the right number of arguments.
            if func.args.len() != param_types.len() {
                return Err(CompilerError::Generic(format!(
                    "Function {} expected {} arguments but received {}",
                    func.name.name,
                    param_types.len(),
                    func.args.len()
                )));
            }

            for (arg, param_type) in func.args.iter_mut().zip(param_types) {
                // check that the argument type matches the parameter type.
                let provided_type = self.get_type(arg, Some(param_type.clone()))?;
                if provided_type != param_type {
                    return Err(CompilerError::TypeMismatch(param_type, provided_type));
                }
            }

            *type_id = Some(return_type.clone());
            Self::match_type(return_type, expected_type)
        }
    }
}
}
+2 -2
View File
@@ -1,12 +1,12 @@
use crate::model::{CompilerError, Program};
mod c;
// mod c;
mod dsc;
pub fn compiler_frontend(ext: &str, data: &str) -> Result<Program, CompilerError> {
match ext {
"dsc" => Ok(dsc::generate_ast(&data)?),
"c" => Ok(c::generate_ast(&data)?),
// "c" => Ok(c::generate_ast(&data)?),
_ => Err(CompilerError::Generic(format!(
"File type {} not supported",
ext
+2
View File
@@ -46,6 +46,8 @@ pub fn compile_file(
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
};
println!("Parsed AST: {:#?}", ast);
let output_ext = output_path
.extension()
.and_then(|s| s.to_str())
+80 -8
View File
@@ -9,9 +9,11 @@ pub enum CompilerError {
Undefined(Name),
InvalidSyntax(String),
Generic(String),
UnknownType,
TypeMismatch(TypeId, TypeId),
}
#[derive(Debug, PartialEq, Clone)]
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Name {
pub name: String,
pub namespace: Option<String>,
@@ -46,7 +48,7 @@ pub struct Dependency {
}
#[allow(unused)]
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq)]
pub enum TypeId {
U8,
U16,
@@ -54,18 +56,45 @@ pub enum TypeId {
I8,
I16,
I32,
Bool,
Char,
Void,
Ptr(Box<TypeId>),
Ref(Box<TypeId>),
Array(Box<TypeId>, usize),
Struct { name: Name, fields: Vec<Variable> },
Struct { name: Name, fields: Vec<TypeId> },
}
/// Renders a type in source-like notation, e.g. `u8`, `*char`, `&i32`,
/// `[u16; 4]`.
///
/// Struct fields are printed positionally as `index: type`, separated by
/// `, `.
impl fmt::Display for TypeId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::U8 => f.write_str("u8"),
            Self::U16 => f.write_str("u16"),
            Self::U32 => f.write_str("u32"),
            Self::I8 => f.write_str("i8"),
            Self::I16 => f.write_str("i16"),
            Self::I32 => f.write_str("i32"),
            Self::Bool => f.write_str("bool"),
            Self::Char => f.write_str("char"),
            Self::Void => f.write_str("void"),
            Self::Ptr(t) => write!(f, "*{}", t),
            Self::Ref(t) => write!(f, "&{}", t),
            Self::Array(t, len) => write!(f, "[{}; {}]", t, len),
            Self::Struct { name, fields } => {
                write!(f, "struct {} {{", name)?;
                for (i, field) in fields.iter().enumerate() {
                    // Without a separator consecutive fields run together
                    // (`0: u81: u16`).
                    if i > 0 {
                        f.write_str(", ")?;
                    }
                    write!(f, "{}: {}", i, field)?;
                }
                f.write_str("}")
            }
        }
    }
}
pub type Block = Vec<Statement>;
#[allow(unused)]
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq)]
pub struct Variable {
pub name: String,
pub type_id: TypeId,
@@ -100,6 +129,7 @@ pub enum Statement {
body: Vec<Statement>,
},
Loop(Block),
Defer(Call),
Break,
Continue,
Return(Option<Expression>),
@@ -128,28 +158,47 @@ pub enum Expression {
op: BinaryOperator,
left: Box<Expression>,
right: Box<Expression>,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
Unary {
op: UnaryOperator,
operand: Box<Expression>,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
Variable {
name: Name,
expr_type: Option<TypeId>,
},
Call {
name: Name,
args: Vec<Expression>,
func: Call,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
Number {
value: isize,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
Number(isize),
StringLiteral(String),
CharLiteral(char),
}
/// A call site: the name of the callee plus the argument expressions.
///
/// Used as the payload of both `Expression::Call` and `Statement::Defer`.
#[derive(Debug, Clone)]
pub struct Call {
// Name of the function being called.
pub name: Name,
// Argument expressions, in call order.
pub args: Vec<Expression>,
}
impl Expression {
pub fn is_pure(&self) -> bool {
match self {
Expression::Number(_) => true,
Expression::Number { .. } => true,
Expression::StringLiteral(_) => true,
Expression::CharLiteral(_) => true,
Expression::Call { .. } => false,
@@ -159,6 +208,29 @@ impl Expression {
Expression::Variable { .. } => true,
}
}
/// Returns the type of this expression.
///
/// Literals and the empty expression have a fixed type. Every other node
/// reports the type stored in its optional type slot, failing with
/// `CompilerError::UnknownType` while that slot is still `None`.
pub fn type_id(&self) -> Result<TypeId, CompilerError> {
    let slot = match self {
        // Fixed-type nodes need no annotation.
        Expression::StringLiteral(_) => return Ok(TypeId::Ptr(Box::new(TypeId::Char))),
        Expression::CharLiteral(_) => return Ok(TypeId::Char),
        Expression::Empty => return Ok(TypeId::Void),

        // Everything else carries an optional annotation.
        Expression::Number { type_id, .. }
        | Expression::Call { type_id, .. }
        | Expression::Binary { type_id, .. }
        | Expression::Unary { type_id, .. } => type_id,
        Expression::Variable { expr_type, .. } => expr_type,
    };

    slot.clone().ok_or(CompilerError::UnknownType)
}
}
#[allow(unused)]