diff --git a/compiler/src/backend/dsa/codegen.rs b/compiler/src/backend/dsa/codegen.rs index e6a4dca..680f9ad 100644 --- a/compiler/src/backend/dsa/codegen.rs +++ b/compiler/src/backend/dsa/codegen.rs @@ -5,11 +5,12 @@ use std::time::SystemTime; use chrono::{DateTime, Local}; use super::registers::RegisterAllocator; +use crate::backend::dsa::registers::Register; use crate::{block, comment, dsa}; use crate::model::{ - BinaryOperator, CompilerError, ConstExpr, Declaration, Dependency, Expression, - Program, Statement, UnaryOperator, Variable, + BinaryOperator, Call, CompilerError, ConstExpr, Declaration, Dependency, Expression, + Program, Statement, TypeId, UnaryOperator, Variable, }; pub struct CodeGenerator { @@ -149,9 +150,14 @@ impl CodeGenerator { self.generate_global(&var.name, init) } Declaration::Function { - name, params, body, .. + name, + params, + body, + return_type, } => { - let func = self.generate_function(&name, ¶ms, &body).join("\n"); + let func = self + .generate_function(&name, ¶ms, &body, return_type) + .join("\n"); self.functions.push(format!("{func}\n")); } @@ -169,12 +175,23 @@ impl CodeGenerator { name: &str, params: &[Variable], body: &[Statement], + return_type: TypeId, ) -> Vec { let mut code = Vec::new(); // Reset allocator for new function self.allocator.reset(); + code.push(format!( + "// fn {name}({}) -> {}", + params + .iter() + .map(|p| format!("{}: {}", p.name, p.type_id)) + .collect::>() + .join(", "), + return_type + )); + // Function prologue code.push(format!("{}:", name)); code.push("\tpush bpr".to_string()); @@ -192,7 +209,7 @@ impl CodeGenerator { // Generate code for function body for stmt in body { - let stmt_code = self.generate_statement(stmt).unwrap(); + let stmt_code = self.generate_statement(stmt, &mut code).unwrap(); code.extend(stmt_code); } @@ -211,6 +228,7 @@ impl CodeGenerator { fn generate_statement( &mut self, stmt: &Statement, + func_body: &mut Vec, ) -> Result, CompilerError> { let mut code = Vec::new(); @@ 
-218,7 +236,8 @@ impl CodeGenerator { Statement::Declaration { var, value } => { if let Some(expr) = value { // Evaluate expression - let (result_reg, expr_code) = self.generate_expression(expr, true)?; + let (result_reg, expr_code) = + self.generate_expression(expr, true, func_body)?; code.extend(expr_code); // Store result in variable @@ -233,14 +252,17 @@ impl CodeGenerator { } } - Statement::Break => unimplemented!(), - Statement::Continue => unimplemented!(), + Statement::Break => unimplemented!("need scope tracking first!"), + Statement::Continue => unimplemented!("need scope tracking first!"), + Statement::Defer(_func) => unimplemented!("we need scope tracking first!"), Statement::PtrWrite { ptr, value } => { - let (result_reg, expr_code) = self.generate_expression(value, true)?; + let (result_reg, expr_code) = + self.generate_expression(value, true, func_body)?; code.extend(expr_code); - let (ptr_reg, ptr_code) = self.generate_expression(ptr, true)?; + let (ptr_reg, ptr_code) = + self.generate_expression(ptr, true, func_body)?; code.extend(ptr_code); code.push(format!("\tstw {}, {}", result_reg, ptr_reg)); @@ -251,7 +273,8 @@ impl CodeGenerator { Statement::Assign { varname, value } => { // Evaluate expression - let (result_reg, expr_code) = self.generate_expression(value, true)?; + let (result_reg, expr_code) = + self.generate_expression(value, true, func_body)?; code.extend(expr_code); // Check if this is a global variable @@ -270,7 +293,8 @@ impl CodeGenerator { Statement::Return(expr) => { if let Some(e) = expr { - let (result_reg, expr_code) = self.generate_expression(e, true)?; + let (result_reg, expr_code) = + self.generate_expression(e, true, func_body)?; code.extend(expr_code); code.push(format!("\tstw {}, bpr, 8", result_reg)); code.push(format!("\tjmp _ret")); @@ -284,7 +308,8 @@ impl CodeGenerator { else_stmt, } => { // Generate condition - let (cond_reg, cond_code) = self.generate_expression(condition, true)?; + let (cond_reg, cond_code) = + 
self.generate_expression(condition, true, func_body)?; code.extend(cond_code); // Compare with zero @@ -302,7 +327,7 @@ impl CodeGenerator { // Then block code.push(format!("{}:", then_label)); for s in then_stmt { - code.extend(self.generate_statement(s)?); + code.extend(self.generate_statement(s, func_body)?); } if then_stmt.len() == 0 { @@ -314,7 +339,7 @@ impl CodeGenerator { // Else block code.push(format!("{}:", else_label)); for s in else_stmt { - code.extend(self.generate_statement(s)?); + code.extend(self.generate_statement(s, func_body)?); } if else_stmt.len() == 0 { @@ -331,7 +356,8 @@ impl CodeGenerator { code.push(format!("{}:", loop_start)); // Generate condition - let (cond_reg, cond_code) = self.generate_expression(condition, true)?; + let (cond_reg, cond_code) = + self.generate_expression(condition, true, func_body)?; code.extend(cond_code); code.push(format!("\tcmp {}, zero", cond_reg)); @@ -341,7 +367,7 @@ impl CodeGenerator { // Loop body for s in body { - code.extend(self.generate_statement(s)?); + code.extend(self.generate_statement(s, func_body)?); } code.push(format!("\tjmp {}", loop_start)); @@ -354,21 +380,22 @@ impl CodeGenerator { code.push(format!("{}:", loop_start)); for s in body { - code.extend(self.generate_statement(s)?); + code.extend(self.generate_statement(s, func_body)?); } code.push(format!("\tjmp {}", loop_start)); } Statement::Expression { expr } => { - let (result_reg, expr_code) = self.generate_expression(expr, false)?; + let (result_reg, expr_code) = + self.generate_expression(expr, false, func_body)?; code.extend(expr_code); self.allocator.free_temp(&result_reg); } Statement::Block(statements) => { for s in statements { - code.extend(self.generate_statement(s)?); + code.extend(self.generate_statement(s, func_body)?); } } } @@ -382,14 +409,10 @@ impl CodeGenerator { &mut self, expr: &Expression, use_result: bool, - ) -> Result<(String, Vec), CompilerError> { + func_body: &mut Vec, + ) -> Result<(Register, Vec), 
CompilerError> { let mut code = Vec::new(); - // optimisation to prevent generating dead code! - if expr.is_pure() && !use_result { - return Ok((String::new(), code)); - } - match expr { Expression::StringLiteral(value) => { let (reg, alloc_code) = self.allocator.alloc_temp()?; @@ -397,7 +420,7 @@ impl CodeGenerator { // write string into memory let uuid = self.get_unique_label(); - code.push(format!("\tdb str_{uuid}: \"{value}\"")); + func_body.insert(0, format!("db str_{uuid}: \"{value}\"")); // Load pointer to string code.push(format!("\tlwi str_{uuid}, {reg}")); @@ -415,7 +438,7 @@ impl CodeGenerator { Ok((reg, code)) } - Expression::Number(value) => { + Expression::Number { value, .. } => { let (reg, alloc_code) = self.allocator.alloc_temp()?; code.extend(alloc_code); @@ -446,13 +469,17 @@ impl CodeGenerator { } } - Expression::Binary { op, left, right } => { + Expression::Binary { + op, left, right, .. + } => { // Evaluate left operand - let (left_reg, left_code) = self.generate_expression(left, true)?; + let (left_reg, left_code) = + self.generate_expression(left, true, func_body)?; code.extend(left_code); // Evaluate right operand - let (right_reg, right_code) = self.generate_expression(right, true)?; + let (right_reg, right_code) = + self.generate_expression(right, true, func_body)?; code.extend(right_code); // Allocate result register @@ -485,50 +512,50 @@ impl CodeGenerator { // Comparison operators - return 1 (true) or 0 (false) BinaryOperator::Eq => { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1 code.push(format!("\tlli 1, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + code.push(format!("\tjeq {}", end_label)); + code.push(format!("\tlli 0, {}", result_reg)); code.push(format!("{}:", end_label)); } 
BinaryOperator::Ne => { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1 code.push(format!("\tlli 1, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + code.push(format!("\tjne {}", end_label)); + code.push(format!("\tlli 0, {}", result_reg)); code.push(format!("{}:", end_label)); } BinaryOperator::Lt => { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1 code.push(format!("\tlli 1, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + code.push(format!("\tjlt {}", end_label)); + code.push(format!("\tlli 0, {}", result_reg)); code.push(format!("{}:", end_label)); } BinaryOperator::Le => { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1 code.push(format!("\tlli 1, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + code.push(format!("\tjle {}", end_label)); + code.push(format!("\tlli 0, {}", result_reg)); code.push(format!("{}:", end_label)); } BinaryOperator::Gt => { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1 code.push(format!("\tlli 1, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + 
code.push(format!("\tjgt {}", end_label)); + code.push(format!("\tlli 0, {}", result_reg)); code.push(format!("{}:", end_label)); } BinaryOperator::Ge => { code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 0, {}", result_reg)); - let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1 code.push(format!("\tlli 1, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + code.push(format!("\tjge {}", end_label)); + code.push(format!("\tlli 0, {}", result_reg)); code.push(format!("{}:", end_label)); } _ => unimplemented!(), @@ -541,11 +568,15 @@ impl CodeGenerator { Ok((result_reg, code)) } - Expression::Call { name, args } => { + Expression::Call { + func: Call { name, args }, + .. + } => { // first evaluate all the args we're going to need let mut arg_regs = Vec::new(); for arg in args.iter().rev() { - let (arg_reg, arg_code) = self.generate_expression(arg, true)?; + let (arg_reg, arg_code) = + self.generate_expression(arg, true, func_body)?; code.extend(arg_code); arg_regs.push(arg_reg); } @@ -561,7 +592,7 @@ impl CodeGenerator { let saved_regs = self.allocator.get_caller_saved_registers(); for reg in &saved_regs { // spill variables to stack - code.extend(self.allocator.spill_register(reg).unwrap()); + code.extend(self.allocator.free_register(reg).unwrap()); } // Evaluate and push arguments in reverse order @@ -587,7 +618,7 @@ impl CodeGenerator { return Err(CompilerError::Undefined(name.clone())); } - let result_reg: String; + let result_reg: Register; if use_result { let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?; @@ -603,7 +634,7 @@ impl CodeGenerator { } } } else { - result_reg = "zero".to_string(); + result_reg = Register::Zero; // Clean up arguments if args.len() > 0 { @@ -626,9 +657,9 @@ impl CodeGenerator { Ok((result_reg, code)) } - Expression::Unary { op, operand } => { +
Expression::Unary { op, operand, .. } => { let (operand_reg, operand_code) = - self.generate_expression(operand, true)?; + self.generate_expression(operand, true, func_body)?; code.extend(operand_code); let (result_reg, result_alloc) = self.allocator.alloc_temp()?; @@ -660,7 +691,7 @@ impl CodeGenerator { Ok((result_reg, code)) } - Expression::Empty => Ok(("zero".to_string(), code)), + Expression::Empty => Ok((Register::Null, code)), } } diff --git a/compiler/src/backend/dsa/registers.rs b/compiler/src/backend/dsa/registers.rs index 00f59e7..3e3453b 100644 --- a/compiler/src/backend/dsa/registers.rs +++ b/compiler/src/backend/dsa/registers.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::{collections::HashMap, fmt}; use crate::model::CompilerError; @@ -6,78 +6,109 @@ use crate::model::CompilerError; /// Manages general-purpose registers (rg0-rgf) and handles stack spilling pub struct RegisterAllocator { /// Available general-purpose registers - available_registers: Vec, /// Maps variable names to their current location (register or stack offset) variable_locations: HashMap, /// Maps registers to the variables they currently hold - register_contents: HashMap, + register_contents: HashMap, /// Current stack offset for local variables (relative to bpr) /// Starts at -4 (going downward from base pointer) stack_offset: i32, /// Track which registers are currently in use - in_use: HashMap, + in_use: HashMap, } #[derive(Debug, Clone)] -pub enum Location { - Register(String), - Stack(i32), // offset from bpr +pub struct Location { + register: Option, + stack: Option, +} + +impl Location { + pub fn stack(offset: i32) -> Self { + Location { + register: None, + stack: Some(offset), + } + } + + pub fn register(register: Register) -> Self { + Location { + register: Some(register), + stack: None, + } + } } impl RegisterAllocator { pub fn new() -> Self { // Initialize with available GP registers (rg0-rgf = 16 registers) - let registers = vec![ - "rg0", "rg1", "rg2", 
"rg3", "rg4", "rg5", "rg6", "rg7", "rg8", "rg9", "rga", - "rgb", "rgc", "rgd", "rge", "rgf", + let in_use = vec![ + Register::Rg0, + Register::Rg1, + Register::Rg2, + Register::Rg3, + Register::Rg4, + Register::Rg5, + Register::Rg6, + Register::Rg7, + Register::Rg8, + Register::Rg9, + Register::Rga, + Register::Rgb, + Register::Rgc, + Register::Rgd, + Register::Rge, + Register::Rgf, ] - .into_iter() - .map(String::from) + .iter() + .map(|®| (reg, false)) .collect(); RegisterAllocator { - available_registers: registers, + // available_registers: registers, variable_locations: HashMap::new(), register_contents: HashMap::new(), stack_offset: -4, // Start at -4 (first local below saved bpr) - in_use: HashMap::new(), + in_use, } } /// Allocate a temporary register for expression evaluation /// Returns the register name and optionally assembly code to save it - pub fn alloc_temp(&mut self) -> Result<(String, Vec), CompilerError> { - let mut code = Vec::new(); - + pub fn alloc_temp(&mut self) -> Result<(Register, Vec), CompilerError> { // Try to find an unused register - for reg in &self.available_registers { - if !self.in_use.get(reg).unwrap_or(&false) { - self.in_use.insert(reg.clone(), true); - return Ok((reg.clone(), code)); - } + + println!("finding! 
{:#?}", self.in_use); + + if let Some(reg) = self.find_free_register() { + self.in_use.insert(reg, true); + return Ok((reg, Vec::new())); } // All registers in use - need to spill one // Choose the first register with a variable we can spill // Find a register to spill - let reg_to_spill = self - .available_registers - .iter() - .find(|reg| self.register_contents.contains_key(*reg)) - .cloned(); - if let Some(reg) = reg_to_spill { - // Spill this variable to stack - let spill_code = self.spill_register(®)?; - code.extend(spill_code); + // let reg_to_spill = self + // .available_registers + // .iter() + // .find(|reg| self.register_contents.contains_key(*reg)) + // .cloned(); - self.in_use.insert(reg.clone(), true); - return Ok((reg, code)); - } + // if let Some(reg) = reg_to_spill { + // // Spill this variable to stack + // let spill_code = self.spill_register(®)?; + // code.extend(spill_code); + + // self.in_use.insert(reg.clone(), true); + // return Ok((reg, code)); + // } + + todo!("an efficient stack spilling algorithm. needs scope awareness."); Err(CompilerError::Generic( "All registers are used up yet there are no variables to spill to the stack" @@ -88,7 +119,7 @@ impl RegisterAllocator { /// Free a temporary register after use /// NOTE: This will NOT free registers that contain variables! /// Variables persist throughout their scope and must not be freed - pub fn free_temp(&mut self, reg: &str) { + pub fn free_temp(&mut self, reg: &Register) { // Check if this register contains a variable if self.register_contents.contains_key(reg) { // This register holds a variable - don't free it! 
@@ -97,7 +128,19 @@ impl RegisterAllocator { } // This is a true temporary - safe to free - self.in_use.insert(reg.to_string(), false); + self.in_use.insert(*reg, false); + } + + pub fn free_var(&mut self, var: &str) { + // Check if this variable is in a register + if let Some(location) = self.variable_locations.get(var).cloned() { + if let Some(reg) = location.register { + self.register_contents.remove(®); + self.in_use.insert(reg, false); + } + + self.variable_locations.remove(var); + } } /// Allocate a register for a named variable @@ -105,41 +148,46 @@ impl RegisterAllocator { pub fn alloc_var( &mut self, var_name: &str, - ) -> Result<(String, Vec), CompilerError> { - if let Some(location) = self.variable_locations.get(var_name).cloned() { - match location { - Location::Register(reg) => { - return Ok((reg.clone(), Vec::new())); - } - Location::Stack(offset) => { - // Variable was pushed, need to calculate actual position - let (reg, mut code) = self.alloc_temp()?; + ) -> Result<(Register, Vec), CompilerError> { + if let Some(mut location) = self.variable_locations.get(var_name).cloned() { + // if the var is in a register we can use it already. + if let Some(reg) = location.register { + return Ok((reg, Vec::new())); + } - // Load from bpr + offset (offset is negative) - code.push(format!("\tsubi bpr {} {}", -(offset + 4), reg)); - code.push(format!( - "\tldw {}, {} // bpr{}: {}", - reg, - reg, - offset - 4, - var_name - )); + // if the variable is on the stack only, we need to get it in a register. + if let Some(offset) = location.stack { + // Variable was pushed, need to calculate actual position and update its + // location. + let (reg, mut code) = self.alloc_temp()?; - // Update location to register - self.variable_locations - .insert(var_name.to_string(), Location::Register(reg.clone())); - self.register_contents - .insert(reg.clone(), var_name.to_string()); + // acknowledge var is now in a reg as well. 
+ location.register = Some(reg); - return Ok((reg, code)); - } + // Load from bpr + offset (offset is negative) + code.push(format!("\tsubi bpr {} {}", -(offset + 4), reg)); + code.push(format!( + "\tldw {}, {} // bpr{}: {}", + reg, + reg, + offset - 4, + var_name + )); + + // Update location to register + self.variable_locations + .insert(var_name.to_string(), location); + self.register_contents + .insert(reg.clone(), var_name.to_string()); + + return Ok((reg, code)); } } // Variable doesn't have a location yet, allocate a new register let (reg, code) = self.alloc_temp()?; self.variable_locations - .insert(var_name.to_string(), Location::Register(reg.clone())); + .insert(var_name.to_string(), Location::register(reg)); self.register_contents .insert(reg.clone(), var_name.to_string()); @@ -156,83 +204,89 @@ impl RegisterAllocator { pub fn load_var( &mut self, var_name: &str, - ) -> Result<(String, Vec), CompilerError> { + ) -> Result<(Register, Vec), CompilerError> { self.alloc_var(var_name) } /// Store a value from a register into a variable /// Updates tracking and returns any necessary assembly code - pub fn store_var(&mut self, var_name: &str, source_reg: &str) -> Vec { - let mut code = Vec::new(); - + pub fn store_var(&mut self, var_name: &str, source_reg: &Register) -> Vec { // Check if variable already has a location if let Some(location) = self.variable_locations.get(var_name) { - match location { - Location::Register(dest_reg) => { - if dest_reg != source_reg { - code.push(format!( - "\tmov {}, {} // var {}", - source_reg, dest_reg, var_name - )); - } - } - Location::Stack(offset) => { - code.push(format!( - "\tstw {}, bpr, {} // var {}", - source_reg, offset, var_name - )); + // if the variable exists in a register we write to that. 
+ if let Some(reg) = location.register { + if reg == *source_reg { + // value already lives in the variable's register: nothing to emit + return Vec::new(); + } else { + return vec![format!("\tmov {}, {} // var {}", source_reg, reg, var_name)]; } } - } else { - // Variable doesn't exist yet, we can just use the same reg. - // self.variable_locations.insert( - // var_name.to_string(), - // Location::Register(source_reg.to_string()), - // ); - // self.register_contents - // .insert(source_reg.to_string(), var_name.to_string()); - // self.in_use.insert(source_reg.to_string(), true); - - let source_reg = source_reg.to_string(); - - // if we can avoid a move, absolutely do that. - if self.available_registers.contains(&source_reg) { - self.variable_locations - .insert(var_name.to_string(), Location::Register(source_reg.clone())); - self.register_contents - .insert(source_reg.clone(), var_name.to_string()); - self.in_use.insert(source_reg, true); - } else if let Some(free_reg) = self.find_free_register() { - code.push(format!("\tmov {}, {}", source_reg, free_reg)); - self.variable_locations - .insert(var_name.to_string(), Location::Register(free_reg.clone())); - self.register_contents - .insert(free_reg.clone(), var_name.to_string()); - self.in_use.insert(free_reg, true); - } else { - // No free registers - allocate on stack - // code.push(format!("\tstw {}, bpr, {}", source_reg, self.stack_offset)); - // self.variable_locations - // .insert(var_name.to_string(), Location::Stack(self.stack_offset)); - // self.stack_offset -= 4; // Move to next stack slot - // - todo!( - "we should spill other registers and keep this variable on the stack as it's more recent!" - ); + // if the variable exists on the stack but not a register we write here. + if let Some(offset) = location.stack { + return vec![format!( + "\tstw {}, bpr, {} // var {}", + source_reg, offset, var_name + )]; } } - code + // Variable doesn't exist yet, we can just use the same reg. + // if we can avoid a move, absolutely do that.
+ + // if this is true then there's no permanent variable here so it's safe to use. + if !self.register_contents.contains_key(source_reg) { + self.variable_locations + .insert(var_name.to_string(), Location::register(*source_reg)); + self.register_contents + .insert(*source_reg, var_name.to_string()); + self.in_use.insert(*source_reg, true); + + return Vec::new(); + } + + // if current register isn't free, (eg is another variable) we assign somewhere + // else. + if let Some(free_reg) = self.find_free_register() { + self.variable_locations + .insert(var_name.to_string(), Location::register(free_reg)); + self.register_contents + .insert(free_reg.clone(), var_name.to_string()); + self.in_use.insert(free_reg, true); + + return vec![format!("\tmov {}, {}", source_reg, free_reg)]; + } + + // No free registers - allocate on stack + // code.push(format!("\tstw {}, bpr, {}", source_reg, self.stack_offset)); + // self.variable_locations + // .insert(var_name.to_string(), Location::Stack(self.stack_offset)); + // self.stack_offset -= 4; // Move to next stack slot + // + todo!("an efficient stack spilling algorithm. needs scope awareness."); } - /// Spill a register to the stack - /// Returns assembly code to perform the spill - pub fn spill_register(&mut self, reg: &str) -> Result, CompilerError> { + /// spill a register to the stack (WITHOUT FREEING) + pub fn spill_register( + &mut self, + reg: &Register, + ) -> Result, CompilerError> { let mut code = Vec::new(); - if let Some(var_name) = self.register_contents.get(reg).cloned() { - // PUSH register to stack (spr decrements automatically) + // check if the variable is declared. + if let Some(var_name) = self.register_contents.get(reg).cloned() + && let Some(location) = self.variable_locations.get_mut(&var_name) + { + // check if var is on the stack + if let Some(offset) = location.stack { + // ensure stack value is up to date with register value. 
+ code.push(format!("\tstw {}, {}", reg, offset)); + return Ok(code); + } + + // if the variable is not on the stack: + // push register to stack (spr decrements automatically) code.push(format!( "\tpush {} // bpr{}: {}", reg, self.stack_offset, var_name @@ -240,37 +294,83 @@ impl RegisterAllocator { // Track that we pushed one word self.stack_offset -= 4; - // Update variable location - it's now at current spr // Note: We track offset from bpr for consistency - self.variable_locations - .insert(var_name.clone(), Location::Stack(self.stack_offset)); - - // Remove from register tracking - self.register_contents.remove(reg); + location.stack = Some(self.stack_offset); + Ok(code) + } else { + Err(CompilerError::Generic(format!( + "Register {} does not contain a variable to spill!", + reg + ))) } + } - Ok(code) + /// free a register by spilling it to the stack. + /// Returns assembly code to perform the spill + pub fn free_register( + &mut self, + reg: &Register, + ) -> Result, CompilerError> { + let mut code = Vec::new(); + + // check if the variable is declared. + if let Some(var_name) = self.register_contents.get(reg).cloned() + && let Some(location) = self.variable_locations.get_mut(&var_name) + { + // check if var name is on the stack + if let Some(offset) = location.stack { + // store current register value in stack location + code.push(format!("\tstw {}, {}", reg, offset)); + + // free the register. 
+ location.register = None; + self.register_contents.remove(reg); + return Ok(code); + } + + // if the variable is not on the stack: + // push register to stack (spr decrements automatically) + code.push(format!( + "\tpush {} // bpr{}: {}", + reg, self.stack_offset, var_name + )); + + // Track that we pushed one word + self.stack_offset -= 4; + // Update variable location - it's now at current spr + // Note: We track offset from bpr for consistency + location.stack = Some(self.stack_offset); + location.register = None; + self.register_contents.remove(reg); + + Ok(code) + } else { + Err(CompilerError::Generic(format!( + "Register {} does not contain a variable to spill!", + reg + ))) + } } /// Find a free register (not currently in use) - fn find_free_register(&self) -> Option { - for reg in &self.available_registers { - if !self.in_use.get(reg).unwrap_or(&false) { - return Some(reg.clone()); - } - } - None + fn find_free_register(&self) -> Option { + self.in_use + .iter() + .filter(|(_, in_use)| !**in_use) + .map(|(reg, _)| *reg) + .next() } /// Spill all registers to stack (useful before function calls) pub fn _spill_all(&mut self) -> Vec { let mut code = Vec::new(); - let regs_to_spill: Vec = self.register_contents.keys().cloned().collect(); + let regs_to_spill: Vec = + self.register_contents.keys().cloned().collect(); for reg in regs_to_spill { - if let Ok(spill_code) = self.spill_register(®) { + if let Ok(spill_code) = self.free_register(®) { code.extend(spill_code); } } @@ -293,23 +393,43 @@ impl RegisterAllocator { self.variable_locations.clear(); self.register_contents.clear(); self.stack_offset = -4; - self.in_use.clear(); + self.in_use = vec![ + Register::Rg0, + Register::Rg1, + Register::Rg2, + Register::Rg3, + Register::Rg4, + Register::Rg5, + Register::Rg6, + Register::Rg7, + Register::Rg8, + Register::Rg9, + Register::Rga, + Register::Rgb, + Register::Rgc, + Register::Rgd, + Register::Rge, + Register::Rgf, + ] + .iter() + .map(|®| (reg, false)) + 
.collect(); } /// Mark a variable as dead (no longer needed) /// Frees its register if it's in one - pub fn _free_var(&mut self, var_name: &str) { - if let Some(Location::Register(reg)) = self.variable_locations.get(var_name) { - let reg = reg.clone(); - self.register_contents.remove(®); - self.in_use.insert(reg, false); - } - self.variable_locations.remove(var_name); - } + // pub fn _free_var(&mut self, var_name: &str) { + // if let Some(Location::Register(reg)) = self.variable_locations.get(var_name) { + // let reg = reg.clone(); + // self.register_contents.remove(®); + // self.in_use.insert(reg, false); + // } + // self.variable_locations.remove(var_name); + // } /// Get list of registers that contain variables and are in use /// These need to be saved before function calls - pub fn get_caller_saved_registers(&self) -> Vec { + pub fn get_caller_saved_registers(&self) -> Vec { self.register_contents .iter() .filter(|(reg, _)| *self.in_use.get(*reg).unwrap_or(&false)) @@ -346,3 +466,50 @@ impl RegisterAllocator { code } } + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum Register { + Rg0, + Rg1, + Rg2, + Rg3, + Rg4, + Rg5, + Rg6, + Rg7, + Rg8, + Rg9, + Rga, + Rgb, + Rgc, + Rgd, + Rge, + Rgf, + Zero, + Null, +} + +impl fmt::Display for Register { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Rg0 => write!(f, "rg0"), + Self::Rg1 => write!(f, "rg1"), + Self::Rg2 => write!(f, "rg2"), + Self::Rg3 => write!(f, "rg3"), + Self::Rg4 => write!(f, "rg4"), + Self::Rg5 => write!(f, "rg5"), + Self::Rg6 => write!(f, "rg6"), + Self::Rg7 => write!(f, "rg7"), + Self::Rg8 => write!(f, "rg8"), + Self::Rg9 => write!(f, "rg9"), + Self::Rga => write!(f, "rga"), + Self::Rgb => write!(f, "rgb"), + Self::Rgc => write!(f, "rgc"), + Self::Rgd => write!(f, "rgd"), + Self::Rge => write!(f, "rge"), + Self::Rgf => write!(f, "rgf"), + Self::Zero => write!(f, "zero"), + Self::Null => write!(f, "null"), + } + } +} diff --git 
a/compiler/src/frontend/c/parser.rs b/compiler/src/frontend/c/parser.rs index 9d4c2bf..ec12efb 100644 --- a/compiler/src/frontend/c/parser.rs +++ b/compiler/src/frontend/c/parser.rs @@ -351,6 +351,7 @@ impl Parser { op, left: Box::new(expr), right, + type_id: None, }; } @@ -371,6 +372,7 @@ impl Parser { op, left: Box::new(expr), right, + type_id: None, }; } @@ -391,6 +393,7 @@ impl Parser { op, left: Box::new(expr), right, + type_id: None, }; } @@ -407,7 +410,11 @@ impl Parser { if let Some(op) = op { self.advance(); let operand = Box::new(self.parse_unary()?); - return Ok(Expression::Unary { op, operand }); + return Ok(Expression::Unary { + op, + operand, + type_id: None, + }); } self.parse_primary() @@ -418,7 +425,10 @@ impl Parser { TokenType::Number(n) => { let value = *n; self.advance(); - Ok(Expression::Number(value as isize)) + Ok(Expression::Number { + value: value as isize, + type_id: None, + }) } TokenType::Identifier(name) => { let name = name.clone(); @@ -445,6 +455,7 @@ impl Parser { namespace: None, }, args, + type_id: None, }) } else { Ok(Expression::Variable { diff --git a/compiler/src/frontend/dsc/mod.rs b/compiler/src/frontend/dsc/mod.rs index b4a6666..5718e6f 100644 --- a/compiler/src/frontend/dsc/mod.rs +++ b/compiler/src/frontend/dsc/mod.rs @@ -2,11 +2,11 @@ use common::logging::log; use crate::model::{CompilerError, Program}; use parser::{ParseResult, Parser}; -use semantic_analyser::Analyser; +// use semantic_analyser::Analyser; pub mod lexer; pub mod parser; -pub mod semantic_analyser; +// pub mod semantic_analyser; pub fn generate_ast(input: &str) -> Result { log("Tokenising Input..."); @@ -30,8 +30,8 @@ pub fn generate_ast(input: &str) -> Result { log("Analyzing AST..."); log("Checking Type Information..."); - let analyser = Analyser::new(); - analyser.analyse(ast.clone()).unwrap(); + // let mut analyser = Analyser::new(); + // analyser.analyse(ast.clone()).unwrap(); log("Type Checking Complete..."); Ok(ast) diff --git 
a/compiler/src/frontend/dsc/parser.rs b/compiler/src/frontend/dsc/parser.rs index 752fbba..2fa533b 100644 --- a/compiler/src/frontend/dsc/parser.rs +++ b/compiler/src/frontend/dsc/parser.rs @@ -1,7 +1,7 @@ use super::lexer::Token; use crate::model::{ - BinaryOperator, Block, CompilerError, ConstExpr, Declaration, Dependency, Expression, - Program, Statement, TypeId, UnaryOperator, Variable, + BinaryOperator, Block, Call, CompilerError, ConstExpr, Declaration, Dependency, + Expression, Program, Statement, TypeId, UnaryOperator, Variable, }; use crate::{expect_tt, expect_value}; use std::ops::{ControlFlow, FromResidual, Try}; @@ -353,7 +353,7 @@ impl Parser { let mut expr = self.parse_additive()?; while let Some(op) = match self.peek_next()? { - Token::EqualEqual => Some(BinaryOperator::Ne), + Token::EqualEqual => Some(BinaryOperator::Eq), Token::BangEqual => Some(BinaryOperator::Ne), Token::Less => Some(BinaryOperator::Lt), Token::Greater => Some(BinaryOperator::Gt), @@ -367,7 +367,8 @@ impl Parser { op, left: Box::new(expr), right, - } + type_id: Some(TypeId::Bool), + }; } ParseResult::Accept(expr) @@ -387,6 +388,7 @@ impl Parser { op, left: Box::new(left), right: Box::new(self.parse_additive()?), + type_id: Some(TypeId::U32), }) } @@ -404,6 +406,7 @@ impl Parser { op, left: Box::new(left), right: Box::new(self.parse_multiplicative()?), + type_id: None, }) } @@ -418,14 +421,21 @@ impl Parser { self.next()?; let operand = Box::new(self.parse_unary()?); - ParseResult::Accept(Expression::Unary { op, operand }) + ParseResult::Accept(Expression::Unary { + op, + operand, + type_id: None, + }) } fn parse_primary(&mut self) -> ParseResult { match self.peek_next()? 
{ Token::Integer(value) => { self.next()?; - ParseResult::Accept(Expression::Number(value as isize)) + ParseResult::Accept(Expression::Number { + value: value as isize, + type_id: None, + }) } Token::String(value) => { self.next()?; @@ -450,7 +460,14 @@ impl Parser { let _ = expect_tt!(self.next()?, RightParen)?; - ParseResult::Accept(Expression::Call { name, args }) + ParseResult::Accept(Expression::Call { + func: Call { + name: name.clone(), + args, + }, + + type_id: None, + }) } else { ParseResult::Accept(Expression::Variable { name, diff --git a/compiler/src/frontend/dsc/semantic_analyser.rs b/compiler/src/frontend/dsc/semantic_analyser.rs index 2b18e2c..9daf780 100644 --- a/compiler/src/frontend/dsc/semantic_analyser.rs +++ b/compiler/src/frontend/dsc/semantic_analyser.rs @@ -1,13 +1,226 @@ -use crate::model::{CompilerError, Program}; +use std::collections::HashMap; -pub struct Analyser; +use crate::model::{ + BinaryOperator, // You'll need to add this to your imports + CompilerError, + Declaration, + Dependency, + Expression, + Program, + TypeId, + UnaryOperator, +}; + +pub struct Analyser { + symbol_table: HashMap, +} + +const NUMERIC_TYPES: &[TypeId] = &[ + TypeId::U32, + TypeId::I32, + TypeId::I16, + TypeId::U16, + TypeId::I8, + TypeId::U8, +]; impl Analyser { pub fn new() -> Self { - Self + Self { + symbol_table: HashMap::new(), + } } - pub fn analyse(&self, _ast: Program) -> Result<(), CompilerError> { + pub fn analyse(&mut self, ast: Program) -> Result<(), CompilerError> { + // build table of global symbols. + for dec in ast.declarations { + let name = match dec.clone() { + Declaration::Function { name, .. } => name, + Declaration::Variable { var, .. } => var.name, + Declaration::Dependency(Dependency { name, .. 
}) => name, + }; + + self.symbol_table.insert(name, dec); + } + Ok(()) } + + fn match_type( + actual: TypeId, + expected: Option, + ) -> Result { + match expected { + Some(id) => { + if id != actual { + Err(CompilerError::TypeMismatch(id, actual)) + } else { + Ok(actual) + } + } + None => Ok(actual), + } + } + + fn get_type( + &mut self, // Changed from &self to &mut self since we modify expr + expr: &mut Expression, + expected_type: Option, + ) -> Result { + match expr { + // Correct IFF we're expecting a void type + Expression::Empty => Self::match_type(TypeId::Void, expected_type), + + // Correct IFF we're expecting a char type + Expression::CharLiteral(_) => Self::match_type(TypeId::Char, expected_type), + + // Correct IFF we're expecting a string slice type + Expression::StringLiteral(_) => { + Self::match_type(TypeId::Ptr(Box::new(TypeId::Char)), expected_type) + } + + Expression::Variable { name, expr_type } => { + let actual = expr_type.clone().ok_or(CompilerError::UnknownType)?; + Self::match_type(actual, expected_type) + } + + Expression::Number { value, type_id } => { + // If we already know the TypeId + if let Some(id) = type_id { + return Self::match_type(id.clone(), expected_type); + } + + // If we're expecting a type id, check it's numeric. 
+ // TODO: add checks to make sure it's valid for its size eg u8 cant be + // more than 255 + if let Some(expected) = expected_type { + if NUMERIC_TYPES.contains(&expected) { + *type_id = Some(expected.clone()); + return Ok(expected); + } else { + return Err(CompilerError::TypeMismatch(expected, TypeId::U32)); + } + } + + // Default to i32 if no type information is available + *type_id = Some(TypeId::I32); + Ok(TypeId::I32) + } + + Expression::Binary { + op, + left, + right, + type_id, + } => { + // For binary operations, both operands should have compatible types + // and the result type depends on the operation + let left_type = self.get_type(left, None)?; + let right_type = self.get_type(right, Some(left_type.clone()))?; + + // For numeric operations, result has the same type as operands + if NUMERIC_TYPES.contains(&left_type) + && NUMERIC_TYPES.contains(&right_type) + { + *type_id = Some(left_type); + Self::match_type(left_type, expected_type) + } else { + Err(CompilerError::TypeMismatch(left_type, right_type)) + } + } + + Expression::Unary { + op, + operand, + type_id, + } => { + match op { + UnaryOperator::Plus | UnaryOperator::Minus => { + // Unary +/- require numeric operands + let inner_type = self.get_type(operand, None)?; + + if NUMERIC_TYPES.contains(&inner_type) { + *type_id = Some(inner_type.clone()); + Self::match_type(inner_type, expected_type) + } else { + Err(CompilerError::TypeMismatch(inner_type, TypeId::I32)) + } + } + + UnaryOperator::Dereference => { + // For dereference (*ptr), the operand must be a pointer + // and the result type is what the pointer points to + let inner_type = self.get_type(operand, None)?; + + match inner_type { + TypeId::Ptr(inner) => { + let deref_type = *inner; + *type_id = Some(deref_type.clone()); + Self::match_type(deref_type, expected_type) + } + _ => Err(CompilerError::Generic(format!( + "Cannot dereference non-pointer type: {:?}", + inner_type + ))), + } + } + + UnaryOperator::Reference => { + // For reference 
(&var), we need to determine what we're taking + // a reference to, then wrap it in a Ptr + // If expected_type is Ptr(T), then operand should have type T + let expected_inner = match expected_type.clone() { + Some(TypeId::Ptr(inner)) => Some(*inner), + _ => None, + }; + + let inner_type = self.get_type(operand, expected_inner)?; + let ref_type = TypeId::Ptr(Box::new(inner_type)); + *type_id = Some(ref_type.clone()); + Self::match_type(ref_type, expected_type) + } + } + } + + Expression::Call { + name, + args, + type_id, + } => match self.symbol_table.get(&name.name) { + Some(Declaration::Function { + params, + return_type, + .. + }) => { + // check that we've given the right number of arguments. + if args.len() != params.len() { + return Err(CompilerError::Generic(format!( + "Function {} expected {} arguments but received {}", + name.name, + params.len(), + args.len() + ))); + } + + for (arg, param) in args.iter_mut().zip(params.iter()) { + // check that the argument type matches the parameter type. 
+ let provided_type = self.get_type(arg, Some(param.type_id))?; + if provided_type != param.type_id { + return Err(CompilerError::TypeMismatch( + param.type_id, + provided_type, + )); + } + } + + *type_id = Some(return_type.clone()); + Self::match_type(return_type.clone(), expected_type) + } + _ => Err(CompilerError::Generic(format!( + "Function {} not found in symbol table", + name.name + ))), + }, + } + } } diff --git a/compiler/src/frontend/mod.rs b/compiler/src/frontend/mod.rs index a17ba62..a9ee921 100644 --- a/compiler/src/frontend/mod.rs +++ b/compiler/src/frontend/mod.rs @@ -1,12 +1,12 @@ use crate::model::{CompilerError, Program}; -mod c; +// mod c; mod dsc; pub fn compiler_frontend(ext: &str, data: &str) -> Result { match ext { "dsc" => Ok(dsc::generate_ast(&data)?), - "c" => Ok(c::generate_ast(&data)?), + // "c" => Ok(c::generate_ast(&data)?), _ => Err(CompilerError::Generic(format!( "File type {} not supported", ext diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 41fb48b..004cfea 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -46,6 +46,8 @@ pub fn compile_file( Err(err) => return Err(format!("Compilation failed: {err:?}").into()), }; + println!("Parsed AST: {:#?}", ast); + let output_ext = output_path .extension() .and_then(|s| s.to_str()) diff --git a/compiler/src/model.rs b/compiler/src/model.rs index 9c7e68e..df1269f 100644 --- a/compiler/src/model.rs +++ b/compiler/src/model.rs @@ -9,9 +9,11 @@ pub enum CompilerError { Undefined(Name), InvalidSyntax(String), Generic(String), + UnknownType, + TypeMismatch(TypeId, TypeId), } -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, PartialEq, Eq, Clone)] pub struct Name { pub name: String, pub namespace: Option, @@ -46,7 +48,7 @@ pub struct Dependency { } #[allow(unused)] -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub enum TypeId { U8, U16, @@ -54,18 +56,45 @@ pub enum TypeId { I8, I16, I32, + Bool, Char, Void, Ptr(Box), Ref(Box), Array(Box, usize), - Struct { 
name: Name, fields: Vec }, + Struct { name: Name, fields: Vec }, +} + +impl fmt::Display for TypeId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::U8 => write!(f, "u8"), + Self::U16 => write!(f, "u16"), + Self::U32 => write!(f, "u32"), + Self::I8 => write!(f, "i8"), + Self::I16 => write!(f, "i16"), + Self::I32 => write!(f, "i32"), + Self::Bool => write!(f, "bool"), + Self::Char => write!(f, "char"), + Self::Void => write!(f, "void"), + Self::Ptr(t) => write!(f, "*{}", t), + Self::Ref(t) => write!(f, "&{}", t), + Self::Array(t, len) => write!(f, "[{}; {}]", t, len), + Self::Struct { name, fields } => { + write!(f, "struct {} {{", name)?; + for (i, field) in fields.iter().enumerate() { + write!(f, "{}: {}", i, field)?; + } + write!(f, "}}") + } + } + } } pub type Block = Vec; #[allow(unused)] -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct Variable { pub name: String, pub type_id: TypeId, @@ -100,6 +129,7 @@ pub enum Statement { body: Vec, }, Loop(Block), + Defer(Call), Break, Continue, Return(Option), @@ -128,28 +158,47 @@ pub enum Expression { op: BinaryOperator, left: Box, right: Box, + + // Post-Semantic Analysis + type_id: Option, }, Unary { op: UnaryOperator, operand: Box, + + // Post-Semantic Analysis + type_id: Option, }, Variable { name: Name, expr_type: Option, }, Call { - name: Name, - args: Vec, + func: Call, + + // Post-Semantic Analysis + type_id: Option, + }, + Number { + value: isize, + + // Post-Semantic Analysis + type_id: Option, }, - Number(isize), StringLiteral(String), CharLiteral(char), } +#[derive(Debug, Clone)] +pub struct Call { + pub name: Name, + pub args: Vec, +} + impl Expression { pub fn is_pure(&self) -> bool { match self { - Expression::Number(_) => true, + Expression::Number { .. } => true, Expression::StringLiteral(_) => true, Expression::CharLiteral(_) => true, Expression::Call { .. } => false, @@ -159,6 +208,29 @@ impl Expression { Expression::Variable { .. 
} => true, } } + + pub fn type_id(&self) -> Result { + match self { + Expression::Number { type_id, .. } => { + type_id.clone().ok_or(CompilerError::UnknownType) + } + Expression::StringLiteral(_) => Ok(TypeId::Ptr(Box::new(TypeId::Char))), + Expression::CharLiteral(_) => Ok(TypeId::Char), + Expression::Call { type_id, .. } => { + type_id.clone().ok_or(CompilerError::UnknownType) + } + Expression::Binary { type_id, .. } => { + type_id.clone().ok_or(CompilerError::UnknownType) + } + Expression::Unary { type_id, .. } => { + type_id.clone().ok_or(CompilerError::UnknownType) + } + Expression::Empty => Ok(TypeId::Void), + Expression::Variable { expr_type, .. } => { + expr_type.clone().ok_or(CompilerError::UnknownType) + } + } + } } #[allow(unused)]