From 201b18069b2dbe23c9371db09cf81041afb630af Mon Sep 17 00:00:00 2001 From: zxq5 Date: Sat, 14 Feb 2026 02:46:29 +0000 Subject: [PATCH] continued on register allocator rewrite, slow progress as scoping is proving to be a challenge --- compiler/Cargo.toml | 1 + compiler/src/backend/dsa/codegen.rs | 685 +++++++++++------------- compiler/src/backend/dsa/instruction.rs | 415 +++++++++----- compiler/src/backend/dsa/registers.rs | 124 ++--- compiler/src/backend/dsa/scope.rs | 288 +++++++++- compiler/src/backend/dsa/variable.rs | 157 ++---- compiler/src/frontend/dsc/lexer.rs | 190 ++++++- compiler/src/frontend/dsc/parser.rs | 22 +- compiler/src/model.rs | 19 +- resources/dsa/example.dsa | 42 +- 10 files changed, 1153 insertions(+), 790 deletions(-) diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index ba60012..a4b6a6f 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -7,3 +7,4 @@ authors.workspace = true [dependencies] chrono = "0.4.43" common = { path = "../common" } +uuid = { version = "1.20.0", features = ["v4"] } diff --git a/compiler/src/backend/dsa/codegen.rs b/compiler/src/backend/dsa/codegen.rs index ae85ba1..bf78b9d 100644 --- a/compiler/src/backend/dsa/codegen.rs +++ b/compiler/src/backend/dsa/codegen.rs @@ -5,99 +5,46 @@ use std::time::SystemTime; use chrono::{DateTime, Local}; use super::registers::RegisterAllocator; -use crate::backend::dsa::instruction::CodeGen; +use crate::backend::dsa::instruction::{InsBlock as IB, Instruction as I, Label}; use crate::backend::dsa::registers::Register; -use crate::backend::dsa::variable::{ScopeKind, ScopeManager}; -use crate::{block, comment, dsa}; use crate::model::{ AssignmentOperator, BinaryOperator, Call, CompilerError, ConstExpr, Declaration, - Dependency, Expression, Name, Program, Statement, TypeId, UnaryOperator, Variable, + Dependency, Expression, Number, Program, Statement, TypeId, UnaryOperator, Variable, }; pub struct CodeGenerator { ast: Program, - imports: HashMap, - globals: Vec, - functions: Vec, + imports: HashMap, + globals: HashMap, + functions: Vec, symbols: Vec, allocator: RegisterAllocator, } -fn import(name: &str, path: &str) -> String { - format!("include {name}: \"{}\"", path) -} - impl CodeGenerator { - const RET: &'static str = "\tjmp _ret"; - pub fn new(ast: Program) -> Self { CodeGenerator { ast, imports: HashMap::new(), - globals: Vec::new(), + globals: HashMap::new(), functions: Vec::new(), symbols: Vec::new(), allocator: RegisterAllocator::new(), } } - pub fn include(&mut self, name: &str, path: &str) { - self.imports.insert(name.to_string(), path.to_string()); + pub fn include(&mut self, name: impl Into, path: impl Into) { + let name = name.into(); + self.imports.insert(name.clone(), I::include(name, path)); } fn is_global(&self, name: &str) -> bool { // Check if this variable is in the globals list - self.globals - .iter() - .any(|g| g.contains(&format!("dw {}:", name))) + self.globals.contains_key(name) } pub fn generate(&mut self) -> Result { - // let mut codegen = CodeGen::new(); - // let mut scope_mgr = ScopeManager::new(); - // scope_mgr.enter_scope(ScopeKind::Global); - // scope_mgr.enter_scope(ScopeKind::Function); - - // let point_type = TypeId::Struct { - // name: Name::new("Point", None), - // fields: vec![TypeId::U32, TypeId::U32], - // generics: vec![], - // }; - - // let p_var = scope_mgr.declare_var("p".into(), point_type)?; - // scope_mgr.allocate_var(p_var, false)?; // Force stack (struct too big) - - // // Access p.x (offset 0, size 4) - // let (x_reg, load_block) = scope_mgr.access_member(p_var, 0, 4)?; - // codegen.emit_block(load_block); - - // // Store to p.y (offset 4, size 4) - // let value_reg = Register::Rg0; // assume value is here - // let store_block = scope_mgr.store_member(p_var, 4, 4, value_reg)?; - // codegen.emit_block(store_block); - - // scope_mgr.enter_scope(ScopeKind::Loop); - // let pointer = scope_mgr - // .declare_var("pointer".into(), TypeId::Ptr(Box::new(TypeId::U32)))?; - // scope_mgr.allocate_var(pointer, true)?; - - // scope_mgr.enter_scope(ScopeKind::Function); - // let var2 = scope_mgr.declare_var("var2".into(), TypeId::U32)?; - // scope_mgr.allocate_var(var2, true)?; - // let array = scope_mgr.declare_var( - // "pointer".into(), - // TypeId::Array { - // r#type: Box::new(TypeId::U32), - // size: 10, - // }, - // )?; - // scope_mgr.allocate_var(array, false)?; - - // println!("{}", scope_mgr); - - // return Ok(String::new()); - // always include the print library for debugging! self.include("print", "./lib/io/print.dsa"); @@ -122,76 +69,91 @@ impl CodeGenerator { self.generate_block(block.clone())?; } - self.generate_layout() + let assembly = self.generate_layout()?; + Ok(assembly + .iter() + .map(|i| i.to_string()) + .collect::>() + .join("\n")) } - fn generate_layout(&mut self) -> Result { + fn generate_layout(&mut self) -> Result { let datetime: DateTime = SystemTime::now().into(); - Ok(dsa![ - "", - comment!("GENERATED BY DSC COMPILER"), - comment!(format!( - "Generated at {}", + + let mut block = IB::new(); + + block.extend(vec![ + I::comment("GENERATED BY DSC COMPILER"), + I::comment(format!( + "Generated at {}\n", datetime.format("%Y-%m-%d %H:%M:%S") )), - "", - // imports - comment!("Imports"), + I::comment("Imports"), + ]); + + block.extend( self.imports .iter() - .map(|(k, v)| import(k, v)) - .collect::>() - .join("\n"), - "", - // reserved memory - comment!("Globals & Reserved Memory"), - self.globals.join("\n"), - "", - // entry point - comment!("Entry Point"), - "dw stack: 0x10000", - "db message: \"Process Exited with code:\"", - block! [ "_init" - dsa![ldw stack, bpr], - dsa![mov bpr, spr], - dsa![push zero], - dsa![call main], - dsa![call print::print_newline], - dsa![lwi message, rg0], - dsa![push rg0], - dsa![call print::print], - dsa![pop zero], - dsa![call print::print_hex_word], - dsa![pop zero], - dsa![hlt] - ], - "", - comment!("Return"), - block! [ "_ret" - dsa![mov bpr, spr], - dsa![pop bpr], - dsa![return] - ], - comment!("Compiled Code Starts..."), - // block! [ "main" - // dsa![push bpr], - // dsa![mov spr, bpr], - // dsa![lwi 67, rg1], - // dsa![stw rg1, spr, 8], - // dsa![mov bpr, spr], - // dsa![pop bpr], - // dsa![return] - // ], - self.functions.join("\n"), - ]) + .map(|(_name, instruction)| instruction.clone()) + .collect::>(), + ); + + block.push(I::comment("")); + block.push(I::comment("Globals & Reserved Memory")); + + block.extend( + self.globals + .iter() + .map(|(_name, instruction)| instruction.clone()) + .collect::>(), + ); + + block.extend(vec![ + I::comment(""), + I::comment("Entry Point"), + I::db_word("stack", 0x10000), + I::db_string("message", "Process Exited with code:"), + // init function for stack setup. + I::label("_init"), + I::ldw_label("stack", Register::Bpr), + I::mov(Register::Bpr, Register::Spr), + I::push(Register::Zero), + I::call("main"), + I::call("print::print_newline"), + I::lwi_label("message", Register::Rg0), + I::push(Register::Rg0), + I::call("print::print"), + I::pop(Register::Zero), + I::call("print::print_hex_word"), + I::pop(Register::Zero), + I::Hlt, + // default return block boilerplate + I::comment("Return"), + I::label("_ret"), + I::mov(Register::Bpr, Register::Spr), + I::pop(Register::Bpr), + I::Return, + ]); + + for function in self.functions.iter() { + block.extend(function.iter().cloned()); + } + + Ok(block) } fn generate_global(&mut self, name: &str, init: Option) { - self.globals.push(format!( - "dw {}: {}", - name, - init.unwrap_or(ConstExpr::Number(0)) - )) + let init = init.unwrap_or(ConstExpr::Number(0)); + match init { + ConstExpr::Number(value) => { + self.globals + .insert(name.to_string(), I::db_word(name, value as u32)); + } + ConstExpr::String(str) => { + self.globals + .insert(name.to_string(), I::db_string(name, str)); + } + } } fn generate_block(&mut self, block: Declaration) -> Result<(), CompilerError> { @@ -205,14 +167,11 @@ impl CodeGenerator { body, return_type, } => { - let func = self - .generate_function(&name, ¶ms, &body, return_type) - .join("\n"); - - self.functions.push(format!("{func}\n")); + let func = self.generate_function(&name, ¶ms, &body, return_type); + self.functions.push(func); } Declaration::Dependency(Dependency { name, path }) => { - self.imports.insert(name, path); + self.include(name, path); } Declaration::Struct { .. } => {} /* can't do any codegen for these yet, * they're just types. */ @@ -228,49 +187,50 @@ impl CodeGenerator { params: &[Variable], body: &[Statement], return_type: TypeId, - ) -> Vec { - let mut code = Vec::new(); + ) -> IB { + let mut code = IB::new(); // Reset allocator for new function self.allocator.reset(); - code.push(format!( - "// fn {name}({}) -> {}", - params - .iter() - .map(|p| format!("{}: {}", p.name, p.type_id)) - .collect::>() - .join(", "), - return_type - )); + let fmtparams = params + .iter() + .map(|p| format!("{}: {}", p.name, p.type_id)) + .collect::>() + .join(", "); - // Function prologue - code.push(format!("{}:", name)); - code.push("\tpush bpr".to_string()); - code.push("\tmov spr, bpr".to_string()); - code.push(String::new()); + code.push(I::comment(format!( + "fn {name}({fmtparams}) -> {return_type}" + ))); + + code.extend(vec![ + I::label(name), + I::push(Register::Bpr), + I::mov(Register::Spr, Register::Bpr), + ]); // Allocate parameters to registers or stack locations for (i, param) in params.iter().enumerate() { let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8 // Track that this parameter is at a stack location let (reg, load_code) = self.allocator.alloc_var(¶m.name).unwrap(); - code.extend(load_code); - code.push(format!("\tldw bpr, {}, {}", reg, offset)); + code.append(load_code); + code.push(I::ldw_reg_offset(Register::Bpr, reg, offset)); } // Generate code for function body for stmt in body { let stmt_code = self.generate_statement(stmt, &mut code).unwrap(); - code.extend(stmt_code); + code.append(stmt_code); } // automatically return at function end - if let Some(x) = code.last() - && x == Self::RET + if let Some(x) = code.iter().last() + && let I::Jmp { target: Label(val) } = x + && val == "_ret" { } else { - code.push(Self::RET.to_string()); + code.push(I::jmp("_ret")); } code @@ -280,9 +240,9 @@ impl CodeGenerator { fn generate_statement( &mut self, stmt: &Statement, - func_body: &mut Vec, - ) -> Result, CompilerError> { - let mut code = Vec::new(); + func_body: &mut IB, + ) -> Result { + let mut code = IB::new(); match stmt { Statement::Declaration { var, value } => { @@ -290,11 +250,11 @@ impl CodeGenerator { // Evaluate expression let (result_reg, expr_code) = self.generate_expression(expr, true, func_body)?; - code.extend(expr_code); + code.append(expr_code); // Store result in variable let store_code = self.allocator.store_var(&var.name, &result_reg); - code.extend(store_code); + code.append(store_code); // Free temporary register self.allocator.free_temp(result_reg); @@ -311,13 +271,13 @@ impl CodeGenerator { Statement::PtrWrite { ptr, value } => { let (result_reg, expr_code) = self.generate_expression(value, true, func_body)?; - code.extend(expr_code); + code.append(expr_code); let (ptr_reg, ptr_code) = self.generate_expression(ptr, true, func_body)?; - code.extend(ptr_code); + code.append(ptr_code); - code.push(format!("\tstw {}, {}", result_reg, ptr_reg)); + code.push(I::stw_reg(result_reg, ptr_reg)); self.allocator.free_temp(result_reg); self.allocator.free_temp(ptr_reg); @@ -331,17 +291,17 @@ impl CodeGenerator { // Evaluate expression let (result_reg, expr_code) = self.generate_expression(value, true, func_body)?; - code.extend(expr_code); + code.append(expr_code); if *operator == AssignmentOperator::Assign { // Check if this is a global variable if self.is_global(varname) { // Store to global label - code.push(format!("\tstw {}, {}", result_reg, varname)); + code.push(I::stw_label(result_reg, varname.clone())) } else { // Store result in local variable let store_code = self.allocator.store_var(varname, &result_reg); - code.extend(store_code); + code.append(store_code); } // Free temporary register @@ -352,25 +312,28 @@ impl CodeGenerator { // for more complex assignment cases we need an intermediate register. let (temp_reg, temp_code) = self.allocator.alloc_temp()?; - code.extend(temp_code); + code.append(temp_code); // fetch the value of the variable - let (var_reg, var_code) = if self.is_global(varname) { - (temp_reg, vec![format!("\tldw {}, {}", varname, temp_reg)]) + let var_reg = if self.is_global(varname) { + let instruction = I::ldw_label(varname.clone(), temp_reg); + code.push(instruction); + temp_reg } else { - self.allocator.load_var(varname)? + let (rg, block) = self.allocator.load_var(varname)?; + code.append(block); + rg }; - code.extend(var_code); let assign_code = match operator { AssignmentOperator::Assign => { unreachable!("assignment was already checked earlier.") } AssignmentOperator::AddAssign => { - format!("\tadd {var_reg}, {result_reg}, {temp_reg}") + I::add(var_reg, result_reg, temp_reg) } AssignmentOperator::SubAssign => { - format!("\tsub {var_reg}, {result_reg}, {temp_reg}") + I::sub(var_reg, result_reg, temp_reg) } AssignmentOperator::MulAssign => { return Err(CompilerError::Unimplemented(format!( @@ -383,30 +346,31 @@ impl CodeGenerator { ))); } AssignmentOperator::ModAssign => { - format!("\tmod {var_reg}, {result_reg}, {temp_reg}") + return Err(CompilerError::Unimplemented(format!( + "TODO: write proper mod function for DSA" + ))); } AssignmentOperator::AndAssign => { - format!("\tand {var_reg}, {result_reg}, {temp_reg}") - } - AssignmentOperator::OrAssign => { - format!("\tor {var_reg}, {result_reg}, {temp_reg}") + I::and(var_reg, result_reg, temp_reg) } + AssignmentOperator::OrAssign => I::or(var_reg, result_reg, temp_reg), AssignmentOperator::XorAssign => { - format!("\txor {var_reg}, {result_reg}, {temp_reg}") + I::xor(var_reg, result_reg, temp_reg) } AssignmentOperator::LeftShiftAssign => { // this is only useful if we optimise out the register allocation - // inside value. if let Expression::Number - // { value, .. } = *value { format!("\ - // tshl {var_reg}, {value}, {temp_reg}") } - format!("\tshl {var_reg}, {result_reg}, 0, {temp_reg}") + // inside value. + // if let Expression::Number { value, .. } = *value { + // I::shl(var_reg, value, temp_reg) + // } + I::shl(var_reg, result_reg, 0, temp_reg) } AssignmentOperator::RightShiftAssign => { // this is only useful if we optimise out the register allocation // if let Expression::Number { value, .. } = *value { - // format!("\tshr {var_reg}, {value}, {temp_reg}") + // I::shr(var_reg, value, temp_reg) // } - format!("\tshr {var_reg}, {result_reg}, 0, {temp_reg}") + I::shr(var_reg, result_reg, 0, temp_reg) } }; code.push(assign_code); @@ -414,11 +378,11 @@ impl CodeGenerator { // Check if this is a global variable if self.is_global(varname) { // Store to global label - code.push(format!("\tstw {}, {}", temp_reg, varname)); + code.push(I::stw_label(temp_reg, varname.clone())) } else { // Store result in local variable let store_code = self.allocator.store_var(varname, &temp_reg); - code.extend(store_code); + code.append(store_code); } self.allocator.free_temp(result_reg); @@ -429,9 +393,9 @@ impl CodeGenerator { if let Some(e) = expr { let (result_reg, expr_code) = self.generate_expression(e, true, func_body)?; - code.extend(expr_code); - code.push(format!("\tstw {}, bpr, 8", result_reg)); - code.push(format!("\tjmp _ret")); + code.append(expr_code); + code.push(I::stw_reg_offset(result_reg, Register::Bpr, 8)); + code.push(I::jmp("_ret")); self.allocator.free_temp(result_reg); } } @@ -444,10 +408,10 @@ impl CodeGenerator { // Generate condition let (cond_reg, cond_code) = self.generate_expression(condition, true, func_body)?; - code.extend(cond_code); + code.append(cond_code); // Compare with zero - code.push(format!("\tcmp {}, zero", cond_reg)); + code.push(I::cmp(cond_reg, Register::Zero)); self.allocator.free_temp(cond_reg); // Generate unique labels @@ -456,80 +420,79 @@ impl CodeGenerator { let end_label = format!("_end_{}", self.get_unique_label()); // Jump to else if condition is false (equal to zero) - code.push(format!("\tjeq {}", else_label)); + code.push(I::jeq(else_label.clone())); // Then block - code.push(format!("{}:", then_label)); + code.push(I::label(then_label)); for s in then_stmt { - code.extend(self.generate_statement(s, func_body)?); + code.append(self.generate_statement(s, func_body)?); } if then_stmt.len() == 0 { - code.push("\tnop".to_string()); + code.push(I::Nop); } - code.push(format!("\tjmp {}", end_label)); + code.push(I::jmp(end_label.clone())); // Else block - code.push(format!("{}:", else_label)); + code.push(I::label(else_label)); for s in else_stmt { - code.extend(self.generate_statement(s, func_body)?); + code.append(self.generate_statement(s, func_body)?); } if else_stmt.len() == 0 { - code.push("\tnop".to_string()); + code.push(I::Nop); } - code.push(format!("{}:", end_label)); + code.push(I::label(end_label)); } Statement::While { condition, body } => { let loop_start = format!("_while_start_{}", self.get_unique_label()); let loop_end = format!("_while_end_{}", self.get_unique_label()); - code.push(format!("{}:", loop_start)); + code.push(I::label(&loop_start)); // Generate condition let (cond_reg, cond_code) = self.generate_expression(condition, true, func_body)?; - code.extend(cond_code); + code.append(cond_code); - code.push(format!("\tcmp {}, zero", cond_reg)); + code.push(I::cmp(cond_reg, Register::Zero)); self.allocator.free_temp(cond_reg); - code.push(format!("\tjeq {}", loop_end)); + code.push(I::jeq(loop_end.clone())); // Loop body for s in body { - code.extend(self.generate_statement(s, func_body)?); + code.append(self.generate_statement(s, func_body)?); } - code.push(format!("\tjmp {}", loop_start)); - code.push(format!("{}:", loop_end)); + code.push(I::jmp(loop_start)); + code.push(I::label(loop_end)); } Statement::Loop(body) => { let loop_start = format!("_loop_start_{}", self.get_unique_label()); - - code.push(format!("{}:", loop_start)); + code.push(I::label(&loop_start)); for s in body { - code.extend(self.generate_statement(s, func_body)?); + code.append(self.generate_statement(s, func_body)?); } - code.push(format!("\tjmp {}", loop_start)); + code.push(I::jmp(loop_start)); } Statement::Expression { expr } => { let (result_reg, expr_code) = self.generate_expression(expr, false, func_body)?; - code.extend(expr_code); + code.append(expr_code); self.allocator.free_temp(result_reg); } Statement::Block(statements) => { for s in statements { - code.extend(self.generate_statement(s, func_body)?); + code.append(self.generate_statement(s, func_body)?); } } } @@ -543,46 +506,56 @@ impl CodeGenerator { &mut self, expr: &Expression, use_result: bool, - func_body: &mut Vec, - ) -> Result<(Register, Vec), CompilerError> { - let mut code = Vec::new(); + func_body: &mut IB, + ) -> Result<(Register, IB), CompilerError> { + let mut code = IB::new(); match expr { Expression::Empty => Ok((Register::Null, code)), - Expression::Number { value, .. } => { - let (reg, alloc_code) = self.allocator.alloc_temp()?; - code.extend(alloc_code); + Expression::Number(n) => match n { + Number::Signed(value, _) => { + let (reg, alloc_code) = self.allocator.alloc_temp()?; + code.append(alloc_code); - // Load immediate value - code.push(format!("\tlli {}, {}", value & 0xFFFF, reg)); - if *value > 0xFFFF || *value < 0 { - code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg)); + // Load immediate value + code.push(I::lwi(*value as u32, reg)); + + Ok((reg, code)) } + Number::Unsigned(value, _) => { + let (reg, alloc_code) = self.allocator.alloc_temp()?; + code.append(alloc_code); - Ok((reg, code)) - } + // Load immediate value + code.push(I::lwi(*value as u32, reg)); + + Ok((reg, code)) + } + }, Expression::CharLiteral(value) => { let (reg, alloc_code) = self.allocator.alloc_temp()?; - code.extend(alloc_code); + code.append(alloc_code); // Load immediate value - code.push(format!("\tlli {}, {} // '{value}'", *value as u8, reg)); + code.push(I::comment(format!("char literal '{value}'"))); + code.push(I::lwi(*value as u32, reg)); Ok((reg, code)) } Expression::StringLiteral(value) => { let (reg, alloc_code) = self.allocator.alloc_temp()?; - code.extend(alloc_code); + code.append(alloc_code); // write string into memory let uuid = self.get_unique_label(); - func_body.insert(0, format!("db str_{uuid}: \"{value}\"")); + + func_body.insert(0, I::db_string(format!("str_{uuid}"), value)); // Load pointer to string - code.push(format!("\tlwi str_{uuid}, {reg}")); + code.push(I::lwi_label(format!("str_{uuid}"), reg)); Ok((reg, code)) } @@ -598,16 +571,16 @@ impl CodeGenerator { if self.is_global(&name.name) { // Allocate a temporary register for the global let (reg, alloc_code) = self.allocator.alloc_temp()?; - code.extend(alloc_code); + code.append(alloc_code); // Load from global label - code.push(format!("\tldw {}, {}", name.name, reg)); + code.push(I::ldw_label(name.name.clone(), reg)); Ok((reg, code)) } else { // Local variable - use existing allocator logic let (reg, load_code) = self.allocator.load_var(&name.name)?; - code.extend(load_code); + code.append(load_code); Ok((reg, code)) } } @@ -618,37 +591,33 @@ impl CodeGenerator { // Evaluate left operand let (left_reg, left_code) = self.generate_expression(left, true, func_body)?; - code.extend(left_code); + code.append(left_code); // Evaluate right operand let (right_reg, right_code) = self.generate_expression(right, true, func_body)?; - code.extend(right_code); + code.append(right_code); // Allocate result register let (result_reg, result_alloc) = self.allocator.alloc_temp()?; - code.extend(result_alloc); + code.append(result_alloc); // Generate operation match op { BinaryOperator::Add => { - code.push( - format!("\tadd {left_reg}, {right_reg}, {result_reg}",), - ); + code.push(I::add(left_reg, right_reg, result_reg)); } BinaryOperator::Sub => { - code.push( - format!("\tsub {left_reg}, {right_reg}, {result_reg}",), - ); + code.push(I::sub(left_reg, right_reg, result_reg)); } BinaryOperator::Mul => { self.include("maths", "./lib/maths/core.dsa"); // Call multiply function - code.push(format!("\tpush {}", right_reg)); - code.push(format!("\tpush {}", left_reg)); - code.push("\tcall maths::multiply".to_string()); - code.push(format!("\tpop {}", result_reg)); - code.push("\tpop zero".to_string()); + code.push(I::push(right_reg)); + code.push(I::push(left_reg)); + code.push(I::call("maths::multiply")); + code.push(I::pop(result_reg)); + code.push(I::pop(Register::Zero)); } BinaryOperator::Div => { return Err(CompilerError::Unimplemented(format!( @@ -675,13 +644,13 @@ impl CodeGenerator { // code.push("\tpop zero".to_string()); } BinaryOperator::BitwiseAnd => { - code.push(format!("\tand {left_reg}, {right_reg}, {result_reg}")); + code.push(I::and(left_reg, right_reg, result_reg)); } BinaryOperator::BitwiseOr => { - code.push(format!("\tor {left_reg}, {right_reg}, {result_reg}")); + code.push(I::or(left_reg, right_reg, result_reg)); } BinaryOperator::BitwiseXor => { - code.push(format!("\txor {left_reg}, {right_reg}, {result_reg}")); + code.push(I::xor(left_reg, right_reg, result_reg)); } BinaryOperator::LogicalAnd => { return Err(CompilerError::Unimplemented(format!( @@ -693,58 +662,70 @@ impl CodeGenerator { "assembler/ISA does not yet support logical or!" ))); } - BinaryOperator::LeftShift => code - .push(format!("\tshl {left_reg}, {right_reg}, 0, {result_reg}")), - BinaryOperator::RightShift => code - .push(format!("\tshr {left_reg}, {right_reg}, 0, {result_reg}")), + BinaryOperator::LeftShift => { + code.push(I::shl(left_reg, right_reg, 0, result_reg)); + } + BinaryOperator::RightShift => { + code.push(I::shr(left_reg, right_reg, 0, result_reg)); + } // Comparison operators - return 1 (true) or 0 (false) BinaryOperator::Equal => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 1, {}", result_reg)); + code.push(I::cmp(left_reg, right_reg)); + code.push(I::lwi(1, result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjeq {}", end_label)); - code.push(format!("\tlli 0, {}", result_reg)); - code.push(format!("{}:", end_label)); + code.push(I::jeq(end_label.clone())); + code.push(I::lwi(0, result_reg)); + code.push(I::label(end_label)); } BinaryOperator::NotEqual => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 1, {}", result_reg)); + code.push(I::cmp(left_reg, right_reg)); + code.push(I::lwi(1, result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjne {}", end_label)); - code.push(format!("\tlli 0, {}", result_reg)); - code.push(format!("{}:", end_label)); + code.push(I::Jne { + target: Label(end_label.clone()), + }); + code.push(I::lwi(0, result_reg)); + code.push(I::label(&end_label)); } BinaryOperator::LessThan => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 1, {}", result_reg)); + code.push(I::cmp(left_reg, right_reg)); + code.push(I::lwi(1, result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjlt {}", end_label)); - code.push(format!("\tlli 0, {}", result_reg)); - code.push(format!("{}:", end_label)); + code.push(I::Jlt { + target: Label(end_label.clone()), + }); + code.push(I::lwi(0, result_reg)); + code.push(I::label(&end_label)); } BinaryOperator::LessOrEqual => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 1, {}", result_reg)); + code.push(I::cmp(left_reg, right_reg)); + code.push(I::lwi(1, result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjle {}", end_label)); - code.push(format!("\tlli 0, {}", result_reg)); - code.push(format!("{}:", end_label)); + code.push(I::Jle { + target: Label(end_label.clone()), + }); + code.push(I::lwi(0, result_reg)); + code.push(I::label(&end_label)); } BinaryOperator::GreaterThan => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 1, {}", result_reg)); + code.push(I::cmp(left_reg, right_reg)); + code.push(I::lwi(1, result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjgt {}", end_label)); - code.push(format!("\tlli 0, {}", result_reg)); - code.push(format!("{}:", end_label)); + code.push(I::Jgt { + target: Label(end_label.clone()), + }); + code.push(I::lwi(0, result_reg)); + code.push(I::label(&end_label)); } BinaryOperator::GreaterOrEqual => { - code.push(format!("\tcmp {}, {}", left_reg, right_reg)); - code.push(format!("\tlli 1, {}", result_reg)); + code.push(I::cmp(left_reg, right_reg)); + code.push(I::lwi(1, result_reg)); let end_label = format!("_cmp_end_{}", self.get_unique_label()); - code.push(format!("\tjge {}", end_label)); - code.push(format!("\tlli 8, {}", result_reg)); - code.push(format!("{}:", end_label)); + code.push(I::Jge { + target: Label(end_label.clone()), + }); + code.push(I::lwi(0, result_reg)); + code.push(I::label(&end_label)); } // _ => unimplemented!(), } @@ -758,23 +739,19 @@ impl CodeGenerator { Expression::UnaryPostfix { op, operand, .. } => { let (operand_reg, operand_code) = self.generate_expression(operand, true, func_body)?; - code.extend(operand_code); + code.append(operand_code); let (result_reg, result_alloc) = self.allocator.alloc_temp()?; - code.extend(result_alloc); + code.append(result_alloc); match op { UnaryOperator::Increment => { - // prefix increment - // code.push(format!("\taddi {}, 1, {}", operand_reg, - // operand_reg)); - code.push(format!("\tmov {}, {}", operand_reg, result_reg)); + // postfix increment - return old value + code.push(I::mov(operand_reg, result_reg)); } UnaryOperator::Decrement => { - // prefix decrement - // code.push(format!("\tsubi {}, 1, {}", operand_reg, - // operand_reg)); - code.push(format!("\tmov {}, {}", operand_reg, result_reg)); + // postfix decrement - return old value + code.push(I::mov(operand_reg, result_reg)); } _ => { return Err(CompilerError::Generic(format!( @@ -790,53 +767,53 @@ impl CodeGenerator { Expression::Unary { op, operand, .. } => { let (operand_reg, operand_code) = self.generate_expression(operand, true, func_body)?; - code.extend(operand_code); + code.append(operand_code); let (result_reg, result_alloc) = self.allocator.alloc_temp()?; - code.extend(result_alloc); + code.append(result_alloc); match op { UnaryOperator::Minus => { // Negate: result = 0 - operand - code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg)); + code.push(I::sub(Register::Zero, operand_reg, result_reg)); } UnaryOperator::Plus => { // Just move - code.push(format!("\tmov {}, {}", operand_reg, result_reg)); + code.push(I::mov(operand_reg, result_reg)); } UnaryOperator::Dereference => { - code.push(format!("\tldw {}, {}", operand_reg, result_reg)); + code.push(I::ldw_reg(operand_reg, result_reg)); } UnaryOperator::AddressOf => { // ensure the referenced variable is on the stack and return its // address. let (offset, alloc_code) = self.allocator.free_register(&operand_reg)?; - code.extend(alloc_code); - code.push(format!( - "\taddi spr, {}, {}", + code.push(alloc_code); + code.push(I::iadd_dest( + Register::Spr, offset - self.allocator.get_stack_offset(), - result_reg + result_reg, )); } UnaryOperator::SizeOf => { if let Ok(id) = operand.type_id() { let size = id.size(); - code.push(format!("\tmov {}, {}", size, result_reg)); + code.push(I::lwi(size as u32, result_reg)); } } UnaryOperator::Increment => { // prefix increment - // code.push(format!("\tmov {}, {}", operand_reg, result_reg)); - code.push(format!("\taddi {}, 1, {}", operand_reg, result_reg)); + code.push(I::mov(operand_reg, result_reg)); + code.push(I::iadd_dest(operand_reg, 1, result_reg)); } UnaryOperator::Decrement => { // prefix decrement - // code.push(format!("\tmov {}, {}", operand_reg, result_reg)); - code.push(format!("\tsubi {}, 1, {}", operand_reg, result_reg)); + code.push(I::mov(operand_reg, result_reg)); + code.push(I::iadd_dest(operand_reg, -1, result_reg)); } UnaryOperator::BitwiseNot => { - code.push(format!("\tnot {}, {}", operand_reg, result_reg)); + code.push(I::not(operand_reg, result_reg)); } UnaryOperator::LogicalNot => { return Err(CompilerError::Unimplemented(format!( @@ -863,7 +840,7 @@ impl CodeGenerator { for arg in args.iter().rev() { let (arg_reg, arg_code) = self.generate_expression(arg, true, func_body)?; - code.extend(arg_code); + code.append(arg_code); arg_regs.push(arg_reg); } @@ -871,25 +848,22 @@ impl CodeGenerator { let saved_regs = self.allocator.get_caller_saved_registers(); for reg in &saved_regs { // spill variables to stack - code.extend(self.allocator.free_register(reg).unwrap().1); + code.push(self.allocator.free_register(reg).unwrap().1); } // Evaluate and push arguments in reverse order for (i, arg_reg) in arg_regs.iter().enumerate() { - code.push(format!( - "\tpush {} // push arg {}", - arg_reg, - args.len() - 1 - i - )); + code.push(I::comment(format!("push arg {}", args.len() - 1 - i))); + code.push(I::push(*arg_reg)); } if self.symbols.contains(&name.name) { // Call local function - code.push(format!("\tcall {}", name)); + code.push(I::call(name.to_string())); } else if let Some(ns) = name.namespace.clone() && self.imports.contains_key(&ns) { - code.push(format!("\tcall {}", name)); + code.push(I::call(name.to_string())); } else { return Err(CompilerError::Undefined(name.clone())); } @@ -900,13 +874,13 @@ impl CodeGenerator { let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?; result_reg = temp_result_reg; - code.extend(result_alloc); - code.push(format!("\tpop {}", result_reg)); + code.append(result_alloc); + code.push(I::pop(result_reg)); // Clean up arguments if args.len() > 1 { for _ in 0..(args.len() - 1) { - code.push("\tpop zero".to_string()); + code.push(I::pop(Register::Zero)); } } } else { @@ -915,7 +889,7 @@ impl CodeGenerator { // Clean up arguments if args.len() > 0 { for _ in 0..(args.len()) { - code.push("\tpop zero".to_string()); + code.push(I::pop(Register::Zero)); } } } @@ -935,19 +909,19 @@ impl CodeGenerator { } => { let (expr_reg, expr_alloc) = self.generate_expression(expr, true, func_body)?; - code.extend(expr_alloc); + code.append(expr_alloc); let (index_reg, index_alloc) = self.generate_expression(index, true, func_body)?; - code.extend(index_alloc); + code.append(index_alloc); let (result_reg, result_alloc) = self.allocator.alloc_temp()?; - code.extend(result_alloc); + code.append(result_alloc); // add the expr pointer to the index to get the final address. - code.push(format!("\tadd {expr_reg} {index_reg} {result_reg}")); + code.push(I::add(expr_reg, index_reg, result_reg)); // load the value at the address. - code.push(format!("\tldw {result_reg} {result_reg}")); + code.push(I::ldw_reg(result_reg, result_reg)); self.allocator.free_temp(expr_reg); self.allocator.free_temp(index_reg); @@ -986,66 +960,3 @@ impl CodeGenerator { (val + 1).to_string() } } - -/// Build a single string from any number of arguments. -/// Each argument must implement `Display` or be convertible to a string. -#[macro_export] -macro_rules! dsa { - ($($arg:expr),* $(,)?) => {{ - // Start with an empty String – we’ll grow it as we go. - use std::fmt::Write; - let mut s = ::std::string::String::new(); - $( - // `write!` is cheaper than `format!` for each element - // because it re‑uses the same buffer. - - write!(s, "{}\n", $arg).expect("write to String failed"); - )* - s - }}; -} - -// ──────────────────────── dsa! ──────────────────────── -// A tiny helper that just turns its token‑stream into a string. -// The trailing comma is kept – it’s part of the syntax you want. -#[macro_export] -macro_rules! cmd { - ($($tokens:tt)*) => {{ - // We’ll just stringify the tokens and return a String. - format!("{}", concat!(stringify!($tokens), "\n")) - }}; -} - -// ──────────────────────── block! ──────────────────────── -// Usage: -// -// let asm = block![ "name" -// dsa![mov rg0, rg1], -// dsa![add rg1, rg1] -// ]; -// -// `asm` is a `&'static str` containing: -// -// name: -// mov rg0, rg1 -// add rg1, rg1 -// -#[macro_export] -macro_rules! block { - // The first token must be a string literal – that’s the label. - ($label:literal $(dsa![$($ins:tt)*]),* ) => {{ - // Build a single string at compile time. - const CODE: &str = concat!( - $label, ":\n", - // Each `dsa!` call yields a string like `"mov rg0, rg1"`. - // We add a newline after each one to get the desired layout. - $(concat!("\t", stringify!($($ins)*), "\n")),* - ); - CODE - }}; -} - -#[macro_export] -macro_rules! comment { - ($text:expr) => {{ format!("// {}", $text) }}; -} diff --git a/compiler/src/backend/dsa/instruction.rs b/compiler/src/backend/dsa/instruction.rs index e419b62..c91e5dd 100644 --- a/compiler/src/backend/dsa/instruction.rs +++ b/compiler/src/backend/dsa/instruction.rs @@ -1,58 +1,6 @@ use std::fmt; -use crate::{ - backend::dsa::registers::Register, - model::{CompilerError, Expression}, -}; - -pub struct CodeGen { - // For building the final program - program: InsBlock, - - // For generating temporary blocks - label_counter: usize, - stack_offset: i32, -} - -impl CodeGen { - pub fn new() -> Self { - Self { - program: InsBlock::new(), - label_counter: 0, - stack_offset: 0, - } - } - - /// Emit directly to program (for top-level constructs) - pub fn emit(&mut self, instr: Instruction) { - self.program.push(instr); - } - - /// Emit a block to program - pub fn emit_block(&mut self, block: InsBlock) { - self.program.append(block); - } - - /// Build expression (returns block for composition) - pub fn build_expr(&mut self, expr: &Expression) -> Result { - // ... returns InstrBlock - todo!() - } - - /// Get final output - pub fn finish(mut self) -> String { - // Optimize before final output - // self.program.remove_dead_code(); - // self.program.optimize_peephole(); - - self.program - .instructions - .iter() - .map(|i| i.to_string()) - .collect::>() - .join("\n") - } -} +use crate::backend::dsa::registers::Register; pub struct InsBlock { instructions: Vec, @@ -65,6 +13,10 @@ impl InsBlock { } } + pub fn insert(&mut self, index: usize, instr: Instruction) { + self.instructions.insert(index, instr); + } + pub fn push(&mut self, instr: Instruction) { self.instructions.push(instr); } @@ -90,11 +42,56 @@ impl InsBlock { } } +impl From> for InsBlock { + fn from(instructions: Vec) -> Self { + Self { instructions } + } +} + +impl From for InsBlock { + fn from(instr: Instruction) -> Self { + Self { + instructions: vec![instr], + } + } +} + +#[derive(Debug, Clone)] pub enum Instruction { // Labels and comments Label(Label), Comment(String), + // Data Directives + Db { + label: String, + data: Vec, + }, + Dh { + label: String, + data: Vec, + }, + Dw { + label: String, + data: Vec, + }, + DString { + // alias for db. + label: String, + data: String, + }, + + Resx { + label: String, + size: u32, + }, + + // Include + Include { + name: String, + path: String, + }, + // Data movement Mov { src: Register, @@ -107,28 +104,28 @@ pub enum Instruction { // Memory operations Ldb { - addr: MemOperand, + src: MemOperand, dest: Register, }, Ldh { - addr: MemOperand, + src: MemOperand, dest: Register, }, Ldw { - addr: MemOperand, + src: MemOperand, dest: Register, }, Stb { src: Register, - addr: MemOperand, + dest: MemOperand, }, Sth { src: Register, - addr: MemOperand, + dest: MemOperand, }, Stw { src: Register, - addr: MemOperand, + dest: MemOperand, }, // Immediate loads @@ -140,6 +137,14 @@ pub enum Instruction { imm: Imm, dest: Register, }, + Lwi { + imm: Imm, + dest: Register, + }, + LwiLabel { + label: String, + dest: Register, + }, // Arithmetic Add { @@ -214,8 +219,8 @@ pub enum Instruction { }, Shr { src1: Register, - rsh: Register, - ish: u16, + r_shamt: Register, + i_shamt: u16, dest: Register, }, @@ -270,23 +275,67 @@ pub enum Instruction { }, } +pub enum DataDirective { + U8(Vec), + U16(Vec), + U32(Vec), + String(String), + Char(char), +} + impl fmt::Display for Instruction { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Label(l) => write!(f, "{}:", l), - Self::Comment(c) => write!(f, "; {}", c), + Self::Comment(c) => write!(f, "// {}", c), + + Self::Include { name, path } => write!(f, "include {name}: \"{}\"", path), + + Self::Db { label, data } => write!( + f, + "db {}: {}", + label, + data.iter() + .map(|&b| format!("{:#04X}", b)) + .collect::>() + .join(", ") + ), + Self::Dh { label, data } => write!( + f, + "dh {}: {}", + label, + data.iter() + .map(|&b| format!("{:#06X}", b)) + .collect::>() + .join(", ") + ), + Self::Dw { label, data } => write!( + f, + "dw {}: {}", + label, + data.iter() + .map(|&b| format!("{:#08X}", b)) + .collect::>() + .join(", ") + ), + Self::DString { label, data } => write!(f, "db {}: \"{}\"", label, data), + + Self::Resx { label, size } => write!(f, "resx {}: {}", label, size), Self::Mov { src, dest } => write!(f, " mov {}, {}", src, dest), Self::Movs { src, dest } => write!(f, " movs {}, {}", src, dest), - Self::Ldb { addr, dest } => { - write!(f, " ldb {}, {}", format_mem_operand(addr), dest) + Self::Ldb { src: addr, dest } => { + let (reg, offset) = reg_and_offset(addr); + write!(f, " ldb {}, {}, {}", reg, dest, offset) } - Self::Ldh { addr, dest } => { - write!(f, " ldh {}, {}", format_mem_operand(addr), dest) + Self::Ldh { src: addr, dest } => { + let (reg, offset) = reg_and_offset(addr); + write!(f, " ldh {}, {}, {}", reg, dest, offset) } - Self::Ldw { addr, dest } => { - write!(f, " ldw {}, {}", format_mem_operand(addr), dest) + Self::Ldw { src, dest } => { + let (reg, offset) = reg_and_offset(src); + write!(f, " ldw {}, {}, {}", reg, dest, offset) } // Self::Ldbs { addr, dest } => { // write!(f, " ldbs {}, {}", format_mem_operand(addr), dest) @@ -297,18 +346,23 @@ impl fmt::Display for Instruction { // Self::Ldws { addr, dest } => { // write!(f, " ldws {}, {}", format_mem_operand(addr), dest) // } - Self::Stb { src, addr } => { - write!(f, " stb {}, {}", src, format_mem_operand(addr)) + Self::Stb { src, dest: addr } => { + let (reg, offset) = reg_and_offset(addr); + write!(f, " stb {}, {}, {}", src, reg, offset) } - Self::Sth { src, addr } => { - write!(f, " sth {}, {}", src, format_mem_operand(addr)) + Self::Sth { src, dest: addr } => { + let (reg, offset) = reg_and_offset(addr); + write!(f, " sth {}, {}, {}", src, reg, offset) } - Self::Stw { src, addr } => { - write!(f, " stw {}, {}", src, format_mem_operand(addr)) + Self::Stw { src, dest: addr } => { + let (reg, offset) = reg_and_offset(addr); + write!(f, " stw {}, {}, {}", src, reg, offset) } Self::Lli { imm, dest } => write!(f, " lli {}, {}", imm, dest), Self::Lui { imm, dest } => write!(f, " lui {}, {}", imm, dest), + Self::Lwi { imm, dest } => write!(f, " lwi {}, {}", imm, dest), + Self::LwiLabel { label, dest } => write!(f, " lwi {}, {}", label, dest), // arithmetic Self::Add { src1, src2, dest } => { @@ -340,16 +394,16 @@ impl fmt::Display for Instruction { } Self::IAdd { src, imm, dest } => { if let Some(d) = dest { - write!(f, " iadd {}, {}, {}", src, imm, d) + write!(f, " addi {}, {}, {}", src, imm, d) } else { - write!(f, " iadd {}, {}", src, imm) + write!(f, " addi {}, {}", src, imm) } } Self::ISub { src, imm, dest } => { if let Some(d) = dest { - write!(f, " isub {}, {}, {}", src, imm, d) + write!(f, " subi {}, {}, {}", src, imm, d) } else { - write!(f, " isub {}, {}", src, imm) + write!(f, " subi {}, {}", src, imm) } } @@ -364,8 +418,8 @@ impl fmt::Display for Instruction { } Self::Shr { src1, - rsh: r_shamt, - ish: i_shamt, + r_shamt, + i_shamt, dest, } => { write!(f, " shl {}, {}, {}, {}", src1, r_shamt, i_shamt, dest) @@ -401,47 +455,94 @@ impl fmt::Display for Instruction { } } } - impl Instruction { + // data directives + pub fn db_string(label: impl Into, data: impl Into) -> Self { + Self::DString { + label: label.into(), + data: data.into(), + } + } + + pub fn db_word(label: impl Into, data: u32) -> Self { + Self::Dw { + label: label.into(), + data: vec![data], + } + } + + pub fn db_bytes(label: impl Into, data: &[u8]) -> Self { + Self::Db { + label: label.into(), + data: data.to_vec(), + } + } + // Movement - pub fn mov(src: Register, dest: Register) -> Self { - Self::Mov { src, dest } + pub fn mov(src: R1, dest: R2) -> Self + where + R1: Into, + R2: Into, + { + Self::Mov { + src: src.into(), + dest: dest.into(), + } } // Memory loads - pub fn ldw_reg(base: Register, dest: Register) -> Self { + pub fn ldw_reg(base: R, dest: Register) -> Self + where + R: Into, + { Self::Ldw { - addr: MemOperand::RegIndirect(base), + src: MemOperand::RegIndirect(base.into()), dest, } } - pub fn ldw_reg_offset(base: Register, offset: i32, dest: Register) -> Self { + pub fn ldw_reg_offset(base: R, dest: Register, offset: i32) -> Self + where + R: Into, + { Self::Ldw { - addr: MemOperand::RegOffset(base, offset), + src: MemOperand::RegOffset(base.into(), offset), dest, } } pub fn ldw_label(label: impl Into