From a35cfbe8646ade06b40f47bb946fa21bee380dd4 Mon Sep 17 00:00:00 2001 From: zxq5 Date: Thu, 5 Feb 2026 01:09:14 +0000 Subject: [PATCH] updated compiler to support multiple frontends and backends --- Cargo.toml | 2 +- compiler/src/backend/dsa/codegen.rs | 738 ++++++++++++++++++ compiler/src/backend/dsa/mod.rs | 9 + compiler/src/{ => backend/dsa}/registers.rs | 66 +- compiler/src/backend/mod.rs | 13 + compiler/src/frontend/dsc/lexer.rs | 627 +++++++++++++++ compiler/src/frontend/dsc/mod.rs | 38 + compiler/src/{ => frontend/dsc}/parser.rs | 234 +----- .../src/frontend/dsc/semantic_analyser.rs | 13 + compiler/src/frontend/mod.rs | 15 + compiler/src/lib.rs | 78 +- compiler/src/main.rs | 2 +- compiler/src/model.rs | 213 +++++ compiler/src/semantic_analyser.rs | 13 - 14 files changed, 1737 insertions(+), 324 deletions(-) create mode 100644 compiler/src/backend/dsa/codegen.rs create mode 100644 compiler/src/backend/dsa/mod.rs rename compiler/src/{ => backend/dsa}/registers.rs (86%) create mode 100644 compiler/src/backend/mod.rs create mode 100644 compiler/src/frontend/dsc/lexer.rs create mode 100644 compiler/src/frontend/dsc/mod.rs rename compiler/src/{ => frontend/dsc}/parser.rs (78%) create mode 100644 compiler/src/frontend/dsc/semantic_analyser.rs create mode 100644 compiler/src/frontend/mod.rs create mode 100644 compiler/src/model.rs delete mode 100644 compiler/src/semantic_analyser.rs diff --git a/Cargo.toml b/Cargo.toml index b317ec7..9ae7c73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ cargo-features = ["codegen-backend"] [workspace] -members = ["emulator", "common", "assembler", "dsa_editor", "compiler", "c_compiler"] +members = ["emulator", "common", "assembler", "dsa_editor", "compiler"] resolver = "3" [workspace.package] diff --git a/compiler/src/backend/dsa/codegen.rs b/compiler/src/backend/dsa/codegen.rs new file mode 100644 index 0000000..e6a4dca --- /dev/null +++ b/compiler/src/backend/dsa/codegen.rs @@ -0,0 +1,738 @@ +use 
std::collections::HashMap; +use std::sync::atomic::AtomicU32; +use std::time::SystemTime; + +use chrono::{DateTime, Local}; + +use super::registers::RegisterAllocator; +use crate::{block, comment, dsa}; + +use crate::model::{ + BinaryOperator, CompilerError, ConstExpr, Declaration, Dependency, Expression, + Program, Statement, UnaryOperator, Variable, +}; + +pub struct CodeGenerator { + ast: Program, + imports: HashMap, + globals: Vec, + functions: Vec, + symbols: Vec, + allocator: RegisterAllocator, +} + +fn import(name: &str, path: &str) -> String { + format!("include {name}: \"{}\"", path) +} + +impl CodeGenerator { + const RET: &'static str = "\tjmp _ret"; + + pub fn new(ast: Program) -> Self { + CodeGenerator { + ast, + imports: HashMap::new(), + globals: Vec::new(), + functions: Vec::new(), + symbols: Vec::new(), + allocator: RegisterAllocator::new(), + } + } + + pub fn include(&mut self, name: &str, path: &str) { + self.imports.insert(name.to_string(), path.to_string()); + } + + fn is_global(&self, name: &str) -> bool { + // Check if this variable is in the globals list + self.globals + .iter() + .any(|g| g.contains(&format!("dw {}:", name))) + } + + pub fn generate(&mut self) -> Result { + // always include the print library for debugging! + self.include("print", "./lib/io/print.dsa"); + + for block in self.ast.clone().declarations { + match block { + Declaration::Variable { + var: Variable { name, .. }, + .. + } => self.symbols.push(name), + Declaration::Function { name, .. } => self.symbols.push(name), + Declaration::Dependency(Dependency { name, .. 
}) => { + self.symbols.push(name) + } + } + } + + for block in self.ast.clone().declarations { + self.generate_block(block.clone())?; + } + + self.generate_layout() + } + + fn generate_layout(&mut self) -> Result { + let datetime: DateTime = SystemTime::now().into(); + Ok(dsa![ + "", + comment!("GENERATED BY DSC COMPILER"), + comment!(format!( + "Generated at {}", + datetime.format("%Y-%m-%d %H:%M:%S") + )), + "", + // imports + comment!("Imports"), + self.imports + .iter() + .map(|(k, v)| import(k, v)) + .collect::>() + .join("\n"), + "", + // reserved memory + comment!("Globals & Reserved Memory"), + self.globals.join("\n"), + "", + // entry point + comment!("Entry Point"), + "dw stack: 0x10000", + "db message: \"Process Exited with code:\"", + block! [ "_init" + dsa![ldw stack, bpr], + dsa![mov bpr, spr], + dsa![push zero], + dsa![call main], + dsa![call print::print_newline], + dsa![lwi message, rg0], + dsa![push rg0], + dsa![call print::print], + dsa![pop zero], + dsa![call print::print_hex_word], + dsa![pop zero], + dsa![hlt] + ], + "", + comment!("Return"), + block! [ "_ret" + dsa![mov bpr, spr], + dsa![pop bpr], + dsa![return] + ], + comment!("Compiled Code Starts..."), + // block! [ "main" + // dsa![push bpr], + // dsa![mov spr, bpr], + // dsa![lwi 67, rg1], + // dsa![stw rg1, spr, 8], + // dsa![mov bpr, spr], + // dsa![pop bpr], + // dsa![return] + // ], + self.functions.join("\n"), + ]) + } + + fn generate_global(&mut self, name: &str, init: Option) { + self.globals.push(format!( + "dw {}: {}", + name, + init.unwrap_or(ConstExpr::Number(0)) + )) + } + + fn generate_block(&mut self, block: Declaration) -> Result<(), CompilerError> { + match block { + Declaration::Variable { var, init, .. } => { + self.generate_global(&var.name, init) + } + Declaration::Function { + name, params, body, .. 
+ } => { + let func = self.generate_function(&name, ¶ms, &body).join("\n"); + + self.functions.push(format!("{func}\n")); + } + Declaration::Dependency(Dependency { name, path }) => { + self.imports.insert(name, path); + } + }; + + Ok(()) + } + + // Example: Generate code for a function + fn generate_function( + &mut self, + name: &str, + params: &[Variable], + body: &[Statement], + ) -> Vec { + let mut code = Vec::new(); + + // Reset allocator for new function + self.allocator.reset(); + + // Function prologue + code.push(format!("{}:", name)); + code.push("\tpush bpr".to_string()); + code.push("\tmov spr, bpr".to_string()); + code.push(String::new()); + + // Allocate parameters to registers or stack locations + for (i, param) in params.iter().enumerate() { + let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8 + // Track that this parameter is at a stack location + let (reg, load_code) = self.allocator.alloc_var(¶m.name).unwrap(); + code.extend(load_code); + code.push(format!("\tldw bpr, {}, {}", reg, offset)); + } + + // Generate code for function body + for stmt in body { + let stmt_code = self.generate_statement(stmt).unwrap(); + code.extend(stmt_code); + } + + // automatically return at function end + if let Some(x) = code.last() + && x == Self::RET + { + } else { + code.push(Self::RET.to_string()); + } + + code + } + + // Example: Generate code for a statement + fn generate_statement( + &mut self, + stmt: &Statement, + ) -> Result, CompilerError> { + let mut code = Vec::new(); + + match stmt { + Statement::Declaration { var, value } => { + if let Some(expr) = value { + // Evaluate expression + let (result_reg, expr_code) = self.generate_expression(expr, true)?; + code.extend(expr_code); + + // Store result in variable + let store_code = self.allocator.store_var(&var.name, &result_reg); + code.extend(store_code); + + // Free temporary register + self.allocator.free_temp(&result_reg); + } else { + // Just declaring variable without initialization + 
self.allocator.alloc_var(&var.name)?; + } + } + + Statement::Break => unimplemented!(), + Statement::Continue => unimplemented!(), + + Statement::PtrWrite { ptr, value } => { + let (result_reg, expr_code) = self.generate_expression(value, true)?; + code.extend(expr_code); + + let (ptr_reg, ptr_code) = self.generate_expression(ptr, true)?; + code.extend(ptr_code); + + code.push(format!("\tstw {}, {}", result_reg, ptr_reg)); + + self.allocator.free_temp(&result_reg); + self.allocator.free_temp(&ptr_reg); + } + + Statement::Assign { varname, value } => { + // Evaluate expression + let (result_reg, expr_code) = self.generate_expression(value, true)?; + code.extend(expr_code); + + // Check if this is a global variable + if self.is_global(varname) { + // Store to global label + code.push(format!("\tstw {}, {}", result_reg, varname)); + } else { + // Store result in local variable + let store_code = self.allocator.store_var(varname, &result_reg); + code.extend(store_code); + } + + // Free temporary register + self.allocator.free_temp(&result_reg); + } + + Statement::Return(expr) => { + if let Some(e) = expr { + let (result_reg, expr_code) = self.generate_expression(e, true)?; + code.extend(expr_code); + code.push(format!("\tstw {}, bpr, 8", result_reg)); + code.push(format!("\tjmp _ret")); + self.allocator.free_temp(&result_reg); + } + } + + Statement::If { + condition, + then_stmt, + else_stmt, + } => { + // Generate condition + let (cond_reg, cond_code) = self.generate_expression(condition, true)?; + code.extend(cond_code); + + // Compare with zero + code.push(format!("\tcmp {}, zero", cond_reg)); + self.allocator.free_temp(&cond_reg); + + // Generate unique labels + let then_label = format!("_then_{}", self.get_unique_label()); + let else_label = format!("_else_{}", self.get_unique_label()); + let end_label = format!("_end_{}", self.get_unique_label()); + + // Jump to else if condition is false (equal to zero) + code.push(format!("\tjeq {}", else_label)); + + // Then 
block + code.push(format!("{}:", then_label)); + for s in then_stmt { + code.extend(self.generate_statement(s)?); + } + + if then_stmt.len() == 0 { + code.push("\tnop".to_string()); + } + + code.push(format!("\tjmp {}", end_label)); + + // Else block + code.push(format!("{}:", else_label)); + for s in else_stmt { + code.extend(self.generate_statement(s)?); + } + + if else_stmt.len() == 0 { + code.push("\tnop".to_string()); + } + + code.push(format!("{}:", end_label)); + } + + Statement::While { condition, body } => { + let loop_start = format!("_while_start_{}", self.get_unique_label()); + let loop_end = format!("_while_end_{}", self.get_unique_label()); + + code.push(format!("{}:", loop_start)); + + // Generate condition + let (cond_reg, cond_code) = self.generate_expression(condition, true)?; + code.extend(cond_code); + + code.push(format!("\tcmp {}, zero", cond_reg)); + self.allocator.free_temp(&cond_reg); + + code.push(format!("\tjeq {}", loop_end)); + + // Loop body + for s in body { + code.extend(self.generate_statement(s)?); + } + + code.push(format!("\tjmp {}", loop_start)); + code.push(format!("{}:", loop_end)); + } + + Statement::Loop(body) => { + let loop_start = format!("_loop_start_{}", self.get_unique_label()); + + code.push(format!("{}:", loop_start)); + + for s in body { + code.extend(self.generate_statement(s)?); + } + + code.push(format!("\tjmp {}", loop_start)); + } + + Statement::Expression { expr } => { + let (result_reg, expr_code) = self.generate_expression(expr, false)?; + code.extend(expr_code); + self.allocator.free_temp(&result_reg); + } + + Statement::Block(statements) => { + for s in statements { + code.extend(self.generate_statement(s)?); + } + } + } + + Ok(code) + } + + // Example: Generate code for an expression + // Returns (register containing result, assembly code) + fn generate_expression( + &mut self, + expr: &Expression, + use_result: bool, + ) -> Result<(String, Vec), CompilerError> { + let mut code = Vec::new(); + + // 
optimisation to prevent generating dead code! + if expr.is_pure() && !use_result { + return Ok((String::new(), code)); + } + + match expr { + Expression::StringLiteral(value) => { + let (reg, alloc_code) = self.allocator.alloc_temp()?; + code.extend(alloc_code); + + // write string into memory + let uuid = self.get_unique_label(); + code.push(format!("\tdb str_{uuid}: \"{value}\"")); + + // Load pointer to string + code.push(format!("\tlwi str_{uuid}, {reg}")); + + Ok((reg, code)) + } + + Expression::CharLiteral(value) => { + let (reg, alloc_code) = self.allocator.alloc_temp()?; + code.extend(alloc_code); + + // Load immediate value + code.push(format!("\tlli {}, {} // '{value}'", *value as u8, reg)); + + Ok((reg, code)) + } + + Expression::Number(value) => { + let (reg, alloc_code) = self.allocator.alloc_temp()?; + code.extend(alloc_code); + + // Load immediate value + code.push(format!("\tlli {}, {}", value & 0xFFFF, reg)); + if *value > 0xFFFF || *value < 0 { + code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg)); + } + + Ok((reg, code)) + } + + Expression::Variable { name, .. 
} => { + if self.is_global(&name.name) { + // Allocate a temporary register for the global + let (reg, alloc_code) = self.allocator.alloc_temp()?; + code.extend(alloc_code); + + // Load from global label + code.push(format!("\tldw {}, {}", name.name, reg)); + + Ok((reg, code)) + } else { + // Local variable - use existing allocator logic + let (reg, load_code) = self.allocator.load_var(&name.name)?; + code.extend(load_code); + Ok((reg, code)) + } + } + + Expression::Binary { op, left, right } => { + // Evaluate left operand + let (left_reg, left_code) = self.generate_expression(left, true)?; + code.extend(left_code); + + // Evaluate right operand + let (right_reg, right_code) = self.generate_expression(right, true)?; + code.extend(right_code); + + // Allocate result register + let (result_reg, result_alloc) = self.allocator.alloc_temp()?; + code.extend(result_alloc); + + // Generate operation + match op { + BinaryOperator::Add => { + code.push(format!( + "\tadd {}, {}, {}", + left_reg, right_reg, result_reg + )); + } + BinaryOperator::Sub => { + code.push(format!( + "\tsub {}, {}, {}", + left_reg, right_reg, result_reg + )); + } + BinaryOperator::Mul => { + self.include("maths", "./lib/maths/core.dsa"); + // Call multiply function + code.push(format!("\tpush {}", right_reg)); + code.push(format!("\tpush {}", left_reg)); + code.push("\tcall maths::multiply".to_string()); + code.push(format!("\tpop {}", result_reg)); + code.push("\tpop zero".to_string()); + } + // Comparison operators - return 1 (true) or 0 (false) + BinaryOperator::Eq => { + code.push(format!("\tcmp {}, {}", left_reg, right_reg)); + code.push(format!("\tlli 0, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1 + code.push(format!("\tlli 1, {}", result_reg)); + code.push(format!("{}:", end_label)); + } + BinaryOperator::Ne => { + code.push(format!("\tcmp {}, {}", left_reg, right_reg)); + 
code.push(format!("\tlli 0, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1 + code.push(format!("\tlli 1, {}", result_reg)); + code.push(format!("{}:", end_label)); + } + BinaryOperator::Lt => { + code.push(format!("\tcmp {}, {}", left_reg, right_reg)); + code.push(format!("\tlli 0, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1 + code.push(format!("\tlli 1, {}", result_reg)); + code.push(format!("{}:", end_label)); + } + BinaryOperator::Le => { + code.push(format!("\tcmp {}, {}", left_reg, right_reg)); + code.push(format!("\tlli 0, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1 + code.push(format!("\tlli 1, {}", result_reg)); + code.push(format!("{}:", end_label)); + } + BinaryOperator::Gt => { + code.push(format!("\tcmp {}, {}", left_reg, right_reg)); + code.push(format!("\tlli 0, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1 + code.push(format!("\tlli 1, {}", result_reg)); + code.push(format!("{}:", end_label)); + } + BinaryOperator::Ge => { + code.push(format!("\tcmp {}, {}", left_reg, right_reg)); + code.push(format!("\tlli 0, {}", result_reg)); + let end_label = format!("_cmp_end_{}", self.get_unique_label()); + code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1 + code.push(format!("\tlli 1, {}", result_reg)); + code.push(format!("{}:", end_label)); + } + _ => unimplemented!(), + } + + // Free operand registers (allocator will protect variables) + self.allocator.free_temp(&left_reg); + self.allocator.free_temp(&right_reg); + + Ok((result_reg, code)) + } + 
+ Expression::Call { name, args } => { + // first evaluate all the args we're going to need + let mut arg_regs = Vec::new(); + for arg in args.iter().rev() { + let (arg_reg, arg_code) = self.generate_expression(arg, true)?; + code.extend(arg_code); + arg_regs.push(arg_reg); + } + + // Save caller-saved registers and track which ones we saved + // old method, inefficient. + // let saved_regs = self.allocator.get_caller_saved_registers(); + // for reg in &saved_regs { + // code.push(format!("\tpush {}", reg)); + // } + + // Save caller-saved registers and track which ones we saved + let saved_regs = self.allocator.get_caller_saved_registers(); + for reg in &saved_regs { + // spill variables to stack + code.extend(self.allocator.spill_register(reg).unwrap()); + } + + // Evaluate and push arguments in reverse order + for (i, arg_reg) in arg_regs.iter().enumerate() { + code.push(format!( + "\tpush {} // push arg {}", + arg_reg, + args.len() - 1 - i + )); + } + + // if GLOBAL_METHODS.contains_key(name.name.as_str()) { + // code.push(format!("\tcall {}", + // GLOBAL_METHODS[name.name.as_str()])); } else + if self.symbols.contains(&name.name) { + // Call local function + code.push(format!("\tcall {}", name)); + } else if let Some(ns) = name.namespace.clone() + && self.imports.contains_key(&ns) + { + code.push(format!("\tcall {}", name)); + } else { + return Err(CompilerError::Undefined(name.clone())); + } + + let result_reg: String; + + if use_result { + let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?; + result_reg = temp_result_reg; + + code.extend(result_alloc); + code.push(format!("\tpop {}", result_reg)); + + // Clean up arguments + if args.len() > 1 { + for _ in 0..(args.len() - 1) { + code.push("\tpop zero".to_string()); + } + } + } else { + result_reg = "zero".to_string(); + + // Clean up arguments + if args.len() > 0 { + for _ in 0..(args.len()) { + code.push("\tpop zero".to_string()); + } + } + } + + // Restore caller-saved registers in reverse 
order (LIFO) + // for reg in saved_regs.iter().rev() { + // code.push(format!("\tpop {}", reg)); + // } + + // Free argument registers + for reg in arg_regs { + self.allocator.free_temp(®); + } + + Ok((result_reg, code)) + } + + Expression::Unary { op, operand } => { + let (operand_reg, operand_code) = + self.generate_expression(operand, true)?; + code.extend(operand_code); + + let (result_reg, result_alloc) = self.allocator.alloc_temp()?; + code.extend(result_alloc); + + match op { + UnaryOperator::Minus => { + // Negate: result = 0 - operand + code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg)); + } + UnaryOperator::Plus => { + // Just move + code.push(format!("\tmov {}, {}", operand_reg, result_reg)); + } + UnaryOperator::Dereference => { + code.push(format!("\tldw {}, {}", operand_reg, result_reg)); + } + UnaryOperator::Reference => { + code.extend(self.allocator.spill_register(&operand_reg)?); + code.push(format!( + "\tsubi bpr {} {}", + -(4 + self.allocator.get_stack_offset()), + result_reg + )) + } + } + + self.allocator.free_temp(&operand_reg); + Ok((result_reg, code)) + } + + Expression::Empty => Ok(("zero".to_string(), code)), + } + } + + // Helper for generating unique labels + fn get_unique_label(&mut self) -> String { + // You'd implement a counter here + static COUNTER: AtomicU32 = AtomicU32::new(0); + + let val = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst); + (val + 1).to_string() + } +} + +/// Build a single string from any number of arguments. +/// Each argument must implement `Display` or be convertible to a string. +#[macro_export] +macro_rules! dsa { + ($($arg:expr),* $(,)?) => {{ + // Start with an empty String – we’ll grow it as we go. + use std::fmt::Write; + let mut s = ::std::string::String::new(); + $( + // `write!` is cheaper than `format!` for each element + // because it re‑uses the same buffer. + + write!(s, "{}\n", $arg).expect("write to String failed"); + )* + s + }}; +} + +// ──────────────────────── dsa! 
// ──────────────────────── cmd! ────────────────────────
/// Stringifies its token stream and appends a newline, returning a `String`.
///
/// `cmd!(hlt)` evaluates to `"hlt\n"`.
#[macro_export]
macro_rules! cmd {
    ($($tokens:tt)*) => {{
        // `$tokens` is bound inside a repetition, so it has to be expanded
        // inside one as well; a bare `stringify!($tokens)` is rejected at
        // every call site ("variable is still repeating at this depth").
        ::std::string::String::from(concat!(stringify!($($tokens)*), "\n"))
    }};
}

// ──────────────────────── block! ────────────────────────
/// Builds a labelled assembly block as a `&'static str` at compile time.
///
/// Usage:
///
/// ```text
/// let asm = block![ "name"
///     dsa![mov rg0, rg1],
///     dsa![add rg1, rg1]
/// ];
/// ```
///
/// `asm` is then the label followed by one tab-indented line per instruction:
///
/// ```text
/// name:
///     mov rg0, rg1
///     add rg1, rg1
/// ```
///
/// The `dsa![…]` wrappers are matched as literal tokens; the `dsa!` macro
/// itself is never invoked. A trailing comma after the last instruction is
/// accepted.
#[macro_export]
macro_rules! block {
    // The first token must be a string literal – that's the label.
    ($label:literal $(dsa![$($ins:tt)*]),* $(,)?) => {{
        const CODE: &str = concat!(
            $label, ":\n",
            // One line per instruction: tab, stringified tokens, newline.
            $(concat!("\t", stringify!($($ins)*), "\n")),*
        );
        CODE
    }};
}
comment { + ($text:expr) => {{ format!("// {}", $text) }}; +} diff --git a/compiler/src/backend/dsa/mod.rs b/compiler/src/backend/dsa/mod.rs new file mode 100644 index 0000000..8ee13d7 --- /dev/null +++ b/compiler/src/backend/dsa/mod.rs @@ -0,0 +1,9 @@ +use crate::model::{CompilerError, Program}; + +mod codegen; +mod registers; + +pub fn generate_code(ast: &Program) -> Result { + let mut codegen = codegen::CodeGenerator::new(ast.clone()); + codegen.generate() +} diff --git a/compiler/src/registers.rs b/compiler/src/backend/dsa/registers.rs similarity index 86% rename from compiler/src/registers.rs rename to compiler/src/backend/dsa/registers.rs index e07949c..00f59e7 100644 --- a/compiler/src/registers.rs +++ b/compiler/src/backend/dsa/registers.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use crate::parser::CompilerError; +use crate::model::CompilerError; /// Register allocator for DSA assembly generation /// Manages general-purpose registers (rg0-rgf) and handles stack spilling @@ -147,7 +147,7 @@ impl RegisterAllocator { } /// Get the current location of a variable - pub fn get_var_location(&self, var_name: &str) -> Option<&Location> { + pub fn _get_var_location(&self, var_name: &str) -> Option<&Location> { self.variable_locations.get(var_name) } @@ -264,7 +264,7 @@ impl RegisterAllocator { } /// Spill all registers to stack (useful before function calls) - pub fn spill_all(&mut self) -> Vec { + pub fn _spill_all(&mut self) -> Vec { let mut code = Vec::new(); let regs_to_spill: Vec = self.register_contents.keys().cloned().collect(); @@ -284,7 +284,7 @@ impl RegisterAllocator { } /// Get the total stack space needed for local variables - pub fn get_stack_size(&self) -> i32 { + pub fn _get_stack_size(&self) -> i32 { -self.stack_offset // Convert negative offset to positive size } @@ -298,7 +298,7 @@ impl RegisterAllocator { /// Mark a variable as dead (no longer needed) /// Frees its register if it's in one - pub fn free_var(&mut self, var_name: &str) { + 
pub fn _free_var(&mut self, var_name: &str) { if let Some(Location::Register(reg)) = self.variable_locations.get(var_name) { let reg = reg.clone(); self.register_contents.remove(®); @@ -319,12 +319,12 @@ impl RegisterAllocator { /// Save caller-saved registers before a function call /// Returns assembly code to save them - pub fn save_caller_saved(&mut self) -> Vec { + pub fn _save_caller_saved(&mut self) -> Vec { let mut code = Vec::new(); // For simplicity, save all currently used registers // In a more sophisticated compiler, you'd only save registers that are live - for (reg, var_name) in self.register_contents.clone() { + for (reg, _) in self.register_contents.clone() { if *self.in_use.get(®).unwrap_or(&false) { code.push(format!("\tpush {}", reg)); } @@ -335,7 +335,7 @@ impl RegisterAllocator { /// Restore caller-saved registers after a function call /// Returns assembly code to restore them - pub fn restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec { + pub fn _restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec { let mut code = Vec::new(); // Restore in reverse order (LIFO) @@ -346,53 +346,3 @@ impl RegisterAllocator { code } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_basic_allocation() { - let mut allocator = RegisterAllocator::new(); - - let (reg1, code1) = allocator.alloc_temp().unwrap(); - assert_eq!(code1.len(), 0); // No spill needed - assert_eq!(reg1, "rg0"); - - let (reg2, code2) = allocator.alloc_temp().unwrap(); - assert_eq!(code2.len(), 0); - assert_eq!(reg2, "rg1"); - - allocator.free_temp(®1); - - let (reg3, code3) = allocator.alloc_temp().unwrap(); - assert_eq!(code3.len(), 0); - assert_eq!(reg3, "rg0"); // Reuses freed register - } - - #[test] - fn test_variable_allocation() { - let mut allocator = RegisterAllocator::new(); - - let (reg, _) = allocator.alloc_var("x").unwrap(); - assert_eq!(reg, "rg0"); - - // Requesting same variable again should return same register - let (reg2, _) = 
allocator.alloc_var("x").unwrap(); - assert_eq!(reg2, "rg0"); - } - - #[test] - fn test_stack_allocation() { - let mut allocator = RegisterAllocator::new(); - - // Allocate all 16 registers - for i in 0..16 { - allocator.alloc_var(&format!("var{}", i)).unwrap(); - } - - // Next allocation should spill to stack - let (reg, code) = allocator.alloc_var("var16").unwrap(); - assert!(code.len() > 0); // Should have spill code - } -} diff --git a/compiler/src/backend/mod.rs b/compiler/src/backend/mod.rs new file mode 100644 index 0000000..9e912e8 --- /dev/null +++ b/compiler/src/backend/mod.rs @@ -0,0 +1,13 @@ +use crate::model::{CompilerError, Program}; + +mod dsa; + +pub fn compiler_backend(ext: &str, ast: &Program) -> Result { + match ext { + "dsa" => Ok(dsa::generate_code(ast)?), + _ => Err(CompilerError::Generic(format!( + "File type {} not supported", + ext + ))), + } +} diff --git a/compiler/src/frontend/dsc/lexer.rs b/compiler/src/frontend/dsc/lexer.rs new file mode 100644 index 0000000..c41a62b --- /dev/null +++ b/compiler/src/frontend/dsc/lexer.rs @@ -0,0 +1,627 @@ +use std::iter::Peekable; +use std::str::Chars; + +#[derive(Debug, PartialEq, Clone)] +pub enum Token { + // Keywords + Fn, + Let, + If, + Else, + Loop, + While, + Break, + Return, + Continue, + Include, + Static, + Const, + + // Identifiers and literals + Identifier(Name), + String(String), + Integer(u64), + Char(char), + + // Symbols + LeftParen, // ( + RightParen, // ) + LeftBrace, // { + RightBrace, // } + Semicolon, // ; + Colon, // : + Comma, // , + + // Operators + Plus, // + + Minus, // - + Star, // * + Amphersand, // & + Slash, // / + Assign, // = + EqualEqual, // == + Bang, // ! 
+ BangEqual, // != + Less, // < + LessEqual, // <= + Greater, // > + GreaterEqual, // >= + RightArrow, // -> + + // Special + Eof, +} + +use std::fmt; + +use crate::model::Name; + +impl fmt::Display for Name { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(ref ns) = self.namespace { + write!(f, "{}::{}", ns, self.name) + } else { + write!(f, "{}", self.name) + } + } +} + +impl Token { + pub fn tt(&self) -> &str { + match self { + Token::Const => "Const", + Token::Static => "Static", + Token::Include => "Include", + Token::Fn => "Fn", + Token::If => "If", + Token::Let => "Let", + Token::Else => "Else", + Token::Loop => "Loop", + Token::While => "While", + Token::Break => "Break", + Token::Return => "Return", + Token::Continue => "Continue", + Token::Identifier(_) => "Identifier", + Token::String(_) => "String", + Token::Integer(_) => "UnsignedInt", + Token::Char(_) => "Char", + Token::LeftParen => "LeftParen", + Token::RightParen => "RightParen", + Token::LeftBrace => "LeftBrace", + Token::RightBrace => "RightBrace", + Token::Semicolon => "Semicolon", + Token::Colon => "Colon", + Token::Comma => "Comma", + Token::RightArrow => "RightArrow", + Token::Plus => "Plus", + Token::Minus => "Minus", + Token::Star => "Star", + Token::Amphersand => "Amphersand", + Token::Slash => "Slash", + Token::Assign => "Assign", + Token::EqualEqual => "EqualEqual", + Token::Bang => "Bang", + Token::BangEqual => "BangEqual", + Token::Less => "Less", + Token::LessEqual => "LessEqual", + Token::Greater => "Greater", + Token::GreaterEqual => "GreaterEqual", + Token::Eof => "Eof", + } + } +} + +#[derive(Debug)] +pub struct Lexer<'a> { + chars: Peekable>, + current: Option, + line: usize, +} + +impl<'a> Lexer<'a> { + pub fn new(input: &'a str) -> Self { + let mut chars = input.chars().peekable(); + let current = chars.next(); + + Lexer { + chars, + current, + line: 1, + } + } + + fn advance(&mut self) -> Option { + self.current = self.chars.next(); + self.current + 
} + + fn peek(&mut self) -> Option<&char> { + self.chars.peek() + } + + fn skip_whitespace(&mut self) { + while let Some(c) = self.current { + if !c.is_whitespace() { + break; + } + if c == '\n' { + self.line += 1; + } + self.advance(); + } + } + + fn skip_line_comment(&mut self) { + // Skip the two slashes + self.advance(); // first / + self.advance(); // second / + + // Skip until newline or EOF + while let Some(c) = self.current { + if c == '\n' { + self.line += 1; + self.advance(); + break; + } + self.advance(); + } + } + + fn skip_block_comment(&mut self) -> Result<(), String> { + // Skip the /* + self.advance(); // / + self.advance(); // * + + let start_line = self.line; + + // Look for */ + while let Some(c) = self.current { + if c == '\n' { + self.line += 1; + } + + if c == '*' { + if let Some(&next) = self.peek() { + if next == '/' { + self.advance(); // * + self.advance(); // / + return Ok(()); + } + } + } + + self.advance(); + } + + Err(format!( + "Unterminated block comment starting at line {}", + start_line + )) + } + + fn skip_whitespace_and_comments(&mut self) { + loop { + self.skip_whitespace(); + + // Check for comments + if let Some('/') = self.current { + if let Some(&next) = self.peek() { + match next { + '/' => { + self.skip_line_comment(); + continue; + } + '*' => { + if let Err(e) = self.skip_block_comment() { + eprintln!("Lexer error: {}", e); + } + continue; + } + _ => break, + } + } + } + + break; + } + } + + fn read_identifier(&mut self) -> String { + let mut ident = String::new(); + + // Include the current character if it's valid + if let Some(c) = self.current { + if c.is_alphabetic() || c == '_' { + ident.push(c); + } + } + + // Read remaining characters + while let Some(&c) = self.peek() { + if c.is_alphanumeric() || c == '_' { + self.advance(); + ident.push(c); + } else { + break; + } + } + + ident + } + + fn keyword_or_identifier(&mut self) -> Token { + let first_ident = self.read_identifier(); + + // Check if it's a keyword first 
(keywords can't have namespaces) + let keyword = match first_ident.as_str() { + "fn" => Some(Token::Fn), + "if" => Some(Token::If), + "else" => Some(Token::Else), + "while" => Some(Token::While), + "loop" => Some(Token::Loop), + "break" => Some(Token::Break), + "return" => Some(Token::Return), + "continue" => Some(Token::Continue), + "include" => Some(Token::Include), + "let" => Some(Token::Let), + "const" => Some(Token::Const), + "static" => Some(Token::Static), + _ => None, + }; + + if let Some(kw) = keyword { + return kw; + } + + // Not a keyword - check for namespace separator (::) + // We need to peek TWO characters ahead without consuming anything + if let Some(&':') = self.peek() { + // We see one colon, but we need to check if there's another one after it + // We can't peek two ahead directly, so we need a different approach + + // Save the current position by using a temporary peekable iterator + // Actually, we can't do that easily. Instead, let's just check: + // If we see ':', temporarily advance and check the next char + + // Create a temporary check + let mut temp_chars = self.chars.clone(); + let _ = temp_chars.next(); // This is the ':' we already saw + let second_peek = temp_chars.peek(); + + if let Some(&':') = second_peek { + // It's :: - consume both colons + self.advance(); // consume first : + self.advance(); // consume second : + + // Read the second identifier (the actual name) + let second_ident = self.read_identifier(); + + // Return namespaced identifier + return Token::Identifier(Name { + namespace: Some(first_ident), + name: second_ident, + }); + } + // else: It's a single colon (type annotation) - DON'T consume it + // Just fall through and return the identifier + } + + // No namespace separator - just a regular identifier + Token::Identifier(Name { + namespace: None, + name: first_ident, + }) + } + + fn read_number(&mut self) -> Result { + let current = self.current.unwrap(); + + // Check for hex (0x) or binary (0b) prefix + if 
current == '0' { + if let Some(&next_char) = self.peek() { + match next_char { + 'x' | 'X' => { + self.advance(); // consume '0' + self.advance(); // consume 'x' + return self.read_hex_number(); + } + 'b' | 'B' => { + self.advance(); // consume '0' + self.advance(); // consume 'b' + return self.read_binary_number(); + } + _ => {} + } + } + } + + // Read decimal number + self.read_decimal_number() + } + + fn read_decimal_number(&mut self) -> Result { + let mut num_str = String::new(); + + if let Some(c) = self.current { + num_str.push(c); + } + + while let Some(&c) = self.peek() { + if c.is_ascii_digit() { + self.advance(); + num_str.push(c); + } else { + break; + } + } + + num_str + .parse::() + .map_err(|_| format!("Invalid decimal number: {}", num_str)) + } + + fn read_hex_number(&mut self) -> Result { + let mut num_str = String::new(); + + // Read current character if it's a hex digit + if let Some(c) = self.current { + if c.is_ascii_hexdigit() { + num_str.push(c); + } + } + + while let Some(&c) = self.peek() { + if c.is_ascii_hexdigit() { + self.advance(); + num_str.push(c); + } else { + break; + } + } + + if num_str.is_empty() { + return Err("Invalid hexadecimal number: no digits after 0x".to_string()); + } + + u64::from_str_radix(&num_str, 16) + .map_err(|_| format!("Invalid hexadecimal number: {}", num_str)) + } + + fn read_binary_number(&mut self) -> Result { + let mut num_str = String::new(); + + // Read current character if it's a binary digit + if let Some(c) = self.current { + if c == '0' || c == '1' { + num_str.push(c); + } + } + + while let Some(&c) = self.peek() { + if c == '0' || c == '1' { + self.advance(); + num_str.push(c); + } else { + break; + } + } + + if num_str.is_empty() { + return Err("Invalid binary number: no digits after 0b".to_string()); + } + + u64::from_str_radix(&num_str, 2) + .map_err(|_| format!("Invalid binary number: {}", num_str)) + } + + fn read_string(&mut self) -> Result { + self.advance(); // Skip the opening quote + let 
mut s = String::new(); + + while let Some(c) = self.current { + if c == '"' { + return Ok(s); + } + + // Handle escape sequences + if c == '\\' { + self.advance(); + if let Some(escaped) = self.current { + let escaped_char = match escaped { + 'n' => '\n', + 't' => '\t', + 'r' => '\r', + '\\' => '\\', + '"' => '"', + _ => escaped, // For now, just use the character as-is + }; + s.push(escaped_char); + } else { + return Err("Unexpected end of string after escape".to_string()); + } + } else { + s.push(c); + } + + self.advance(); + } + + Err("Unterminated string literal".to_string()) + } + + fn match_next(&mut self, expected: char) -> bool { + match self.peek() { + Some(&c) if c == expected => { + self.advance(); + true + } + _ => false, + } + } + + fn scan_single_char_token(&mut self, c: char) -> Option { + match c { + '(' => Some(Token::LeftParen), + ')' => Some(Token::RightParen), + '{' => Some(Token::LeftBrace), + '}' => Some(Token::RightBrace), + ';' => Some(Token::Semicolon), + ',' => Some(Token::Comma), + '&' => Some(Token::Amphersand), + '+' => Some(Token::Plus), + '*' => Some(Token::Star), + _ => None, + } + } + + fn scan_operator(&mut self, c: char) -> Option { + match c { + '-' => Some(if self.match_next('>') { + Token::RightArrow + } else { + Token::Minus + }), + '!' 
=> Some(if self.match_next('=') { + Token::BangEqual + } else { + Token::Bang + }), + '=' => Some(if self.match_next('=') { + Token::EqualEqual + } else { + Token::Assign + }), + '<' => Some(if self.match_next('=') { + Token::LessEqual + } else { + Token::Less + }), + '>' => Some(if self.match_next('=') { + Token::GreaterEqual + } else { + Token::Greater + }), + ':' => { + // Single colon (for type annotations) + // Note: :: is handled in keyword_or_identifier for namespaces + Some(Token::Colon) + } + '/' => { + // Check if it's a comment or division + if let Some(&next) = self.peek() { + if next == '/' || next == '*' { + // It's a comment, don't consume it here + // Let skip_whitespace_and_comments handle it + None + } else { + Some(Token::Slash) + } + } else { + Some(Token::Slash) + } + } + _ => None, + } + } + + pub fn next_token(&mut self) -> Token { + self.skip_whitespace_and_comments(); + + let Some(c) = self.current else { + return Token::Eof; + }; + + // Try single-character tokens first + if let Some(token) = self.scan_single_char_token(c) { + self.advance(); + return token; + } + + // Try operators (may be multi-character) + if let Some(token) = self.scan_operator(c) { + self.advance(); + return token; + } + + // Char literals + if c == '\'' { + let mut value = ' '; + self.advance(); + if let Some(ch) = self.current { + value = ch; + self.advance(); + } + if self.current == Some('\'') { + self.advance(); + return Token::Char(value); + } + eprintln!("Lexer error on line {}: Invalid char literal", self.line); + } + + // String literals + if c == '"' { + let token = match self.read_string() { + Ok(s) => Token::String(s), + Err(e) => { + eprintln!("Lexer error on line {}: {}", self.line, e); + // Skip to next quote or end + while let Some(ch) = self.current { + if ch == '"' || ch == '\n' { + break; + } + self.advance(); + } + Token::String(String::new()) + } + }; + self.advance(); + return token; + } + + // Identifiers and keywords (including namespaced 
identifiers) + if c.is_alphabetic() || c == '_' { + let token = self.keyword_or_identifier(); + self.advance(); + return token; + } + + // Numbers (decimal, hex, binary) + if c.is_ascii_digit() { + let token = match self.read_number() { + Ok(num) => Token::Integer(num), + Err(e) => { + eprintln!("Lexer error on line {}: {}", self.line, e); + // Skip invalid number + while let Some(&ch) = self.peek() { + if !ch.is_alphanumeric() { + break; + } + self.advance(); + } + Token::Integer(0) + } + }; + self.advance(); + return token; + } + + // Unknown character - skip it + eprintln!( + "Lexer warning on line {}: Skipping unknown character '{}'", + self.line, c + ); + self.advance(); + self.next_token() + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Token; + + fn next(&mut self) -> Option { + match self.next_token() { + Token::Eof => None, + token => Some(token), + } + } +} diff --git a/compiler/src/frontend/dsc/mod.rs b/compiler/src/frontend/dsc/mod.rs new file mode 100644 index 0000000..b4a6666 --- /dev/null +++ b/compiler/src/frontend/dsc/mod.rs @@ -0,0 +1,38 @@ +use common::logging::log; + +use crate::model::{CompilerError, Program}; +use parser::{ParseResult, Parser}; +use semantic_analyser::Analyser; + +pub mod lexer; +pub mod parser; +pub mod semantic_analyser; + +pub fn generate_ast(input: &str) -> Result { + log("Tokenising Input..."); + + let lexer = lexer::Lexer::new(&input); + let tokens = lexer.collect::>(); + // println!("{tokens:?}"); + + log(&format!("Parsing {} Tokens...", tokens.len())); + + let mut parser = Parser::new(tokens); + let ast = match parser.parse() { + ParseResult::Accept(ast) => ast, + ParseResult::Reject(e) => return Err(e), + ParseResult::Deny => { + return Err(CompilerError::Generic("Parser used ::Deny".to_string())); + } + }; + // println!("{ast:#?}"); + + log("Analyzing AST..."); + log("Checking Type Information..."); + + let analyser = Analyser::new(); + analyser.analyse(ast.clone()).unwrap(); + + log("Type Checking 
Complete..."); + Ok(ast) +} diff --git a/compiler/src/parser.rs b/compiler/src/frontend/dsc/parser.rs similarity index 78% rename from compiler/src/parser.rs rename to compiler/src/frontend/dsc/parser.rs index 6b56d55..752fbba 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/frontend/dsc/parser.rs @@ -1,6 +1,9 @@ -use crate::lexer::{Name, Token}; +use super::lexer::Token; +use crate::model::{ + BinaryOperator, Block, CompilerError, ConstExpr, Declaration, Dependency, Expression, + Program, Statement, TypeId, UnaryOperator, Variable, +}; use crate::{expect_tt, expect_value}; -use core::fmt; use std::ops::{ControlFlow, FromResidual, Try}; #[derive(Debug, Clone)] @@ -10,16 +13,6 @@ pub enum ParseResult { Reject(E), } -#[derive(Debug, Clone)] -pub enum CompilerError { - UnexpectedToken(Token), - UnexpectedEndOfInput, - UnexpectedCharacter(char), - Undefined(Name), - InvalidSyntax(String), - Generic(String), -} - pub struct Parser { tokens: Vec, idx: usize, @@ -86,7 +79,11 @@ impl Parser { let init = match value { Token::String(x) => Some(ConstExpr::String(x)), Token::Integer(x) => Some(ConstExpr::Number(x as i32)), - _ => return ParseResult::Reject(CompilerError::UnexpectedToken(value)), + _ => { + return ParseResult::Reject(CompilerError::UnexpectedToken( + value.tt().to_string(), + )); + } }; let _ = expect_tt!(self.next()?, Semicolon)?; @@ -141,7 +138,9 @@ impl Parser { body: self.parse_block()?, }) } else { - ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?)) + ParseResult::Reject(CompilerError::UnexpectedToken( + self.peek_next()?.tt().to_string(), + )) } } @@ -268,7 +267,7 @@ impl Parser { expr } else { return ParseResult::Reject(CompilerError::UnexpectedToken( - self.peek_next()?, + self.peek_next()?.tt().to_string(), )); }; @@ -341,7 +340,9 @@ impl Parser { }); } - ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?)) + ParseResult::Reject(CompilerError::UnexpectedToken( + self.peek_next()?.tt().to_string(), + )) } fn 
parse_expression(&mut self) -> ParseResult { @@ -463,7 +464,9 @@ impl Parser { let _ = expect_tt!(self.next()?, RightParen)?; ParseResult::Accept(expr) } - _ => ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?)), + _ => ParseResult::Reject(CompilerError::UnexpectedToken( + self.peek_next()?.tt().to_string(), + )), } } @@ -525,197 +528,6 @@ impl Parser { } } -#[derive(Debug, Clone)] -pub struct Program { - pub declarations: Vec, -} - -#[derive(Debug, Clone)] -pub enum Declaration { - Function { - name: String, - return_type: TypeId, - params: Vec, - body: Block, - }, - Variable { - var: Variable, - init: Option, - is_const: bool, - }, - Dependency(Dependency), -} - -#[derive(Debug, Clone)] -pub struct Dependency { - pub name: String, - pub path: String, -} - -#[derive(Debug, Clone)] -pub enum TypeId { - U8, - U16, - U32, - I8, - I16, - I32, - Char, - Void, - Ptr(Box), - Ref(Box), - Array(Box, usize), - Struct { name: Name, fields: Vec }, -} - -pub type Block = Vec; - -#[derive(Debug, Clone)] -pub struct Variable { - pub name: String, - pub type_id: TypeId, -} - -#[derive(Debug, Clone)] -pub enum Statement { - Block(Block), - Declaration { - var: Variable, - value: Option, - }, - Assign { - varname: String, - value: Expression, - }, - PtrWrite { - ptr: Expression, - value: Expression, - }, - Expression { - expr: Expression, - }, - If { - condition: Expression, - then_stmt: Block, - else_stmt: Block, - }, - While { - condition: Expression, - body: Vec, - }, - Loop(Block), - Break, - Continue, - Return(Option), -} - -#[derive(Debug, Clone)] -pub enum ConstExpr { - Number(i32), - String(String), -} - -impl fmt::Display for ConstExpr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - ConstExpr::Number(n) => write!(f, "{}", n), - ConstExpr::String(s) => write!(f, "\"{}\"", s), - } - } -} - -#[derive(Debug, Clone)] -pub enum Expression { - Empty, - Binary { - op: BinaryOperator, - left: Box, - right: Box, - }, - Unary { - op: 
UnaryOperator, - operand: Box, - }, - Variable { - name: Name, - expr_type: Option, - }, - Call { - name: Name, - args: Vec, - }, - Number(isize), - StringLiteral(String), - CharLiteral(char), -} - -impl Expression { - pub fn is_pure(&self) -> bool { - match self { - Expression::Number(_) => true, - Expression::StringLiteral(_) => true, - Expression::CharLiteral(_) => true, - Expression::Call { name, args } => false, /* TODO: will require checking */ - // if the associated function - // body is pure - Expression::Binary { left, right, .. } => left.is_pure() && right.is_pure(), - Expression::Unary { op, operand } => operand.is_pure(), - Expression::Empty => true, - Expression::Variable { name, expr_type } => true, - } - } -} - -#[derive(Debug, Clone, PartialEq)] -pub enum BinaryOperator { - Add, - Sub, - Mul, - Div, - Eq, - Ne, - Lt, - Gt, - Le, - Ge, -} - -impl fmt::Display for BinaryOperator { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - BinaryOperator::Add => write!(f, "+"), - BinaryOperator::Sub => write!(f, "-"), - BinaryOperator::Mul => write!(f, "*"), - BinaryOperator::Div => write!(f, "/"), - BinaryOperator::Eq => write!(f, "=="), - BinaryOperator::Ne => write!(f, "!="), - BinaryOperator::Lt => write!(f, "<"), - BinaryOperator::Gt => write!(f, ">"), - BinaryOperator::Le => write!(f, "<="), - BinaryOperator::Ge => write!(f, ">="), - } - } -} - -#[derive(Debug, Clone, PartialEq)] -pub enum UnaryOperator { - Plus, - Minus, - Reference, - Dereference, -} - -impl fmt::Display for UnaryOperator { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - UnaryOperator::Plus => write!(f, "+"), - UnaryOperator::Minus => write!(f, "-"), - UnaryOperator::Dereference => write!(f, "*"), - UnaryOperator::Reference => write!(f, "&"), - } - } -} - impl ParseResult { pub fn accepted(&self) -> bool { matches!(self, ParseResult::Accept(_)) @@ -772,7 +584,7 @@ macro_rules! 
expect_tt { )+ _ => { // let expected = format!("[{}]", vec![$(stringify!($variant)),+].join(" | ")); - ParseResult::Reject(CompilerError::UnexpectedToken(token)) + ParseResult::Reject(CompilerError::UnexpectedToken(tt)) } } }}; @@ -784,7 +596,9 @@ macro_rules! expect_value { let tok = $expr; match tok.clone() { Token::$variant(value) => ParseResult::Accept(value), - _ => ParseResult::Reject(CompilerError::UnexpectedToken(tok)), + _ => { + ParseResult::Reject(CompilerError::UnexpectedToken(tok.tt().to_string())) + } } }}; } diff --git a/compiler/src/frontend/dsc/semantic_analyser.rs b/compiler/src/frontend/dsc/semantic_analyser.rs new file mode 100644 index 0000000..2b18e2c --- /dev/null +++ b/compiler/src/frontend/dsc/semantic_analyser.rs @@ -0,0 +1,13 @@ +use crate::model::{CompilerError, Program}; + +pub struct Analyser; + +impl Analyser { + pub fn new() -> Self { + Self + } + + pub fn analyse(&self, _ast: Program) -> Result<(), CompilerError> { + Ok(()) + } +} diff --git a/compiler/src/frontend/mod.rs b/compiler/src/frontend/mod.rs new file mode 100644 index 0000000..a17ba62 --- /dev/null +++ b/compiler/src/frontend/mod.rs @@ -0,0 +1,15 @@ +use crate::model::{CompilerError, Program}; + +mod c; +mod dsc; + +pub fn compiler_frontend(ext: &str, data: &str) -> Result { + match ext { + "dsc" => Ok(dsc::generate_ast(&data)?), + "c" => Ok(c::generate_ast(&data)?), + _ => Err(CompilerError::Generic(format!( + "File type {} not supported", + ext + ))), + } +} diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index c94ab1a..41fb48b 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -4,17 +4,12 @@ use std::path::Path; use common::logging::log; -use crate::{ - codegen::CodeGenerator, - parser::{ParseResult, Parser}, - semantic_analyser::Analyser, -}; +use crate::specialised::build_specialised; -mod codegen; -mod lexer; -mod parser; -mod registers; -mod semantic_analyser; +mod backend; +mod frontend; +mod model; +mod specialised; pub fn compile_file( 
input_path: &Path, @@ -22,43 +17,44 @@ pub fn compile_file( ) -> Result<(), Box> { let input = std::fs::read_to_string(input_path).expect("Failed to read input file"); - log("Tokenising Input..."); + let input_ext = input_path + .extension() + .and_then(|s| s.to_str()) + .unwrap_or(""); - let lexer = lexer::Lexer::new(&input); - let tokens = lexer.collect::>(); - // println!("{tokens:?}"); + // check if we're using a specialised compiler + if let Some(output) = build_specialised(input_ext, &input) { + let result = match output { + Ok(output) => output, + Err(err) => return Err(format!("Compilation failed: {err:?}").into()), + }; - log(&format!("Parsing {} Tokens...", tokens.len())); + std::fs::write(output_path, &result).expect("Failed to write output"); - let mut parser = Parser::new(tokens); - let ast = match parser.parse() { - ParseResult::Accept(ast) => ast, - ParseResult::Reject(e) => { - eprintln!("Error: {e:?}"); - return Err("Parsing error".into()); - } - ParseResult::Deny => { - panic!("Parser denied parsing") - } + log(&format!( + "Compilation Successful ✅ \n\tSource: {}\n\tOutput: {}\n", + input_path.display(), + output_path.display(), + )); + + return Ok(()); + } + + // Parse the input using the frontend, providing the file extension and data. 
+ let ast = match frontend::compiler_frontend(input_ext, &input) { + Ok(ast) => ast, + Err(err) => return Err(format!("Compilation failed: {err:?}").into()), }; - // println!("{ast:#?}"); - log("Analyzing AST..."); - log("Checking Type Information..."); + let output_ext = output_path + .extension() + .and_then(|s| s.to_str()) + .unwrap_or(""); - let analyser = Analyser::new(); - analyser.analyse(ast.clone()).unwrap(); - - log("Generating Code..."); - - // Code Gen - let mut generator = CodeGenerator::new(ast); - let result = match generator.generate() { - Ok(code) => code, - Err(e) => { - eprintln!("Parsing error: {:?}", e); - return Err("Code generation error".into()); - } + // Generate the output using the backend with the parsed result. + let result = match backend::compiler_backend(output_ext, &ast) { + Ok(result) => result, + Err(err) => return Err(format!("Compilation failed: {err:?}").into()), }; // println!("{result}"); diff --git a/compiler/src/main.rs b/compiler/src/main.rs index c5eb367..3bfaf00 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -6,7 +6,7 @@ fn main() { // read from input file: syntax "c_compiler [output.dsa]" let args: Vec = std::env::args().collect(); if args.len() < 2 { - eprintln!("Usage: c_compiler [output.dsa]"); + eprintln!("Usage: c_compiler [output.dsa]"); return; } diff --git a/compiler/src/model.rs b/compiler/src/model.rs new file mode 100644 index 0000000..9c7e68e --- /dev/null +++ b/compiler/src/model.rs @@ -0,0 +1,213 @@ +use core::fmt; + +#[allow(unused)] +#[derive(Debug, Clone)] +pub enum CompilerError { + UnexpectedToken(String), + UnexpectedEndOfInput, + UnexpectedCharacter(char), + Undefined(Name), + InvalidSyntax(String), + Generic(String), +} + +#[derive(Debug, PartialEq, Clone)] +pub struct Name { + pub name: String, + pub namespace: Option, +} + +#[derive(Debug, Clone)] +pub struct Program { + pub declarations: Vec, +} + +#[allow(unused)] +#[derive(Debug, Clone)] +pub enum Declaration { + Function { + 
name: String, + return_type: TypeId, + params: Vec, + body: Block, + }, + Variable { + var: Variable, + init: Option, + is_const: bool, + }, + Dependency(Dependency), +} + +#[derive(Debug, Clone)] +pub struct Dependency { + pub name: String, + pub path: String, +} + +#[allow(unused)] +#[derive(Debug, Clone)] +pub enum TypeId { + U8, + U16, + U32, + I8, + I16, + I32, + Char, + Void, + Ptr(Box), + Ref(Box), + Array(Box, usize), + Struct { name: Name, fields: Vec }, +} + +pub type Block = Vec; + +#[allow(unused)] +#[derive(Debug, Clone)] +pub struct Variable { + pub name: String, + pub type_id: TypeId, +} + +#[allow(unused)] +#[derive(Debug, Clone)] +pub enum Statement { + Block(Block), + Declaration { + var: Variable, + value: Option, + }, + Assign { + varname: String, + value: Expression, + }, + PtrWrite { + ptr: Expression, + value: Expression, + }, + Expression { + expr: Expression, + }, + If { + condition: Expression, + then_stmt: Block, + else_stmt: Block, + }, + While { + condition: Expression, + body: Vec, + }, + Loop(Block), + Break, + Continue, + Return(Option), +} + +#[derive(Debug, Clone)] +pub enum ConstExpr { + Number(i32), + String(String), +} + +impl fmt::Display for ConstExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ConstExpr::Number(n) => write!(f, "{}", n), + ConstExpr::String(s) => write!(f, "\"{}\"", s), + } + } +} + +#[allow(unused)] +#[derive(Debug, Clone)] +pub enum Expression { + Empty, + Binary { + op: BinaryOperator, + left: Box, + right: Box, + }, + Unary { + op: UnaryOperator, + operand: Box, + }, + Variable { + name: Name, + expr_type: Option, + }, + Call { + name: Name, + args: Vec, + }, + Number(isize), + StringLiteral(String), + CharLiteral(char), +} + +impl Expression { + pub fn is_pure(&self) -> bool { + match self { + Expression::Number(_) => true, + Expression::StringLiteral(_) => true, + Expression::CharLiteral(_) => true, + Expression::Call { .. } => false, + Expression::Binary { left, right, .. 
} => left.is_pure() && right.is_pure(), + Expression::Unary { operand, .. } => operand.is_pure(), + Expression::Empty => true, + Expression::Variable { .. } => true, + } + } +} + +#[allow(unused)] +#[derive(Debug, Clone, PartialEq)] +pub enum BinaryOperator { + Add, + Sub, + Mul, + Div, + Eq, + Ne, + Lt, + Gt, + Le, + Ge, +} + +impl fmt::Display for BinaryOperator { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + BinaryOperator::Add => write!(f, "+"), + BinaryOperator::Sub => write!(f, "-"), + BinaryOperator::Mul => write!(f, "*"), + BinaryOperator::Div => write!(f, "/"), + BinaryOperator::Eq => write!(f, "=="), + BinaryOperator::Ne => write!(f, "!="), + BinaryOperator::Lt => write!(f, "<"), + BinaryOperator::Gt => write!(f, ">"), + BinaryOperator::Le => write!(f, "<="), + BinaryOperator::Ge => write!(f, ">="), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum UnaryOperator { + Plus, + Minus, + Reference, + Dereference, +} + +impl fmt::Display for UnaryOperator { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + UnaryOperator::Plus => write!(f, "+"), + UnaryOperator::Minus => write!(f, "-"), + UnaryOperator::Dereference => write!(f, "*"), + UnaryOperator::Reference => write!(f, "&"), + } + } +} diff --git a/compiler/src/semantic_analyser.rs b/compiler/src/semantic_analyser.rs deleted file mode 100644 index d8f3ce5..0000000 --- a/compiler/src/semantic_analyser.rs +++ /dev/null @@ -1,13 +0,0 @@ -use crate::parser::{CompilerError, Program}; - -pub struct Analyser; - -impl Analyser { - pub fn new() -> Self { - Self - } - - pub fn analyse(&self, ast: Program) -> Result<(), CompilerError> { - Ok(()) - } -}