From 52ef7872f0cf6a1af08359fbf63b200c04d4387f Mon Sep 17 00:00:00 2001 From: zxq5 Date: Sat, 31 Jan 2026 13:28:42 +0000 Subject: [PATCH] compiler working for some mathematical expressions, function calls and simple conditionals --- c_compiler/example.c | 2 +- c_compiler/example.dsc | 27 +++++++ c_compiler/src/assembly.rs | 106 +++++++++++++++++++++++++++ c_compiler/src/codegen.rs | 96 ++++++++++++++++--------- c_compiler/src/lexer.rs | 70 ++++++++++++++++++ c_compiler/src/main.rs | 3 + c_compiler/src/parser.rs | 30 ++++++++ c_compiler/src/registers.rs | 20 ++++++ resources/dsa/code.dsa | 139 ++++++++++++++++++++++++++++++++++++ resources/dsa/main.dsa | 52 ++------------ 10 files changed, 466 insertions(+), 79 deletions(-) create mode 100644 c_compiler/example.dsc create mode 100644 c_compiler/src/assembly.rs create mode 100644 resources/dsa/code.dsa diff --git a/c_compiler/example.c b/c_compiler/example.c index 7bc16ef..1182420 100644 --- a/c_compiler/example.c +++ b/c_compiler/example.c @@ -7,6 +7,6 @@ int factorial(int n) { int main() { int res = factorial(3); - print(res); + printnum(res); return 0; } diff --git a/c_compiler/example.dsc b/c_compiler/example.dsc new file mode 100644 index 0000000..1582321 --- /dev/null +++ b/c_compiler/example.dsc @@ -0,0 +1,27 @@ +include print: "lib/io/print.dsa" + +int factorial(int n) { + if (n <= 1) { + return 1; + } + return n * factorial(n - 1); +} + +int add_(int a, int b) { + return a + b; +} + +int greater(int a, int b) { + if (a + a > b + b) { + return a; + } else { + return b + a; + } +} + +int main() { + printnum(greater(5, add_(5, 5))); + + printnum(factorial(5)); + return 0; +} diff --git a/c_compiler/src/assembly.rs b/c_compiler/src/assembly.rs new file mode 100644 index 0000000..4b46a4f --- /dev/null +++ b/c_compiler/src/assembly.rs @@ -0,0 +1,106 @@ +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum Register { + // general purpose registers + Rg0, + Rg1, + Rg2, + Rg3, + Rg4, + Rg5, + Rg6, + Rg7, + Rg8, + Rg9, + Rga, + Rgb, + Rgc, + Rgd, + Rge, + Rgf, + + // special purpose registers + Acc, + Spr, + Bpr, + Ret, + Idr, + Mmr, + Zero, + NoReg, + + // system registers - can't be written to by instructions. + Mar, + Mdr, + Sts, + Cir, + Pcx, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +#[non_exhaustive] +/// A list of all current instructions in the DSA Assembly language. +pub enum Instruction { + // No-op + Nop = 0x0, + + // Data transfer instructions + Mov(Register, Register) = 0x1, + Movs(Register, Register) = 0x2, + + Ldb(Register, Register, Option) = 0x3, + Ldbs(Register, Register, Option) = 0x4, + Ldh(Register, Register, Option) = 0x5, + Ldhs(Register, Register, Option) = 0x6, + Ldw(Register, Register, Option) = 0x7, + + Stb(Register, Register, Option) = 0x8, + Sth(Register, Register, Option) = 0x9, + Stw(Register, Register, Option) = 0xA, + + Lli(u16, Register) = 0xB, + Lui(u16, Register) = 0xC, + + // Jump Instructions + Jump(u16, Register) = 0xD, + JumpEq(u16, Register) = 0xE, + JumpNeq(u16, Register) = 0xF, + JumpGt(u16, Register) = 0x10, + JumpGe(u16, Register) = 0x11, + JumpLt(u16, Register) = 0x12, + JumpLe(u16, Register) = 0x13, + + // Comparison + Compare(Register, Register) = 0x14, + + // // Arithmetic + // Add(args::RTypeArgs) = 0x19, + // Sub(args::RTypeArgs) = 0x1A, + // Increment(args::RTypeArgs) = 0x15, + // Decrement(args::RTypeArgs) = 0x16, + // ShiftLeft(args::RTypeArgs) = 0x17, + // ShiftRight(args::RTypeArgs) = 0x18, + + // // Logical + // And(args::RTypeArgs) = 0x1B, + // Or(args::RTypeArgs) = 0x1C, + // Not(args::RTypeArgs) = 0x1D, + // Xor(args::RTypeArgs) = 0x1E, + // Nand(args::RTypeArgs) = 0x1F, + // Nor(args::RTypeArgs) = 0x20, + // Xnor(args::RTypeArgs) = 0x21, + + // // Misc + // Interrupt(Interrupt) = 0x22, + // IntReturn = 0x23, + // Halt = 0x24, + + // // Immediate Arithmetic + // AddImmediate(args::ITypeArgs) = 0x25, + // SubImmediate(args::ITypeArgs) = 0x26, + + // Fake Instructions + Data(u32) = 0x3E, + Segment(u32) = 0x3F, +} diff --git a/c_compiler/src/codegen.rs b/c_compiler/src/codegen.rs index 308fe43..d5bbb66 100644 --- a/c_compiler/src/codegen.rs +++ b/c_compiler/src/codegen.rs @@ -1,4 +1,5 @@ use std::hash::Hash; +use std::sync::LazyLock; use std::sync::atomic::AtomicU32; use std::time::SystemTime; use std::{collections::HashMap, path::PathBuf}; @@ -18,23 +19,32 @@ pub struct CodeGenerator { imports: HashMap, globals: Vec, functions: Vec, + symbols: Vec, allocator: RegisterAllocator, - call_stack: Vec, } +static GLOBAL_METHODS: LazyLock> = LazyLock::new(|| { + hash_map! { + "print" => "print::print", + "printnum" => "print::print_num" + } +}); + fn import(name: &str, path: &str) -> String { format!("include {name}: \"{}\"", path) } impl CodeGenerator { + const RET: &'static str = "\tjmp _ret"; + pub fn new(ast: Program) -> Self { CodeGenerator { ast, imports: HashMap::new(), globals: Vec::new(), functions: Vec::new(), + symbols: Vec::new(), allocator: RegisterAllocator::new(), - call_stack: Vec::new(), } } @@ -47,11 +57,15 @@ impl CodeGenerator { self.include("print", "./lib/io/print.dsa"); for block in self.ast.clone().declarations { - self.generate_block(block.clone())?; + match block { + Declaration::Variable { name, .. } => self.symbols.push(name), + Declaration::Function { name, .. } => self.symbols.push(name), + Declaration::Import { name, .. } => self.symbols.push(name), + } } - for func in &self.functions { - println!("{func}"); + for block in self.ast.clone().declarations { + self.generate_block(block.clone())?; } self.generate_layout() @@ -88,6 +102,7 @@ impl CodeGenerator { dsa![mov bpr, spr], dsa![push zero], dsa![call main], + dsa![call print::print_newline], dsa![lwi message, rg0], dsa![push rg0], dsa![call print::print], @@ -96,6 +111,13 @@ impl CodeGenerator { dsa![pop zero], dsa![hlt] ], + "", + comment!("Function return boilerplate"), + block! [ "_ret" + dsa![mov bpr, spr], + dsa![pop bpr], + dsa![return] + ], // block! [ "main" // dsa![push bpr], // dsa![mov spr, bpr], @@ -131,6 +153,9 @@ impl CodeGenerator { self.functions.push(format!("{func}\n")); } + Declaration::Import { name, path } => { + self.imports.insert(name, path); + } }; Ok(()) @@ -143,8 +168,6 @@ impl CodeGenerator { params: &[Parameter], body: &[Statement], ) -> Vec { - self.call_stack.push(name.to_string()); - let mut code = Vec::new(); // Reset allocator for new function @@ -154,6 +177,7 @@ impl CodeGenerator { code.push(format!("{}:", name)); code.push("\tpush bpr".to_string()); code.push("\tmov spr, bpr".to_string()); + code.push(String::new()); // Allocate parameters to registers or stack locations for (i, param) in params.iter().enumerate() { @@ -170,13 +194,14 @@ impl CodeGenerator { code.extend(stmt_code); } - // Function epilogue - code.push(format!("_ret_{name}:")); - code.push("\tmov bpr, spr".to_string()); - code.push("\tpop bpr".to_string()); - code.push("\treturn".to_string()); + // automatically return at function end + if let Some(x) = code.last() + && x == Self::RET + { + } else { + code.push(Self::RET.to_string()); + } - self.call_stack.pop(); code } @@ -212,7 +237,7 @@ impl CodeGenerator { let (result_reg, expr_code) = self.generate_expression(e)?; code.extend(expr_code); code.push(format!("\tstw {}, bpr, 8", result_reg)); - code.push(format!("\tjmp _ret_{}", self.call_stack.last().unwrap())); + code.push(format!("\tjmp _ret")); self.allocator.free_temp(&result_reg); } } @@ -419,7 +444,7 @@ impl CodeGenerator { _ => return Err(format!("Unsupported binary operator: {:?}", op)), } - // Free operand registers + // Free operand registers (allocator will protect variables) self.allocator.free_temp(&left_reg); self.allocator.free_temp(&right_reg); @@ -427,9 +452,11 @@ impl CodeGenerator { } Expression::Call { name, args } => { - // Save caller-saved registers - let save_code = self.allocator.save_caller_saved(); - code.extend(save_code); + // Save caller-saved registers and track which ones we saved + let saved_regs = self.allocator.get_caller_saved_registers(); + for reg in &saved_regs { + code.push(format!("\tpush {}", reg)); + } // Evaluate and push arguments in reverse order let mut arg_regs = Vec::new(); @@ -440,18 +467,31 @@ impl CodeGenerator { arg_regs.push(arg_reg); } - if self.functions.contains_key(name) { + if GLOBAL_METHODS.contains_key(name.as_str()) { + code.push(format!("\tcall {}", GLOBAL_METHODS[name.as_str()])); + } else if self.symbols.contains(name) { // Call local function code.push(format!("\tcall {}", name)); + } else { + return Err(format!("undefined function {name}")); } - if self.imports - + // Result is in rg0, allocate a register and move it + let (result_reg, result_alloc) = self.allocator.alloc_temp()?; + code.extend(result_alloc); + code.push(format!("\tpop {}", result_reg)); // Clean up arguments - for _ in 0..args.len() { - code.push("\tpop zero".to_string()); + if args.len() > 1 { + for _ in 0..(args.len() - 1) { + code.push("\tpop zero".to_string()); + } + } + + // Restore caller-saved registers in reverse order (LIFO) + for reg in saved_regs.iter().rev() { + code.push(format!("\tpop {}", reg)); } // Free argument registers @@ -459,16 +499,6 @@ impl CodeGenerator { self.allocator.free_temp(®); } - // Result is in rg0, allocate a register and move it - let (result_reg, result_alloc) = self.allocator.alloc_temp()?; - code.extend(result_alloc); - - if result_reg != "rg0" { - code.push(format!("\tmov rg0, {}", result_reg)); - } - - // Restore caller-saved registers (simplified - you'd track which ones) - Ok((result_reg, code)) } diff --git a/c_compiler/src/lexer.rs b/c_compiler/src/lexer.rs index f6ccc74..60cf402 100644 --- a/c_compiler/src/lexer.rs +++ b/c_compiler/src/lexer.rs @@ -10,10 +10,13 @@ pub enum TokenType { Else, While, Return, + Include, // Identifiers and literals Identifier(String), Number(i32), + String(String), + Char(char), // Operators Plus, @@ -35,10 +38,22 @@ pub enum TokenType { RBrace, Semicolon, Comma, + Colon, + Namespace, Eof, } +pub enum Type { + Int32, + Int16, + Int8, + Uint32, + Uint16, + Uint8, + Char, +} + #[derive(Debug, Clone)] pub struct Token { pub token_type: TokenType, @@ -150,6 +165,45 @@ impl Lexer { ident } + fn read_string(&mut self) -> Result { + let mut string = String::new(); + self.advance(); // Consume the opening quote + + while let Some(ch) = self.peek(0) { + if ch == '"' { + self.advance(); // Consume the closing quote + return Ok(string); + } else if ch == '\\' { + self.advance(); // Consume the backslash + if let Some(escaped_char) = self.peek(0) { + string.push(escaped_char); + self.advance(); + } + } else { + string.push(ch); + self.advance(); + } + } + + Err(String::from("Unexpected EOF")) + } + + fn read_char(&mut self) -> Result { + self.advance(); // Consume the opening quote + + if let Some(ch) = self.peek(0) { + self.advance(); + if self.peek(0) == Some('\'') { + self.advance(); + return Ok(ch); + } else { + Err(String::from("expected closing quote")) + } + } else { + Err(String::from("expected character")) + } + } + pub fn tokenize(&mut self) -> Result, String> { let mut tokens = Vec::new(); @@ -168,6 +222,12 @@ impl Lexer { let token_type = if ch.is_ascii_digit() { let num = self.read_number(); TokenType::Number(num) + } else if ch == '"' { + let string = self.read_string()?; + TokenType::String(string) + } else if ch == '\'' { + let char = self.read_char()?; + TokenType::Char(char) } else if ch.is_alphabetic() || ch == '_' { let ident = self.read_identifier(); match ident.as_str() { @@ -176,10 +236,20 @@ impl Lexer { "else" => TokenType::Else, "while" => TokenType::While, "return" => TokenType::Return, + "include" => TokenType::Include, _ => TokenType::Identifier(ident), } } else { match ch { + ':' if self.peek(1) == Some(':') => { + self.advance(); + self.advance(); + TokenType::Namespace + } + ':' => { + self.advance(); + TokenType::Colon + } '=' if self.peek(1) == Some('=') => { self.advance(); self.advance(); diff --git a/c_compiler/src/main.rs b/c_compiler/src/main.rs index 06758c8..eae157b 100644 --- a/c_compiler/src/main.rs +++ b/c_compiler/src/main.rs @@ -1,7 +1,10 @@ +#![feature(hash_map_macro)] + use std::fmt; use crate::{codegen::CodeGenerator, lexer::Lexer, parser::Parser}; +// mod assembly; pub mod codegen; pub mod lexer; pub mod parser; diff --git a/c_compiler/src/parser.rs b/c_compiler/src/parser.rs index 734cb04..86f2b00 100644 --- a/c_compiler/src/parser.rs +++ b/c_compiler/src/parser.rs @@ -23,6 +23,10 @@ pub enum Declaration { name: String, init: Option, }, + Import { + name: String, + path: String, + }, } #[derive(Debug, Clone)] @@ -225,6 +229,32 @@ impl Parser { } fn parse_declaration(&mut self) -> Result { + // check for an import + if let TokenType::Include = self.current().token_type { + self.advance(); + + let name = + if let TokenType::Identifier(id) = self.current().clone().token_type { + Some(id) + } else { + None + } + .ok_or(String::from("Expected identifier"))?; + + self.advance(); + self.expect(TokenType::Colon)?; + + let path = if let TokenType::String(id) = self.current().clone().token_type { + Some(id) + } else { + None + } + .ok_or(String::from("Expected string literal"))?; + + self.advance(); + return Ok(Declaration::Import { name, path }); + } + self.expect(TokenType::Int)?; let name = match &self.current().token_type { diff --git a/c_compiler/src/registers.rs b/c_compiler/src/registers.rs index d13babd..1d042e9 100644 --- a/c_compiler/src/registers.rs +++ b/c_compiler/src/registers.rs @@ -81,7 +81,17 @@ impl RegisterAllocator { } /// Free a temporary register after use + /// NOTE: This will NOT free registers that contain variables! + /// Variables persist throughout their scope and must not be freed pub fn free_temp(&mut self, reg: &str) { + // Check if this register contains a variable + if self.register_contents.contains_key(reg) { + // This register holds a variable - don't free it! + // Variables are only freed when they go out of scope via free_var() + return; + } + + // This is a true temporary - safe to free self.in_use.insert(reg.to_string(), false); } @@ -243,6 +253,16 @@ impl RegisterAllocator { self.variable_locations.remove(var_name); } + /// Get list of registers that contain variables and are in use + /// These need to be saved before function calls + pub fn get_caller_saved_registers(&self) -> Vec { + self.register_contents + .iter() + .filter(|(reg, _)| *self.in_use.get(*reg).unwrap_or(&false)) + .map(|(reg, _)| reg.clone()) + .collect() + } + /// Save caller-saved registers before a function call /// Returns assembly code to save them pub fn save_caller_saved(&mut self) -> Vec { diff --git a/resources/dsa/code.dsa b/resources/dsa/code.dsa new file mode 100644 index 0000000..6a78ddc --- /dev/null +++ b/resources/dsa/code.dsa @@ -0,0 +1,139 @@ + +// GENERATED BY DSA-C COMPILER +// Generated at 2026-01-31 01:39:55 + +// Imports +include maths: "./lib/maths/core.dsa" +include print: "lib/io/print.dsa" + +// Globals & Reserved Memory + + +// Entry Point +dw stack: 0x10000 +db message: "Process Exited with code:" +_init: + ldw stack, bpr + mov bpr, spr + push zero + call main + call print::print_newline + lwi message, rg0 + push rg0 + call print::print + pop zero + call print::print_hex_word + pop zero + hlt + + +// Function return boilerplate +_ret: + mov bpr, spr + pop bpr + return + + +factorial: + push bpr + mov spr, bpr + + ldw bpr, rg0, 8 + lli 1, rg1 + cmp rg0, rg1 + lli 0, rg2 + jgt _cmp_end_1 + lli 1, rg2 +_cmp_end_1: + cmp rg2, zero + jeq _else_3 +_then_2: + lli 1, rg1 + stw rg1, bpr, 8 + jmp _ret + jmp _end_4 +_else_3: + nop +_end_4: + push rg0 + lli 1, rg1 + sub rg0, rg1, rg2 + push rg2 + call factorial + pop rg1 + pop rg0 + push rg1 + push rg0 + call maths::multiply + pop rg2 + pop zero + stw rg2, bpr, 8 + jmp _ret + +add_: + push bpr + mov spr, bpr + + ldw bpr, rg0, 8 + ldw bpr, rg1, 12 + add rg0, rg1, rg2 + stw rg2, bpr, 8 + jmp _ret + +greater: + push bpr + mov spr, bpr + + ldw bpr, rg0, 8 + ldw bpr, rg1, 12 + add rg0, rg0, rg2 + add rg1, rg1, rg3 + cmp rg2, rg3 + lli 0, rg4 + jle _cmp_end_5 + lli 1, rg4 +_cmp_end_5: + cmp rg4, zero + jeq _else_7 +_then_6: + stw rg0, bpr, 8 + jmp _ret + jmp _end_8 +_else_7: + add rg1, rg0, rg2 + stw rg2, bpr, 8 + jmp _ret +_end_8: + jmp _ret + +main: + push bpr + mov spr, bpr + + lli 5, rg0 + push rg0 + lli 5, rg1 + push rg1 + call add_ + pop rg2 + pop zero + push rg2 + lli 5, rg0 + push rg0 + call greater + pop rg1 + pop zero + push rg1 + call print::print_num + pop rg0 + lli 5, rg0 + push rg0 + call factorial + pop rg1 + push rg1 + call print::print_num + pop rg0 + lli 0, rg0 + stw rg0, bpr, 8 + jmp _ret + diff --git a/resources/dsa/main.dsa b/resources/dsa/main.dsa index 552c80f..3020247 100644 --- a/resources/dsa/main.dsa +++ b/resources/dsa/main.dsa @@ -16,55 +16,17 @@ init: dw string: "hello world" start: - lwi 37, rg0 - lwi 12, rg1 - push rg0 - push rg1 - call maths::divmod - pop rg0 // result - pop rg1 // remainder + lwi 1, rg0 + lwi 2, rg1 - push rg1 push rg0 - call print::print_hex_byte - call print::print_whitespace + push rg1 + call maths::multiply + pop rg0 pop zero - call print::print_hex_byte - call print::print_newline - - lwi string, rg0 - //lwi 10, rg0 - pusha 4 push rg0 - call print::print - //call fib::fib_n - pop zero - call print::print_newline - popa 4 - - pusha 4 - push rg0 - call print::print - //call fib::fib_n - pop zero - call print::print_newline - popa 4 - - pusha 4 - push rg0 - call print::print - //call fib::fib_n - pop zero - call print::print_newline - popa 4 - - pusha 4 - push rg0 - call print::print - //call fib::fib_n - pop zero - call print::print_newline - popa 4 + call print::print_num + pop zero hlt