Compare commits

5 Commits

Author SHA1 Message Date
zxq5 52ef7872f0 compiler working for some mathematical expressions, function calls and
simple conditionals
2026-01-31 13:28:42 +00:00
zxq5 e31deb594f fixed a bug with the multiply function in core.dsa and added a print_num
function to print.dsa for decimal numbers
2026-01-31 13:28:11 +00:00
zxq5 63c9d858b4 added a to-do list and bacon.toml for developing the compiler 2026-01-31 13:27:31 +00:00
zxq5 782c842a42 updated gitignore 2026-01-29 19:33:30 +00:00
zxq5 259746558f codegen progress 2026-01-29 19:29:48 +00:00
18 changed files with 1400 additions and 8364 deletions
+1
View File
@@ -1,2 +1,3 @@
/target
**/*.env
Cargo.lock
Generated
-4270
View File
File diff suppressed because it is too large Load Diff
+129
View File
@@ -0,0 +1,129 @@
# This is a configuration file for the bacon tool
#
# Complete help on configuration: https://dystroy.org/bacon/config/
#
# You may check the current default at
# https://github.com/Canop/bacon/blob/main/defaults/default-bacon.toml
default_job = "check"
[jobs.check]
command = ["cargo", "check", "--color", "always"]
need_stdout = false
[jobs.check-all]
command = ["cargo", "check", "--all-targets", "--color", "always"]
need_stdout = false
# Run clippy on the default target
[jobs.clippy]
command = [
"cargo", "clippy",
"--color", "always",
]
need_stdout = false
# Run clippy on all targets
# To disable some lints, you may change the job this way:
# [jobs.clippy-all]
# command = [
# "cargo", "clippy",
# "--all-targets",
# "--color", "always",
# "--",
# "-A", "clippy::bool_to_int_with_if",
# "-A", "clippy::collapsible_if",
# "-A", "clippy::derive_partial_eq_without_eq",
# ]
# need_stdout = false
[jobs.clippy-all]
command = [
"cargo", "clippy",
"--all-targets",
"--color", "always",
]
need_stdout = false
# This job lets you run
# - all tests: bacon test
# - a specific test: bacon test -- config::test_default_files
# - the tests of a package: bacon test -- -- -p config
[jobs.test]
command = [
"cargo", "test", "--color", "always",
"--", "--color", "always", # see https://github.com/Canop/bacon/issues/124
]
need_stdout = true
[jobs.nextest]
command = [
"cargo", "nextest", "run",
"--color", "always",
"--hide-progress-bar", "--failure-output", "final"
]
need_stdout = true
analyzer = "nextest"
[jobs.doc]
command = ["cargo", "doc", "--color", "always", "--no-deps"]
need_stdout = false
# If the doc compiles, then it opens in your browser and bacon switches
# to the previous job
[jobs.doc-open]
command = ["cargo", "doc", "--color", "always", "--no-deps", "--open"]
need_stdout = false
on_success = "back" # so that we don't open the browser at each change
# You can run your application and have the result displayed in bacon,
# if it makes sense for this crate.
# Don't forget the `--color always` part or the errors won't be
# properly parsed.
[jobs.run]
command = [
"cargo", "run",
"--color", "always",
"--",
"example.dsc",
"../resources/dsa/code.dsa"
# put launch parameters for your program behind a `--` separator
]
need_stdout = true
allow_warnings = true
background = true
# Run your long-running application (eg server) and have the result displayed in bacon.
# For programs that never stop (eg a server), `background` is set to false
# to have the cargo run output immediately displayed instead of waiting for
# program's end.
# 'on_change_strategy' is set to `kill_then_restart` to have your program restart
# on every change (an alternative would be to use the 'F5' key manually in bacon).
# If you often use this job, it makes sense to override the 'r' key by adding
# a binding `r = job:run-long` at the end of this file.
[jobs.run-long]
command = [
"cargo", "run",
"--color", "always",
# put launch parameters for your program behind a `--` separator
]
need_stdout = true
allow_warnings = true
background = false
on_change_strategy = "kill_then_restart"
# This parameterized job runs the example of your choice, as soon
# as the code compiles.
# Call it as
# bacon ex -- my-example
[jobs.ex]
command = ["cargo", "run", "--color", "always", "--example"]
need_stdout = true
allow_warnings = true
# You may define here keybindings that would be specific to
# a project, for example a shortcut to launch a specific job.
# Shortcuts to internal functions (scrolling, toggling, etc.)
# should go in your personal global prefs.toml file instead.
[keybindings]
# alt-m = "job:my-job"
c = "job:clippy-all" # comment this to have 'c' run clippy on only the default target
+2 -9
View File
@@ -1,6 +1,4 @@
int x = 5;
int add(int a, int b) { return a + b; }
int var_x = 5;
int factorial(int n) {
if (n <= 1) {
@@ -10,12 +8,7 @@ int factorial(int n) {
}
int main() {
int x;
x = 5;
int x = 5;
int result;
int result = 5;
result = x + factorial(5);
int result = var_x + factorial(5);
print(result);
return 0;
}
+12
View File
@@ -0,0 +1,12 @@
int factorial(int n) {
if (n <= 1) {
return 1;
}
return n * factorial(n - 1);
}
int main() {
int res = factorial(3);
printnum(res);
return 0;
}
+27
View File
@@ -0,0 +1,27 @@
include print: "lib/io/print.dsa"
int factorial(int n) {
if (n <= 1) {
return 1;
}
return n * factorial(n - 1);
}
int add_(int a, int b) {
return a + b;
}
int greater(int a, int b) {
if (a + a > b + b) {
return a;
} else {
return b + a;
}
}
int main() {
printnum(greater(5, add_(5, 5)));
printnum(factorial(5));
return 0;
}
+106
View File
@@ -0,0 +1,106 @@
/// Register operands accepted by [`Instruction`] variants, grouped by role.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate that matches
/// on it needs a wildcard arm.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum Register {
// general purpose registers
// (sixteen of them, numbered with a hexadecimal suffix 0-f)
Rg0,
Rg1,
Rg2,
Rg3,
Rg4,
Rg5,
Rg6,
Rg7,
Rg8,
Rg9,
Rga,
Rgb,
Rgc,
Rgd,
Rge,
Rgf,
// special purpose registers
// NOTE(review): the names suggest accumulator, stack pointer, base pointer,
// return, etc. - confirm each against the ISA specification before relying
// on that reading.
Acc,
Spr,
Bpr,
Ret,
Idr,
Mmr,
Zero,
NoReg,
// system registers - can't be written to by instructions.
Mar,
Mdr,
Sts,
Cir,
Pcx,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
#[non_exhaustive]
/// A list of all current instructions in the DSA Assembly language.
///
/// Every variant carries an explicit `u8` discriminant.
/// NOTE(review): the discriminants are presumably the opcode values used by
/// the encoder - confirm against the assembler before depending on that.
pub enum Instruction {
// No-op
Nop = 0x0,
// Data transfer instructions
// The load/store variants take two registers plus an optional `u32`.
// NOTE(review): the optional value looks like an address offset - confirm.
Mov(Register, Register) = 0x1,
Movs(Register, Register) = 0x2,
Ldb(Register, Register, Option<u32>) = 0x3,
Ldbs(Register, Register, Option<u32>) = 0x4,
Ldh(Register, Register, Option<u32>) = 0x5,
Ldhs(Register, Register, Option<u32>) = 0x6,
Ldw(Register, Register, Option<u32>) = 0x7,
Stb(Register, Register, Option<u32>) = 0x8,
Sth(Register, Register, Option<u32>) = 0x9,
Stw(Register, Register, Option<u32>) = 0xA,
Lli(u16, Register) = 0xB,
Lui(u16, Register) = 0xC,
// Jump Instructions
Jump(u16, Register) = 0xD,
JumpEq(u16, Register) = 0xE,
JumpNeq(u16, Register) = 0xF,
JumpGt(u16, Register) = 0x10,
JumpGe(u16, Register) = 0x11,
JumpLt(u16, Register) = 0x12,
JumpLe(u16, Register) = 0x13,
// Comparison
Compare(Register, Register) = 0x14,
// The variants below are disabled: they refer to `args::RTypeArgs`,
// `args::ITypeArgs` and `Interrupt`, none of which is defined in this file.
// Their discriminants (0x15-0x26) are therefore currently unused.
// // Arithmetic
// Add(args::RTypeArgs) = 0x19,
// Sub(args::RTypeArgs) = 0x1A,
// Increment(args::RTypeArgs) = 0x15,
// Decrement(args::RTypeArgs) = 0x16,
// ShiftLeft(args::RTypeArgs) = 0x17,
// ShiftRight(args::RTypeArgs) = 0x18,
// // Logical
// And(args::RTypeArgs) = 0x1B,
// Or(args::RTypeArgs) = 0x1C,
// Not(args::RTypeArgs) = 0x1D,
// Xor(args::RTypeArgs) = 0x1E,
// Nand(args::RTypeArgs) = 0x1F,
// Nor(args::RTypeArgs) = 0x20,
// Xnor(args::RTypeArgs) = 0x21,
// // Misc
// Interrupt(Interrupt) = 0x22,
// IntReturn = 0x23,
// Halt = 0x24,
// // Immediate Arithmetic
// AddImmediate(args::ITypeArgs) = 0x25,
// SubImmediate(args::ITypeArgs) = 0x26,
// Fake Instructions
// NOTE(review): presumably assembler directives rather than executable
// opcodes - confirm.
Data(u32) = 0x3E,
Segment(u32) = 0x3F,
}
+448 -39
View File
@@ -1,30 +1,50 @@
use std::hash::Hash;
use std::sync::LazyLock;
use std::sync::atomic::AtomicU32;
use std::time::SystemTime;
use std::{collections::HashMap, path::PathBuf};
use chrono::{DateTime, Local};
use crate::registers::RegisterAllocator;
use crate::{block, cmd, comment, dsa};
use crate::parser::{ConstExpr, Declaration, Program};
use crate::parser::{
BinaryOperator, ConstExpr, Declaration, Expression, Parameter, Program, Statement,
UnaryOperator,
};
/// Turns a parsed [`Program`] into DSA assembly text, accumulating the output
/// piece by piece before it is laid out into one file.
pub struct CodeGenerator {
/// The parsed program being compiled.
ast: Program,
/// Import name -> path; `generate_block` adds an entry for every
/// `Declaration::Import`.
imports: HashMap<String, String>,
/// One `dw <name>: <value>` data line per global variable.
globals: Vec<String>,
/// Assembled text of each function; joined with newlines by `generate_layout`.
functions: Vec<String>,
/// Names of all top-level declarations, collected before any code is
/// generated and consulted when resolving function calls.
symbols: Vec<String>,
/// Register allocator; reset at the start of every function.
allocator: RegisterAllocator,
}
/// Built-in functions that source programs may call without declaring them,
/// mapped to the assembly label that implements each one.
///
/// The table is built with the stable `HashMap::from` array conversion, so
/// this item does not depend on the nightly-only `hash_map!` macro.
static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    HashMap::from([
        ("print", "print::print"),
        ("printnum", "print::print_num"),
    ])
});
/// Renders a DSA `include` directive of the form `include <name>: "<path>"`.
///
/// Neither argument is escaped or validated; both are copied verbatim.
fn import(name: &str, path: &str) -> String {
    let mut directive = String::from("include ");
    directive.push_str(name);
    directive.push_str(": \"");
    directive.push_str(path);
    directive.push('"');
    directive
}
impl CodeGenerator {
const RET: &'static str = "\tjmp _ret";
pub fn new(ast: Program) -> Self {
CodeGenerator {
ast,
imports: HashMap::new(),
globals: Vec::new(),
functions: Vec::new(),
symbols: Vec::new(),
allocator: RegisterAllocator::new(),
}
}
@@ -37,44 +57,20 @@ impl CodeGenerator {
self.include("print", "./lib/io/print.dsa");
for block in self.ast.clone().declarations {
self.generate_block(block.clone());
match block {
Declaration::Variable { name, .. } => self.symbols.push(name),
Declaration::Function { name, .. } => self.symbols.push(name),
Declaration::Import { name, .. } => self.symbols.push(name),
}
}
for block in self.ast.clone().declarations {
self.generate_block(block.clone())?;
}
self.generate_layout()
}
fn generate_block(&mut self, block: Declaration) {
match block {
Declaration::Variable { name, init } => self.globals.push(format!(
"dw {}: {}",
name,
init.unwrap_or(ConstExpr::Number(0))
)),
Declaration::Function {
name,
return_type,
params,
body,
} => {
let function_start = format!(
"{name}: \n\t\
push bpr \n\t\
mov spr, bpr"
);
let function_end = format!(
"\n\t\
mov bpr, spr \n\t\
pop bpr \n\t\
return\n"
);
self.functions
.push(format!("{function_start}\n{function_end}"));
}
}
}
fn generate_layout(&mut self) -> Result<String, String> {
let datetime: DateTime<Local> = SystemTime::now().into();
Ok(dsa![
@@ -106,6 +102,7 @@ impl CodeGenerator {
dsa![mov bpr, spr],
dsa![push zero],
dsa![call main],
dsa![call print::print_newline],
dsa![lwi message, rg0],
dsa![push rg0],
dsa![call print::print],
@@ -114,19 +111,431 @@ impl CodeGenerator {
dsa![pop zero],
dsa![hlt]
],
block! [ "main"
dsa![push bpr],
dsa![mov spr, bpr],
dsa![lwi 67, rg1],
dsa![stw rg1, spr, 8],
"",
comment!("Function return boilerplate"),
block! [ "_ret"
dsa![mov bpr, spr],
dsa![pop bpr],
dsa![return]
],
// block! [ "main"
// dsa![push bpr],
// dsa![mov spr, bpr],
// dsa![lwi 67, rg1],
// dsa![stw rg1, spr, 8],
// dsa![mov bpr, spr],
// dsa![pop bpr],
// dsa![return]
// ],
"",
self.functions.join("\n"),
])
}
/// Records a global variable as a `dw <name>: <value>` data line.
///
/// A variable declared without an initialiser is emitted with the value
/// `ConstExpr::Number(0)`.
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
    let value = match init {
        Some(expr) => expr,
        None => ConstExpr::Number(0),
    };
    let directive = format!("dw {}: {}", name, value);
    self.globals.push(directive);
}
/// Generates code for one top-level declaration and records the result.
///
/// * `Variable` - appends a data line via `generate_global`.
/// * `Function` - assembles the function and appends its text (terminated by
///   a newline) to `self.functions`.
/// * `Import` - stores `name -> path` in `self.imports`.
///
/// # Errors
/// Returns the message of the first failure raised while generating a
/// function body (for example a call to an undefined function).
fn generate_block(&mut self, block: Declaration) -> Result<(), String> {
    match block {
        Declaration::Variable { name, init } => self.generate_global(&name, init),
        Declaration::Function {
            name, params, body, ..
        } => {
            let func = self.generate_function(&name, &params, &body)?.join("\n");
            self.functions.push(format!("{func}\n"));
        }
        Declaration::Import { name, path } => {
            self.imports.insert(name, path);
        }
    }
    Ok(())
}

/// Assembles a single function: label, prologue, parameter loads, body and a
/// trailing jump to the shared `_ret` epilogue.
///
/// The register allocator is reset first, so allocations never leak from one
/// function into the next.
///
/// # Errors
/// Propagates allocator and statement-generation failures to the caller
/// instead of panicking.
fn generate_function(
    &mut self,
    name: &str,
    params: &[Parameter],
    body: &[Statement],
) -> Result<Vec<String>, String> {
    let mut code = Vec::new();

    // Reset allocator for new function
    self.allocator.reset();

    // Function prologue
    code.push(format!("{}:", name));
    code.push("\tpush bpr".to_string());
    code.push("\tmov spr, bpr".to_string());
    code.push(String::new());

    // Bind every parameter to a register and load it from its stack slot.
    // Parameters start at bpr+8 and are 4 bytes apart.
    for (i, param) in params.iter().enumerate() {
        let offset = 8 + i * 4;
        let (reg, alloc_code) = self.allocator.alloc_var(&param.name)?;
        code.extend(alloc_code);
        code.push(format!("\tldw bpr, {}, {}", reg, offset));
    }

    // Function body
    for stmt in body {
        code.extend(self.generate_statement(stmt)?);
    }

    // Return automatically at the end unless the body already ended with one.
    if code.last().map(String::as_str) != Some(Self::RET) {
        code.push(Self::RET.to_string());
    }

    Ok(code)
}
// Example: Generate code for a statement
fn generate_statement(&mut self, stmt: &Statement) -> Result<Vec<String>, String> {
let mut code = Vec::new();
match stmt {
Statement::Assign {
name,
declare_type,
value,
} => {
if let Some(expr) = value {
// Evaluate expression
let (result_reg, expr_code) = self.generate_expression(expr)?;
code.extend(expr_code);
// Store result in variable
let store_code = self.allocator.store_var(name, &result_reg);
code.extend(store_code);
// Free temporary register
self.allocator.free_temp(&result_reg);
} else {
// Just declaring variable without initialization
self.allocator.alloc_var(name)?;
}
}
Statement::Return { expr } => {
if let Some(e) = expr {
let (result_reg, expr_code) = self.generate_expression(e)?;
code.extend(expr_code);
code.push(format!("\tstw {}, bpr, 8", result_reg));
code.push(format!("\tjmp _ret"));
self.allocator.free_temp(&result_reg);
}
}
Statement::If {
condition,
then_stmt,
else_stmt,
} => {
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition)?;
code.extend(cond_code);
// Compare with zero
code.push(format!("\tcmp {}, zero", cond_reg));
self.allocator.free_temp(&cond_reg);
// Generate unique labels
let then_label = format!("_then_{}", self.get_unique_label());
let else_label = format!("_else_{}", self.get_unique_label());
let end_label = format!("_end_{}", self.get_unique_label());
// Jump to else if condition is false (equal to zero)
code.push(format!("\tjeq {}", else_label));
// Then block
code.push(format!("{}:", then_label));
for s in then_stmt {
code.extend(self.generate_statement(s)?);
}
if then_stmt.len() == 0 {
code.push("\tnop".to_string());
}
code.push(format!("\tjmp {}", end_label));
// Else block
code.push(format!("{}:", else_label));
for s in else_stmt {
code.extend(self.generate_statement(s)?);
}
if else_stmt.len() == 0 {
code.push("\tnop".to_string());
}
code.push(format!("{}:", end_label));
}
Statement::While { condition, body } => {
let loop_start = format!("_while_start_{}", self.get_unique_label());
let loop_end = format!("_while_end_{}", self.get_unique_label());
code.push(format!("{}:", loop_start));
// Generate condition
let (cond_reg, cond_code) = self.generate_expression(condition)?;
code.extend(cond_code);
code.push(format!("\tcmp {}, zero", cond_reg));
self.allocator.free_temp(&cond_reg);
code.push(format!("\tjeq {}", loop_end));
// Loop body
for s in body {
code.extend(self.generate_statement(s)?);
}
code.push(format!("\tjmp {}", loop_start));
code.push(format!("{}:", loop_end));
}
Statement::Expression { expr } => {
let (result_reg, expr_code) = self.generate_expression(expr)?;
code.extend(expr_code);
self.allocator.free_temp(&result_reg);
}
Statement::Block(statements) => {
for s in statements {
code.extend(self.generate_statement(s)?);
}
}
}
Ok(code)
}
/// Lowers an expression to assembly.
///
/// Returns the name of the register that holds the result together with the
/// assembly lines that compute it. The caller owns the returned register and
/// is expected to release it with `free_temp` once the value is consumed.
///
/// # Errors
/// Fails when no register can be allocated, when a binary operator is not
/// supported, or when a called function is neither a builtin nor a declared
/// symbol.
fn generate_expression(
    &mut self,
    expr: &Expression,
) -> Result<(String, Vec<String>), String> {
    let mut code = Vec::new();

    match expr {
        Expression::Number { value } => {
            let (reg, alloc_code) = self.allocator.alloc_temp()?;
            code.extend(alloc_code);

            // Load the low 16 bits, then the high 16 bits only when needed.
            code.push(format!("\tlli {}, {}", value & 0xFFFF, reg));
            if *value > 0xFFFF || *value < 0 {
                code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg));
            }

            Ok((reg, code))
        }

        Expression::Variable { name, .. } => {
            let (reg, load_code) = self.allocator.load_var(name)?;
            code.extend(load_code);
            Ok((reg, code))
        }

        Expression::Binary { op, left, right } => {
            // Operands are evaluated left to right.
            let (left_reg, left_code) = self.generate_expression(left)?;
            code.extend(left_code);

            let (right_reg, right_code) = self.generate_expression(right)?;
            code.extend(right_code);

            let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
            code.extend(result_alloc);

            match op {
                BinaryOperator::Add => code.push(format!(
                    "\tadd {}, {}, {}",
                    left_reg, right_reg, result_reg
                )),
                BinaryOperator::Sub => code.push(format!(
                    "\tsub {}, {}, {}",
                    left_reg, right_reg, result_reg
                )),
                BinaryOperator::Mul => {
                    // Multiplication is a library routine, not an instruction.
                    self.include("maths", "./lib/maths/core.dsa");

                    code.push(format!("\tpush {}", right_reg));
                    code.push(format!("\tpush {}", left_reg));
                    code.push("\tcall maths::multiply".to_string());
                    code.push(format!("\tpop {}", result_reg));
                    code.push("\tpop zero".to_string());
                }
                // Comparison operators yield 1 (true) or 0 (false). Each one
                // names the jump that skips the "set to 1" instruction, i.e.
                // the jump taken when the comparison does NOT hold.
                BinaryOperator::Eq => {
                    self.emit_comparison(&mut code, &left_reg, &right_reg, &result_reg, "jne")
                }
                BinaryOperator::Ne => {
                    self.emit_comparison(&mut code, &left_reg, &right_reg, &result_reg, "jeq")
                }
                BinaryOperator::Lt => {
                    self.emit_comparison(&mut code, &left_reg, &right_reg, &result_reg, "jge")
                }
                BinaryOperator::Le => {
                    self.emit_comparison(&mut code, &left_reg, &right_reg, &result_reg, "jgt")
                }
                BinaryOperator::Gt => {
                    self.emit_comparison(&mut code, &left_reg, &right_reg, &result_reg, "jle")
                }
                BinaryOperator::Ge => {
                    self.emit_comparison(&mut code, &left_reg, &right_reg, &result_reg, "jlt")
                }
                _ => return Err(format!("Unsupported binary operator: {:?}", op)),
            }

            // Free operand registers (allocator will protect variables)
            self.allocator.free_temp(&left_reg);
            self.allocator.free_temp(&right_reg);

            Ok((result_reg, code))
        }

        Expression::Call { name, args } => {
            // Save the registers that currently hold variables.
            let saved_regs = self.allocator.get_caller_saved_registers();
            for reg in &saved_regs {
                code.push(format!("\tpush {}", reg));
            }

            // Evaluate and push arguments in reverse order
            let mut arg_regs = Vec::new();
            for arg in args.iter().rev() {
                let (arg_reg, arg_code) = self.generate_expression(arg)?;
                code.extend(arg_code);
                code.push(format!("\tpush {}", arg_reg));
                arg_regs.push(arg_reg);
            }

            // The callee writes its return value into the first argument slot
            // (bpr+8). With no arguments that slot does not exist, so reserve
            // one; otherwise the `pop` below would unbalance the stack.
            if args.is_empty() {
                code.push("\tpush zero".to_string());
            }

            if let Some(target) = GLOBAL_METHODS.get(name.as_str()) {
                code.push(format!("\tcall {}", target));
            } else if self.symbols.contains(name) {
                // Call local function
                code.push(format!("\tcall {}", name));
            } else {
                return Err(format!("undefined function {name}"));
            }

            // The result sits on top of the stack; pop it into a fresh register.
            let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
            code.extend(result_alloc);
            code.push(format!("\tpop {}", result_reg));

            // Discard the remaining argument slots.
            for _ in 0..args.len().saturating_sub(1) {
                code.push("\tpop zero".to_string());
            }

            // Restore caller-saved registers in reverse order (LIFO)
            for reg in saved_regs.iter().rev() {
                code.push(format!("\tpop {}", reg));
            }

            // Free argument registers
            for reg in arg_regs {
                self.allocator.free_temp(&reg);
            }

            Ok((result_reg, code))
        }

        Expression::Unary { op, operand } => {
            let (operand_reg, operand_code) = self.generate_expression(operand)?;
            code.extend(operand_code);

            let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
            code.extend(result_alloc);

            match op {
                UnaryOperator::Minus => {
                    // Negate: result = 0 - operand
                    code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg));
                }
                UnaryOperator::Plus => {
                    // Unary plus is a plain copy.
                    code.push(format!("\tmov {}, {}", operand_reg, result_reg));
                }
            }

            self.allocator.free_temp(&operand_reg);
            Ok((result_reg, code))
        }

        Expression::Empty => Ok(("zero".to_string(), code)),
    }
}

/// Appends the instruction sequence for one comparison to `code`.
///
/// `result_reg` is preset to 0, `skip_jump` (the jump taken when the
/// comparison is false) skips over the instruction that sets it to 1.
fn emit_comparison(
    &mut self,
    code: &mut Vec<String>,
    left_reg: &str,
    right_reg: &str,
    result_reg: &str,
    skip_jump: &str,
) {
    code.push(format!("\tcmp {}, {}", left_reg, right_reg));
    code.push(format!("\tlli 0, {}", result_reg));
    let end_label = format!("_cmp_end_{}", self.get_unique_label());
    code.push(format!("\t{} {}", skip_jump, end_label));
    code.push(format!("\tlli 1, {}", result_reg));
    code.push(format!("{}:", end_label));
}
/// Returns a fresh decimal suffix for generated labels.
///
/// The counter is a function-local static, so suffixes are unique across all
/// generators in the process; the first value handed out is `"1"`.
fn get_unique_label(&mut self) -> String {
    use std::sync::atomic::Ordering;

    static NEXT_ID: AtomicU32 = AtomicU32::new(0);

    let previous = NEXT_ID.fetch_add(1, Ordering::SeqCst);
    format!("{}", previous + 1)
}
}
/// Build a single string from any number of arguments.
+70
View File
@@ -10,10 +10,13 @@ pub enum TokenType {
Else,
While,
Return,
Include,
// Identifiers and literals
Identifier(String),
Number(i32),
String(String),
Char(char),
// Operators
Plus,
@@ -35,10 +38,22 @@ pub enum TokenType {
RBrace,
Semicolon,
Comma,
Colon,
Namespace,
Eof,
}
/// Names of the scalar types listed for the language.
///
/// NOTE(review): nothing in the lexer code shown here constructs these yet -
/// presumably they are reserved for upcoming type keywords; confirm.
///
/// The usual value-type traits are derived so the enum can be compared,
/// copied, hashed and debug-printed like the neighbouring token types.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Type {
    Int32,
    Int16,
    Int8,
    Uint32,
    Uint16,
    Uint8,
    Char,
}
#[derive(Debug, Clone)]
pub struct Token {
pub token_type: TokenType,
@@ -150,6 +165,45 @@ impl Lexer {
ident
}
/// Reads a double-quoted string literal and returns its contents.
///
/// The cursor must be on the opening quote; on success it ends up just past
/// the closing quote.
///
/// Escape sequences: `\n`, `\t`, `\r` and `\0` are translated to the control
/// character they denote. Any other escaped character (including `\"` and
/// `\\`) stands for itself.
///
/// # Errors
/// Returns `"Unexpected EOF"` when the input ends before the closing quote.
fn read_string(&mut self) -> Result<String, String> {
    let mut string = String::new();
    self.advance(); // Consume the opening quote

    while let Some(ch) = self.peek(0) {
        self.advance();
        match ch {
            // Closing quote (already consumed above).
            '"' => return Ok(string),
            '\\' => {
                // A backslash at the very end of input is an unterminated string.
                let Some(escaped) = self.peek(0) else { break };
                self.advance();
                string.push(match escaped {
                    'n' => '\n',
                    't' => '\t',
                    'r' => '\r',
                    '0' => '\0',
                    other => other,
                });
            }
            other => string.push(other),
        }
    }

    Err(String::from("Unexpected EOF"))
}
/// Reads a single-quoted character literal and returns the character.
///
/// The cursor must be on the opening quote. Exactly one character is taken
/// as the value; no escape sequences are interpreted.
///
/// # Errors
/// * `"expected character"` - the input ends right after the opening quote.
/// * `"expected closing quote"` - the character is not followed by `'`.
fn read_char(&mut self) -> Result<char, String> {
    self.advance(); // Consume the opening quote

    let Some(ch) = self.peek(0) else {
        return Err(String::from("expected character"));
    };
    self.advance();

    match self.peek(0) {
        Some('\'') => {
            self.advance();
            Ok(ch)
        }
        _ => Err(String::from("expected closing quote")),
    }
}
pub fn tokenize(&mut self) -> Result<Vec<Token>, String> {
let mut tokens = Vec::new();
@@ -168,6 +222,12 @@ impl Lexer {
let token_type = if ch.is_ascii_digit() {
let num = self.read_number();
TokenType::Number(num)
} else if ch == '"' {
let string = self.read_string()?;
TokenType::String(string)
} else if ch == '\'' {
let char = self.read_char()?;
TokenType::Char(char)
} else if ch.is_alphabetic() || ch == '_' {
let ident = self.read_identifier();
match ident.as_str() {
@@ -176,10 +236,20 @@ impl Lexer {
"else" => TokenType::Else,
"while" => TokenType::While,
"return" => TokenType::Return,
"include" => TokenType::Include,
_ => TokenType::Identifier(ident),
}
} else {
match ch {
':' if self.peek(1) == Some(':') => {
self.advance();
self.advance();
TokenType::Namespace
}
':' => {
self.advance();
TokenType::Colon
}
'=' if self.peek(1) == Some('=') => {
self.advance();
self.advance();
+4
View File
@@ -1,10 +1,14 @@
#![feature(hash_map_macro)]
use std::fmt;
use crate::{codegen::CodeGenerator, lexer::Lexer, parser::Parser};
// mod assembly;
pub mod codegen;
pub mod lexer;
pub mod parser;
mod registers;
// ============================================================================
// Main & Tests
+45 -15
View File
@@ -17,12 +17,16 @@ pub enum Declaration {
name: String,
return_type: Type,
params: Vec<Parameter>,
body: Statement,
body: Block,
},
Variable {
name: String,
init: Option<ConstExpr>,
},
Import {
name: String,
path: String,
},
}
#[derive(Debug, Clone)]
@@ -44,11 +48,11 @@ pub enum Type {
Struct(String),
}
pub type Block = Vec<Statement>;
#[derive(Debug, Clone)]
pub enum Statement {
Compound {
statements: Vec<Statement>,
},
Block(Block),
Assign {
// left side
name: String,
@@ -62,12 +66,12 @@ pub enum Statement {
},
If {
condition: Expression,
then_stmt: Box<Statement>,
else_stmt: Option<Box<Statement>>,
then_stmt: Block,
else_stmt: Block,
},
While {
condition: Expression,
body: Box<Statement>,
body: Vec<Statement>,
},
Return {
expr: Option<Expression>,
@@ -225,6 +229,32 @@ impl Parser {
}
fn parse_declaration(&mut self) -> Result<Declaration, String> {
// check for an import
if let TokenType::Include = self.current().token_type {
self.advance();
let name =
if let TokenType::Identifier(id) = self.current().clone().token_type {
Some(id)
} else {
None
}
.ok_or(String::from("Expected identifier"))?;
self.advance();
self.expect(TokenType::Colon)?;
let path = if let TokenType::String(id) = self.current().clone().token_type {
Some(id)
} else {
None
}
.ok_or(String::from("Expected string literal"))?;
self.advance();
return Ok(Declaration::Import { name, path });
}
self.expect(TokenType::Int)?;
let name = match &self.current().token_type {
@@ -271,7 +301,7 @@ impl Parser {
}
self.expect(TokenType::RParen)?;
let body = self.parse_compound_stmt()?;
let body = self.parse_block()?;
Ok(Declaration::Function {
name,
@@ -302,7 +332,7 @@ impl Parser {
}
}
fn parse_compound_stmt(&mut self) -> Result<Statement, String> {
fn parse_block(&mut self) -> Result<Block, String> {
self.expect(TokenType::LBrace)?;
let mut statements = Vec::new();
@@ -311,12 +341,12 @@ impl Parser {
}
self.expect(TokenType::RBrace)?;
Ok(Statement::Compound { statements })
Ok(statements)
}
fn parse_statement(&mut self) -> Result<Statement, String> {
match &self.current().token_type {
TokenType::LBrace => Ok(self.parse_compound_stmt()?),
TokenType::LBrace => Ok(Statement::Block(self.parse_block()?)),
TokenType::If => self.parse_if_stmt(),
TokenType::While => self.parse_while_stmt(),
TokenType::Return => self.parse_return_stmt(),
@@ -408,13 +438,13 @@ impl Parser {
self.expect(TokenType::LParen)?;
let condition = self.parse_expression()?;
self.expect(TokenType::RParen)?;
let then_stmt = Box::new(self.parse_statement()?);
let then_stmt = self.parse_block()?;
let else_stmt = if matches!(self.current().token_type, TokenType::Else) {
self.advance();
Some(Box::new(self.parse_statement()?))
self.parse_block()?
} else {
None
Vec::new()
};
Ok(Statement::If {
@@ -429,7 +459,7 @@ impl Parser {
self.expect(TokenType::LParen)?;
let condition = self.parse_expression()?;
self.expect(TokenType::RParen)?;
let body = Box::new(self.parse_statement()?);
let body = self.parse_block()?;
Ok(Statement::While { condition, body })
}
+344
View File
@@ -0,0 +1,344 @@
use std::collections::HashMap;
/// Register allocator for DSA assembly generation
/// Manages general-purpose registers (rg0-rgf) and handles stack spilling
///
/// The allocator only does bookkeeping: every method that needs machine
/// instructions (loads, stores, moves) returns them as assembly lines for the
/// caller to emit.
pub struct RegisterAllocator {
/// Available general-purpose registers
/// (scanned front to back, so earlier entries are handed out first)
available_registers: Vec<String>,
/// Maps variable names to their current location (register or stack offset)
variable_locations: HashMap<String, Location>,
/// Maps registers to the variables they currently hold
/// (a register missing from this map holds a temporary or nothing)
register_contents: HashMap<String, String>,
/// Current stack offset for local variables (relative to bpr)
/// Starts at -4 (going downward from base pointer)
/// This is the offset of the next unused slot; it drops by 4 per slot handed out.
stack_offset: i32,
/// Track which registers are currently in use
/// (a register with no entry is treated as not in use)
in_use: HashMap<String, bool>,
}
/// Where the current value of a variable lives.
#[derive(Debug, Clone)]
pub enum Location {
/// Held in the register with this name.
Register(String),
/// Stored in memory; the value is used as the offset operand of the
/// `ldw`/`stw` instructions that address it through `bpr`.
Stack(i32), // offset from bpr
}
impl RegisterAllocator {
pub fn new() -> Self {
// Initialize with available GP registers (rg0-rgf = 16 registers)
let registers = vec![
"rg0", "rg1", "rg2", "rg3", "rg4", "rg5", "rg6", "rg7", "rg8", "rg9", "rga",
"rgb", "rgc", "rgd", "rge", "rgf",
]
.into_iter()
.map(String::from)
.collect();
RegisterAllocator {
available_registers: registers,
variable_locations: HashMap::new(),
register_contents: HashMap::new(),
stack_offset: -4, // Start at -4 (first local below saved bpr)
in_use: HashMap::new(),
}
}
/// Hands out a register for a temporary value.
///
/// A free register is preferred. When none is left, the first register (in
/// allocation order) that holds a variable is spilled to the stack and
/// reused. The returned lines are the spill code, empty when nothing had to
/// be spilled.
///
/// # Errors
/// Fails when every register is busy and none of them holds a variable that
/// could be spilled.
pub fn alloc_temp(&mut self) -> Result<(String, Vec<String>), String> {
    // Fast path: an unused register is available.
    if let Some(free) = self.find_free_register() {
        self.in_use.insert(free.clone(), true);
        return Ok((free, Vec::new()));
    }

    // Slow path: evict the first register that holds a variable.
    let victim = self
        .available_registers
        .iter()
        .find(|reg| self.register_contents.contains_key(*reg))
        .cloned();

    match victim {
        Some(reg) => {
            let code = self.spill_register(&reg)?;
            self.in_use.insert(reg.clone(), true);
            Ok((reg, code))
        }
        None => Err("No registers available and nothing to spill".to_string()),
    }
}
/// Releases a temporary register.
///
/// Registers that currently hold a variable are left untouched: variables
/// outlive individual expressions and are only released through `free_var`.
pub fn free_temp(&mut self, reg: &str) {
    let holds_variable = self.register_contents.contains_key(reg);
    if !holds_variable {
        // A genuine temporary - mark it as available again.
        self.in_use.insert(reg.to_string(), false);
    }
}
/// Makes sure `var_name` lives in a register and returns that register.
///
/// * Already in a register: that register is returned, no code.
/// * On the stack: a register is allocated and an `ldw` reloading the value
///   is appended to the returned code.
/// * Unknown: a register is allocated and bound to the name.
///
/// The returned code may also contain spill code from `alloc_temp`.
///
/// # Errors
/// Propagates the failure of `alloc_temp` when no register can be obtained.
pub fn alloc_var(&mut self, var_name: &str) -> Result<(String, Vec<String>), String> {
    let known = self.variable_locations.get(var_name).cloned();

    if let Some(Location::Register(reg)) = &known {
        return Ok((reg.clone(), Vec::new()));
    }

    let (reg, mut code) = self.alloc_temp()?;

    // A spilled variable has to be read back from its stack slot.
    if let Some(Location::Stack(offset)) = known {
        code.push(format!("\tldw bpr, {}, {}", reg, offset));
    }

    self.variable_locations
        .insert(var_name.to_string(), Location::Register(reg.clone()));
    self.register_contents
        .insert(reg.clone(), var_name.to_string());

    Ok((reg, code))
}
/// Get the current location of a variable
///
/// Returns `None` when the allocator is not tracking a variable with this
/// name. This is a pure lookup; no register is allocated.
pub fn get_var_location(&self, var_name: &str) -> Option<&Location> {
self.variable_locations.get(var_name)
}
/// Load a variable into a register (allocating if necessary)
/// Returns the register and assembly code to load it
///
/// This is a direct delegate to `alloc_var`, so a name that has never been
/// stored is not rejected: it is simply bound to a newly allocated register.
pub fn load_var(&mut self, var_name: &str) -> Result<(String, Vec<String>), String> {
self.alloc_var(var_name)
}
/// Writes the value in `source_reg` to the variable `var_name` and returns
/// the assembly needed to do so.
///
/// * Known variable in a register: a `mov`, unless source and destination
///   are the same register.
/// * Known variable on the stack: an `stw` to its slot.
/// * New variable: bound to a free register when one exists, otherwise given
///   the next stack slot.
pub fn store_var(&mut self, var_name: &str, source_reg: &str) -> Vec<String> {
    // Existing variable: write to wherever it currently lives.
    if let Some(location) = self.variable_locations.get(var_name) {
        return match location {
            Location::Register(dest_reg) if dest_reg == source_reg => Vec::new(),
            Location::Register(dest_reg) => {
                vec![format!("\tmov {}, {}", source_reg, dest_reg)]
            }
            Location::Stack(offset) => {
                vec![format!("\tstw {}, bpr, {}", source_reg, offset)]
            }
        };
    }

    let name = var_name.to_string();

    // New variable with no free register: place it in the next stack slot.
    let Some(free_reg) = self.find_free_register() else {
        let slot = self.stack_offset;
        self.stack_offset -= 4;
        self.variable_locations.insert(name, Location::Stack(slot));
        return vec![format!("\tstw {}, bpr, {}", source_reg, slot)];
    };

    // New variable with a free register: bind it there.
    let code = if free_reg == source_reg {
        Vec::new()
    } else {
        vec![format!("\tmov {}, {}", source_reg, free_reg)]
    };
    self.variable_locations
        .insert(name.clone(), Location::Register(free_reg.clone()));
    self.register_contents.insert(free_reg.clone(), name);
    self.in_use.insert(free_reg, true);

    code
}
/// Moves the variable tracked in `reg`, if any, out to the next stack slot.
///
/// When `reg` is recorded as holding a variable this emits a `stw` to the
/// current stack offset, records that offset as the variable's location,
/// drops the register-to-variable entry and advances the offset by one
/// 4-byte slot. When `reg` holds no tracked variable nothing is emitted.
///
/// The `in_use` flag of `reg` is not modified here. Always returns `Ok`.
fn spill_register(&mut self, reg: &str) -> Result<Vec<String>, String> {
    let owner = match self.register_contents.remove(reg) {
        Some(name) => name,
        None => return Ok(Vec::new()),
    };
    let slot = self.stack_offset;
    self.variable_locations.insert(owner, Location::Stack(slot));
    self.stack_offset = slot - 4;
    Ok(vec![format!("\tstw {}, bpr, {}", reg, slot)])
}
/// Returns the first entry of `available_registers` whose `in_use` flag is
/// not set (a missing flag counts as not set), or `None` if all are taken.
fn find_free_register(&self) -> Option<String> {
    self.available_registers
        .iter()
        .find(|candidate| !self.in_use.get(*candidate).copied().unwrap_or(false))
        .cloned()
}
/// Spills every register that currently holds a variable to the stack
/// (useful before function calls).
///
/// Registers are processed in sorted name order so the emitted code and the
/// stack slots handed out do not depend on map iteration order.
///
/// Each spilled register is also marked as no longer in use. `spill_register`
/// leaves the `in_use` flag untouched, and once the variable's location is a
/// stack slot `free_var` no longer clears the flag for that register, so
/// without this the register would stay reserved with no owner.
///
/// Returns the assembly lines that perform the spills.
pub fn spill_all(&mut self) -> Vec<String> {
    let mut regs_to_spill: Vec<String> = self.register_contents.keys().cloned().collect();
    regs_to_spill.sort_unstable();

    let mut code = Vec::new();
    for reg in regs_to_spill {
        if let Ok(spill_code) = self.spill_register(&reg) {
            code.extend(spill_code);
            // The variable now lives on the stack; release its old register.
            self.in_use.insert(reg, false);
        }
    }
    code
}
/// Reports the stack extent tracked by the allocator as a positive number.
///
/// The value is the negation of the (negative) offset of the next free slot.
pub fn get_stack_size(&self) -> i32 {
    // NOTE(review): `reset` starts the offset at -4, so this reports 4 before
    // any slot has been handed out — confirm callers expect that extra slot.
    let next_slot_offset = self.stack_offset;
    -next_slot_offset
}
/// Returns the allocator to a clean state ready for a new function.
///
/// Forgets all variable locations, register contents and in-use flags, and
/// rewinds the next stack slot to offset -4. `available_registers` is not
/// touched.
pub fn reset(&mut self) {
    self.stack_offset = -4;
    self.in_use.clear();
    self.register_contents.clear();
    self.variable_locations.clear();
}
/// Declares `var_name` dead and forgets its location.
///
/// If the variable lived in a register, that register's contents entry is
/// removed and its `in_use` flag is cleared. A variable on the stack, or one
/// the allocator does not know, only has its location entry dropped.
pub fn free_var(&mut self, var_name: &str) {
    if let Some(Location::Register(reg)) = self.variable_locations.remove(var_name) {
        self.register_contents.remove(&reg);
        self.in_use.insert(reg, false);
    }
}
/// Lists the registers that hold a variable and are flagged as in use.
/// These are the ones to save before a function call.
///
/// The result follows the iteration order of `register_contents`.
pub fn get_caller_saved_registers(&self) -> Vec<String> {
    let mut live = Vec::new();
    for reg in self.register_contents.keys() {
        let flagged = self.in_use.get(reg).copied().unwrap_or(false);
        if flagged {
            live.push(reg.clone());
        }
    }
    live
}
/// Emits one `push` per caller-saved register ahead of a function call.
///
/// The registers and their order come from `get_caller_saved_registers`, so
/// the pushes line up with a list obtained from that method and later handed
/// to `restore_caller_saved`, which pops in reverse.
///
/// For simplicity every in-use register that holds a variable is saved; a
/// more sophisticated compiler would only save registers that are live.
///
/// Returns the assembly lines to emit. No allocator state is modified.
pub fn save_caller_saved(&mut self) -> Vec<String> {
    self.get_caller_saved_registers()
        .iter()
        .map(|reg| format!("\tpush {}", reg))
        .collect()
}
/// Emits one `pop` per entry of `saved_regs` after a function call.
///
/// The list is walked back to front (LIFO) so the pops undo pushes that were
/// issued in `saved_regs` order. No allocator state is modified.
pub fn restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec<String> {
    saved_regs
        .iter()
        .rev()
        .map(|reg| format!("\tpop {}", reg))
        .collect()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Temporaries are handed out from the lowest free register and a freed
    /// register is reused by the next request.
    #[test]
    fn test_basic_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg1, code1) = allocator.alloc_temp().unwrap();
        assert!(code1.is_empty()); // No spill needed
        assert_eq!(reg1, "rg0");

        let (reg2, code2) = allocator.alloc_temp().unwrap();
        assert!(code2.is_empty());
        assert_eq!(reg2, "rg1");

        allocator.free_temp(&reg1);
        let (reg3, code3) = allocator.alloc_temp().unwrap();
        assert!(code3.is_empty());
        assert_eq!(reg3, "rg0"); // Reuses freed register
    }

    /// Asking for the same variable twice yields the same register.
    #[test]
    fn test_variable_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg, "rg0");

        // Requesting same variable again should return same register
        let (reg2, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg2, "rg0");
    }

    /// Once every register holds a variable, the next allocation must emit
    /// spill code.
    #[test]
    fn test_stack_allocation() {
        let mut allocator = RegisterAllocator::new();

        // Allocate all 16 registers
        for i in 0..16 {
            allocator.alloc_var(&format!("var{}", i)).unwrap();
        }

        // Next allocation should spill to stack
        let (_reg, code) = allocator.alloc_var("var16").unwrap();
        assert!(!code.is_empty()); // Should have spill code
    }
}
-3985
View File
File diff suppressed because it is too large Load Diff
+139
View File
@@ -0,0 +1,139 @@
// GENERATED BY DSA-C COMPILER
// Generated at 2026-01-31 01:39:55
// Imports
include maths: "./lib/maths/core.dsa"
include print: "lib/io/print.dsa"
// Globals & Reserved Memory
// Entry Point
dw stack: 0x10000
db message: "Process Exited with code:"
// Program entry: sets up the stack registers, calls main, prints the exit
// message followed by the value left in main's result slot, then halts.
_init:
// load the word stored at `stack` into bpr and copy it to spr
ldw stack, bpr
mov bpr, spr
// slot for main's result: main writes it with `stw rg0, bpr, 8`
push zero
call main
call print::print_newline
// print the text at `message`
lwi message, rg0
push rg0
call print::print
// presumably discards the argument just pushed - confirm `pop zero` semantics
pop zero
// the slot pushed before `call main` is on top again and serves as the argument here
call print::print_hex_word
pop zero
hlt
// Function return boilerplate
// Shared epilogue: every generated function ends with `jmp _ret`.
// Mirrors the `push bpr` / `mov spr, bpr` sequence at the top of each function.
_ret:
// reset spr to the frame base, dropping whatever the function left on the stack
mov bpr, spr
// restore the bpr value saved by the function's `push bpr`
pop bpr
return
// factorial: recursive routine. Reads its argument from bpr+8 and writes its
// result back into the same slot before jumping to _ret.
factorial:
push bpr
mov spr, bpr
ldw bpr, rg0, 8
// rg2 = 1 when the argument is <= 1, otherwise 0
// (assumes jgt branches when rg0 > rg1 in the preceding cmp - confirm)
lli 1, rg1
cmp rg0, rg1
lli 0, rg2
jgt _cmp_end_1
lli 1, rg2
_cmp_end_1:
cmp rg2, zero
jeq _else_3
_then_2:
// base case: the result is 1
lli 1, rg1
stw rg1, bpr, 8
jmp _ret
// not reachable: follows an unconditional jmp with no label in between
jmp _end_4
_else_3:
nop
_end_4:
// recursive case: keep the argument on the stack across the call and pass
// (argument - 1), assuming sub computes first operand minus second
push rg0
lli 1, rg1
sub rg0, rg1, rg2
push rg2
call factorial
// rg1 = value the callee stored in the argument slot, rg0 = saved argument
pop rg1
pop rg0
// multiply the recursive result by the original argument
push rg1
push rg0
call maths::multiply
pop rg2
pop zero
stw rg2, bpr, 8
jmp _ret
// add_: adds the words at bpr+8 and bpr+12 and stores the sum at bpr+8,
// where the caller pops it after the call.
add_:
push bpr
mov spr, bpr
ldw bpr, rg0, 8
ldw bpr, rg1, 12
add rg0, rg1, rg2
stw rg2, bpr, 8
jmp _ret
// greater: loads the words at bpr+8 (rg0) and bpr+12 (rg1), doubles each and
// compares the doubled values. Stores rg0 as the result when the test holds,
// otherwise stores rg1 + rg0. The result goes to bpr+8.
greater:
push bpr
mov spr, bpr
ldw bpr, rg0, 8
ldw bpr, rg1, 12
// rg2 = rg0 + rg0, rg3 = rg1 + rg1
add rg0, rg0, rg2
add rg1, rg1, rg3
// rg4 = 1 when rg2 > rg3, otherwise 0
// (assumes jle branches when rg2 <= rg3 in the preceding cmp - confirm)
cmp rg2, rg3
lli 0, rg4
jle _cmp_end_5
lli 1, rg4
_cmp_end_5:
cmp rg4, zero
jeq _else_7
_then_6:
stw rg0, bpr, 8
jmp _ret
// not reachable: follows an unconditional jmp with no label in between
jmp _end_8
_else_7:
add rg1, rg0, rg2
stw rg2, bpr, 8
jmp _ret
// only targeted by the unreachable jmp above; both branches already jump to _ret
_end_8:
jmp _ret
// main: calls add_ with 5 and 5, passes that result and 5 to greater, prints
// greater's result, then prints the result of factorial called with 5.
// Stores 0 at bpr+8 as its own result.
main:
push bpr
mov spr, bpr
// add_ with two arguments of 5; result popped into rg2
lli 5, rg0
push rg0
lli 5, rg1
push rg1
call add_
pop rg2
pop zero
// greater with the add_ result (pushed first) and 5 (pushed last); result popped into rg1
push rg2
lli 5, rg0
push rg0
call greater
pop rg1
pop zero
// print greater's result; the following pop removes the argument
push rg1
call print::print_num
pop rg0
// factorial with argument 5; result popped into rg1
lli 5, rg0
push rg0
call factorial
pop rg1
push rg1
call print::print_num
pop rg0
// result value 0, written to the slot _init pushed before `call main`
lli 0, rg0
stw rg0, bpr, 8
jmp _ret
+64
View File
@@ -28,6 +28,11 @@
// push pcx
// jmp print::print_word
//
// usage for print_num:
// push (register containing number to print in decimal)
// push pcx
// jmp print::print_num
//
include maths "../maths/core.dsa"
@@ -209,6 +214,65 @@ print_newline:
// _end saves the display state
jmp _end
// ------------------------------------------
// prints arg[0] as a decimal number to the screen.
//
// Register use within this routine:
//   rg0 - value still to be converted (replaced by the quotient after each divmod)
//   rg1 - divisor, then remainder, while extracting; display pointer while printing
//   rg5 - number of digits currently held on the stack
//   rg6 - ASCII character being pushed or printed
// Digits are produced least-significant first, so they are pushed onto the
// stack and popped back off to print them most-significant first.
// NOTE(review): relies on maths::divmod leaving rg5 intact across the call - confirm.
print_num:
push bpr
mov spr, bpr
ldw bpr, rg0, 8 // load number to print
lli 0, rg5 // rg5 = digit counter
// check if number is zero
cmp rg0, zero
jne _print_num_extract_digits
// special case: print '0' for zero
lli 0x30, rg6
push rg6 // push digit to stack buffer
lli 1, rg5 // we have 1 digit
jmp _print_num_output
_print_num_extract_digits:
// divide by 10 repeatedly to get digits
// loop ends once the quotient left in rg0 reaches zero
cmp rg0, zero
jeq _print_num_output
// call divmod(rg0, 10)
push rg0 // dividend
lli 10, rg1
push rg1 // divisor (10)
call maths::divmod
pop rg0 // quotient (continue dividing this)
pop rg1 // remainder (the digit)
// convert digit to ASCII and push to stack buffer
addi rg1, 0x30, rg6 // convert to ASCII
push rg6 // push digit to stack
inc rg5 // increment digit counter
jmp _print_num_extract_digits
_print_num_output:
// now print digits (pop them off in reverse order)
ldw current, rg1 // get display pointer
_print_num_output_loop:
// check if we've printed all digits
cmp rg5, zero
jeq _print_num_done
// pop digit and print it
pop rg6
stb rg6, rg1
// NOTE(review): two-operand addi here, unlike the three-operand form used
// above - confirm the assembler reads this as rg1 = rg1 + 1
addi rg1, 1
dec rg5
jmp _print_num_output_loop
_print_num_done:
// _end is defined outside this excerpt; the comment on the preceding routine
// says it saves the display state
jmp _end
// ------------------------------------------
// resets the cursor position on the screen to 0x20000. (0,0)
reset:
+1
View File
@@ -16,6 +16,7 @@ multiply:
ldw bpr, rg0, 8 // load op 2
ldw bpr, rg1, 12 // load op 1
lwi 0, rg2 // initialise rg2 to zero
_multiply_loop:
add rg2, rg0, rg2
+6 -44
View File
@@ -16,55 +16,17 @@ init:
dw string: "hello world"
start:
lwi 37, rg0
lwi 12, rg1
lwi 1, rg0
lwi 2, rg1
push rg0
push rg1
call maths::divmod
pop rg0 // result
pop rg1 // remainder
push rg1
push rg0
call print::print_hex_byte
call print::print_whitespace
call maths::multiply
pop rg0
pop zero
call print::print_hex_byte
call print::print_newline
lwi string, rg0
//lwi 10, rg0
pusha 4
push rg0
call print::print
//call fib::fib_n
call print::print_num
pop zero
call print::print_newline
popa 4
pusha 4
push rg0
call print::print
//call fib::fib_n
pop zero
call print::print_newline
popa 4
pusha 4
push rg0
call print::print
//call fib::fib_n
pop zero
call print::print_newline
popa 4
pusha 4
push rg0
call print::print
//call fib::fib_n
pop zero
call print::print_newline
popa 4
hlt
View File