Merge compiler and emulator progress from last few months into main. #11

Merged
zxq5 merged 55 commits from compiler into main 2026-02-14 11:54:15 +00:00
10 changed files with 466 additions and 79 deletions
Showing only changes of commit 52ef7872f0 - Show all commits
+1 -1
View File
@@ -7,6 +7,6 @@ int factorial(int n) {
int main() {
int res = factorial(3);
print(res);
printnum(res);
return 0;
}
+27
View File
@@ -0,0 +1,27 @@
include print: "lib/io/print.dsa"
int factorial(int n) {
if (n <= 1) {
return 1;
}
return n * factorial(n - 1);
}
int add_(int a, int b) {
return a + b;
}
int greater(int a, int b) {
if (a + a > b + b) {
return a;
} else {
return b + a;
}
}
int main() {
printnum(greater(5, add_(5, 5)));
printnum(factorial(5));
return 0;
}
+106
View File
@@ -0,0 +1,106 @@
/// Every register name this module knows about.
///
/// The variants fall into three groups: sixteen general purpose registers
/// (`Rg0` to `Rgf`), special purpose registers, and system registers.
/// No explicit discriminants are given, so the declaration order below fixes
/// the implicit ones; append new registers rather than reordering.
/// NOTE(review): confirm whether any encoder relies on that numbering.
///
/// The enum is `#[non_exhaustive]`, so `match`es in other crates need a
/// wildcard arm.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum Register {
// general purpose registers
// (named with a hexadecimal suffix, 0 through f)
Rg0,
Rg1,
Rg2,
Rg3,
Rg4,
Rg5,
Rg6,
Rg7,
Rg8,
Rg9,
Rga,
Rgb,
Rgc,
Rgd,
Rge,
Rgf,
// special purpose registers
// NOTE(review): the names suggest accumulator (Acc), stack pointer (Spr),
// base pointer (Bpr) and return (Ret); Idr and Mmr are not explained by
// anything visible here — confirm against the ISA documentation.
Acc,
Spr,
Bpr,
Ret,
Idr,
Mmr,
// NOTE(review): presumably a constant-zero / discard register — confirm.
Zero,
// NOTE(review): presumably a placeholder for "no register operand" — confirm.
NoReg,
// system registers - can't be written to by instructions.
Mar,
Mdr,
Sts,
Cir,
Pcx,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
#[non_exhaustive]
/// A list of all current instructions in the DSA Assembly language.
///
/// Every variant carries an explicit discriminant and the enum is `#[repr(u8)]`.
/// NOTE(review): the discriminants look like the instruction opcodes — confirm
/// before relying on them for encoding.
///
/// Operand meaning and order are not documented here.
/// NOTE(review): for the `Register, Register, Option<u32>` variants the
/// trailing `Option<u32>` is presumably an optional offset; for the
/// `u16, Register` variants the `u16` is presumably an immediate or target —
/// confirm against the assembler.
pub enum Instruction {
// No-op
Nop = 0x0,
// Data transfer instructions
Mov(Register, Register) = 0x1,
Movs(Register, Register) = 0x2,
// Loads (Ld*) and stores (St*). NOTE(review): the b/h/w suffixes presumably
// select byte/half-word/word width and a trailing `s` a sign-extending
// form — confirm.
Ldb(Register, Register, Option<u32>) = 0x3,
Ldbs(Register, Register, Option<u32>) = 0x4,
Ldh(Register, Register, Option<u32>) = 0x5,
Ldhs(Register, Register, Option<u32>) = 0x6,
Ldw(Register, Register, Option<u32>) = 0x7,
Stb(Register, Register, Option<u32>) = 0x8,
Sth(Register, Register, Option<u32>) = 0x9,
Stw(Register, Register, Option<u32>) = 0xA,
// NOTE(review): presumably "load lower immediate" / "load upper immediate" — confirm.
Lli(u16, Register) = 0xB,
Lui(u16, Register) = 0xC,
// Jump Instructions
Jump(u16, Register) = 0xD,
JumpEq(u16, Register) = 0xE,
JumpNeq(u16, Register) = 0xF,
JumpGt(u16, Register) = 0x10,
JumpGe(u16, Register) = 0x11,
JumpLt(u16, Register) = 0x12,
JumpLe(u16, Register) = 0x13,
// Comparison
Compare(Register, Register) = 0x14,
// The variants below are disabled. Their intended discriminants occupy
// 0x15..=0x26, which no active variant uses. They are listed in source
// order, not numeric order (Add/Sub at 0x19/0x1A precede Increment at 0x15).
// // Arithmetic
// Add(args::RTypeArgs) = 0x19,
// Sub(args::RTypeArgs) = 0x1A,
// Increment(args::RTypeArgs) = 0x15,
// Decrement(args::RTypeArgs) = 0x16,
// ShiftLeft(args::RTypeArgs) = 0x17,
// ShiftRight(args::RTypeArgs) = 0x18,
// // Logical
// And(args::RTypeArgs) = 0x1B,
// Or(args::RTypeArgs) = 0x1C,
// Not(args::RTypeArgs) = 0x1D,
// Xor(args::RTypeArgs) = 0x1E,
// Nand(args::RTypeArgs) = 0x1F,
// Nor(args::RTypeArgs) = 0x20,
// Xnor(args::RTypeArgs) = 0x21,
// // Misc
// Interrupt(Interrupt) = 0x22,
// IntReturn = 0x23,
// Halt = 0x24,
// // Immediate Arithmetic
// AddImmediate(args::ITypeArgs) = 0x25,
// SubImmediate(args::ITypeArgs) = 0x26,
// Fake Instructions
// NOTE(review): presumably assembler pseudo-instructions (raw data word and
// segment marker) rather than executable opcodes — confirm.
Data(u32) = 0x3E,
Segment(u32) = 0x3F,
}
+63 -33
View File
@@ -1,4 +1,5 @@
use std::hash::Hash;
use std::sync::LazyLock;
use std::sync::atomic::AtomicU32;
use std::time::SystemTime;
use std::{collections::HashMap, path::PathBuf};
@@ -18,23 +19,32 @@ pub struct CodeGenerator {
imports: HashMap<String, String>,
globals: Vec<String>,
functions: Vec<String>,
symbols: Vec<String>,
allocator: RegisterAllocator,
call_stack: Vec<String>,
}
/// Built-in functions that source programs may call without declaring them.
///
/// Keys are the names as written in source; values are the namespaced
/// assembly routines emitted after `call` in their place.
///
/// Built with the stable `HashMap::from` so this table does not depend on the
/// unstable `hash_map!` macro.
static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    HashMap::from([
        ("print", "print::print"),
        ("printnum", "print::print_num"),
    ])
});
/// Renders the assembly `include` directive that binds namespace `name` to
/// the file at `path`. The path is wrapped in double quotes and is not escaped.
fn import(name: &str, path: &str) -> String {
    format!("include {name}: \"{path}\"")
}
impl CodeGenerator {
const RET: &'static str = "\tjmp _ret";
pub fn new(ast: Program) -> Self {
CodeGenerator {
ast,
imports: HashMap::new(),
globals: Vec::new(),
functions: Vec::new(),
symbols: Vec::new(),
allocator: RegisterAllocator::new(),
call_stack: Vec::new(),
}
}
@@ -47,11 +57,15 @@ impl CodeGenerator {
self.include("print", "./lib/io/print.dsa");
for block in self.ast.clone().declarations {
self.generate_block(block.clone())?;
match block {
Declaration::Variable { name, .. } => self.symbols.push(name),
Declaration::Function { name, .. } => self.symbols.push(name),
Declaration::Import { name, .. } => self.symbols.push(name),
}
}
for func in &self.functions {
println!("{func}");
for block in self.ast.clone().declarations {
self.generate_block(block.clone())?;
}
self.generate_layout()
@@ -88,6 +102,7 @@ impl CodeGenerator {
dsa![mov bpr, spr],
dsa![push zero],
dsa![call main],
dsa![call print::print_newline],
dsa![lwi message, rg0],
dsa![push rg0],
dsa![call print::print],
@@ -96,6 +111,13 @@ impl CodeGenerator {
dsa![pop zero],
dsa![hlt]
],
"",
comment!("Function return boilerplate"),
block! [ "_ret"
dsa![mov bpr, spr],
dsa![pop bpr],
dsa![return]
],
// block! [ "main"
// dsa![push bpr],
// dsa![mov spr, bpr],
@@ -131,6 +153,9 @@ impl CodeGenerator {
self.functions.push(format!("{func}\n"));
}
Declaration::Import { name, path } => {
self.imports.insert(name, path);
}
};
Ok(())
@@ -143,8 +168,6 @@ impl CodeGenerator {
params: &[Parameter],
body: &[Statement],
) -> Vec<String> {
self.call_stack.push(name.to_string());
let mut code = Vec::new();
// Reset allocator for new function
@@ -154,6 +177,7 @@ impl CodeGenerator {
code.push(format!("{}:", name));
code.push("\tpush bpr".to_string());
code.push("\tmov spr, bpr".to_string());
code.push(String::new());
// Allocate parameters to registers or stack locations
for (i, param) in params.iter().enumerate() {
@@ -170,13 +194,14 @@ impl CodeGenerator {
code.extend(stmt_code);
}
// Function epilogue
code.push(format!("_ret_{name}:"));
code.push("\tmov bpr, spr".to_string());
code.push("\tpop bpr".to_string());
code.push("\treturn".to_string());
// automatically return at function end
if let Some(x) = code.last()
&& x == Self::RET
{
} else {
code.push(Self::RET.to_string());
}
self.call_stack.pop();
code
}
@@ -212,7 +237,7 @@ impl CodeGenerator {
let (result_reg, expr_code) = self.generate_expression(e)?;
code.extend(expr_code);
code.push(format!("\tstw {}, bpr, 8", result_reg));
code.push(format!("\tjmp _ret_{}", self.call_stack.last().unwrap()));
code.push(format!("\tjmp _ret"));
self.allocator.free_temp(&result_reg);
}
}
@@ -419,7 +444,7 @@ impl CodeGenerator {
_ => return Err(format!("Unsupported binary operator: {:?}", op)),
}
// Free operand registers
// Free operand registers (allocator will protect variables)
self.allocator.free_temp(&left_reg);
self.allocator.free_temp(&right_reg);
@@ -427,9 +452,11 @@ impl CodeGenerator {
}
Expression::Call { name, args } => {
// Save caller-saved registers
let save_code = self.allocator.save_caller_saved();
code.extend(save_code);
// Save caller-saved registers and track which ones we saved
let saved_regs = self.allocator.get_caller_saved_registers();
for reg in &saved_regs {
code.push(format!("\tpush {}", reg));
}
// Evaluate and push arguments in reverse order
let mut arg_regs = Vec::new();
@@ -440,18 +467,31 @@ impl CodeGenerator {
arg_regs.push(arg_reg);
}
if self.functions.contains_key(name) {
if GLOBAL_METHODS.contains_key(name.as_str()) {
code.push(format!("\tcall {}", GLOBAL_METHODS[name.as_str()]));
} else if self.symbols.contains(name) {
// Call local function
code.push(format!("\tcall {}", name));
} else {
return Err(format!("undefined function {name}"));
}
if self.imports
// Result is in rg0, allocate a register and move it
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
code.extend(result_alloc);
code.push(format!("\tpop {}", result_reg));
// Clean up arguments
for _ in 0..args.len() {
code.push("\tpop zero".to_string());
if args.len() > 1 {
for _ in 0..(args.len() - 1) {
code.push("\tpop zero".to_string());
}
}
// Restore caller-saved registers in reverse order (LIFO)
for reg in saved_regs.iter().rev() {
code.push(format!("\tpop {}", reg));
}
// Free argument registers
@@ -459,16 +499,6 @@ impl CodeGenerator {
self.allocator.free_temp(&reg);
}
// Result is in rg0, allocate a register and move it
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
code.extend(result_alloc);
if result_reg != "rg0" {
code.push(format!("\tmov rg0, {}", result_reg));
}
// Restore caller-saved registers (simplified - you'd track which ones)
Ok((result_reg, code))
}
+70
View File
@@ -10,10 +10,13 @@ pub enum TokenType {
Else,
While,
Return,
Include,
// Identifiers and literals
Identifier(String),
Number(i32),
String(String),
Char(char),
// Operators
Plus,
@@ -35,10 +38,22 @@ pub enum TokenType {
RBrace,
Semicolon,
Comma,
Colon,
Namespace,
Eof,
}
/// Scalar type names: signed (`Int*`) and unsigned (`Uint*`) integers with a
/// 32, 16 or 8 suffix, plus `Char`.
///
/// Derives the usual value-type traits so it can be printed in diagnostics,
/// copied freely and compared.
// NOTE(review): nothing in the visible code constructs or matches on this type yet.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Type {
    Int32,
    Int16,
    Int8,
    Uint32,
    Uint16,
    Uint8,
    Char,
}
#[derive(Debug, Clone)]
pub struct Token {
pub token_type: TokenType,
@@ -150,6 +165,45 @@ impl Lexer {
ident
}
/// Lexes a double-quoted string literal and returns its contents without the
/// surrounding quotes.
///
/// Must be called with the cursor on the opening `"`; on success the cursor
/// is left just past the closing `"`.
///
/// Backslash escapes are decoded: `\n`, `\t`, `\r` and `\0` become the
/// corresponding control characters. Any other escaped character (notably
/// `\"` and `\\`) stands for itself.
///
/// # Errors
/// Returns `"Unexpected EOF"` when the input ends before the closing quote.
fn read_string(&mut self) -> Result<String, String> {
    self.advance(); // Consume the opening quote

    let mut string = String::new();
    while let Some(ch) = self.peek(0) {
        self.advance(); // Consume `ch` itself
        match ch {
            '"' => return Ok(string),
            '\\' => {
                // A backslash at end of input falls through to the EOF error below.
                if let Some(escaped) = self.peek(0) {
                    self.advance();
                    string.push(match escaped {
                        'n' => '\n',
                        't' => '\t',
                        'r' => '\r',
                        '0' => '\0',
                        other => other,
                    });
                }
            }
            _ => string.push(ch),
        }
    }

    Err(String::from("Unexpected EOF"))
}
/// Lexes a single-quoted character literal and returns the character.
///
/// Must be called with the cursor on the opening `'`; on success the cursor
/// is left just past the closing `'`.
///
/// Backslash escapes are decoded the same way as in string literals: `\n`,
/// `\t`, `\r` and `\0` become the corresponding control characters, and any
/// other escaped character (notably `\'` and `\\`) stands for itself.
///
/// # Errors
/// * `"expected character"` when the input ends right after the opening
///   quote or right after a backslash.
/// * `"expected closing quote"` when the character is not followed by `'`.
fn read_char(&mut self) -> Result<char, String> {
    self.advance(); // Consume the opening quote

    let Some(first) = self.peek(0) else {
        return Err(String::from("expected character"));
    };
    self.advance();

    let value = if first == '\\' {
        let Some(escaped) = self.peek(0) else {
            return Err(String::from("expected character"));
        };
        self.advance();
        match escaped {
            'n' => '\n',
            't' => '\t',
            'r' => '\r',
            '0' => '\0',
            other => other,
        }
    } else {
        first
    };

    if self.peek(0) == Some('\'') {
        self.advance(); // Consume the closing quote
        Ok(value)
    } else {
        Err(String::from("expected closing quote"))
    }
}
pub fn tokenize(&mut self) -> Result<Vec<Token>, String> {
let mut tokens = Vec::new();
@@ -168,6 +222,12 @@ impl Lexer {
let token_type = if ch.is_ascii_digit() {
let num = self.read_number();
TokenType::Number(num)
} else if ch == '"' {
let string = self.read_string()?;
TokenType::String(string)
} else if ch == '\'' {
let char = self.read_char()?;
TokenType::Char(char)
} else if ch.is_alphabetic() || ch == '_' {
let ident = self.read_identifier();
match ident.as_str() {
@@ -176,10 +236,20 @@ impl Lexer {
"else" => TokenType::Else,
"while" => TokenType::While,
"return" => TokenType::Return,
"include" => TokenType::Include,
_ => TokenType::Identifier(ident),
}
} else {
match ch {
':' if self.peek(1) == Some(':') => {
self.advance();
self.advance();
TokenType::Namespace
}
':' => {
self.advance();
TokenType::Colon
}
'=' if self.peek(1) == Some('=') => {
self.advance();
self.advance();
+3
View File
@@ -1,7 +1,10 @@
#![feature(hash_map_macro)]
use std::fmt;
use crate::{codegen::CodeGenerator, lexer::Lexer, parser::Parser};
// mod assembly;
pub mod codegen;
pub mod lexer;
pub mod parser;
+30
View File
@@ -23,6 +23,10 @@ pub enum Declaration {
name: String,
init: Option<ConstExpr>,
},
Import {
name: String,
path: String,
},
}
#[derive(Debug, Clone)]
@@ -225,6 +229,32 @@ impl Parser {
}
fn parse_declaration(&mut self) -> Result<Declaration, String> {
// check for an import
if let TokenType::Include = self.current().token_type {
self.advance();
let name =
if let TokenType::Identifier(id) = self.current().clone().token_type {
Some(id)
} else {
None
}
.ok_or(String::from("Expected identifier"))?;
self.advance();
self.expect(TokenType::Colon)?;
let path = if let TokenType::String(id) = self.current().clone().token_type {
Some(id)
} else {
None
}
.ok_or(String::from("Expected string literal"))?;
self.advance();
return Ok(Declaration::Import { name, path });
}
self.expect(TokenType::Int)?;
let name = match &self.current().token_type {
+20
View File
@@ -81,7 +81,17 @@ impl RegisterAllocator {
}
/// Releases `reg` so it can be handed out again, but only if it is a
/// temporary.
///
/// A register that has an entry in `register_contents` holds a variable and
/// is left untouched here: variables live for their whole scope and are
/// released through `free_var()` instead.
pub fn free_temp(&mut self, reg: &str) {
    let holds_variable = self.register_contents.contains_key(reg);
    if !holds_variable {
        // True temporary: mark it as no longer in use.
        self.in_use.insert(reg.to_string(), false);
    }
}
@@ -243,6 +253,16 @@ impl RegisterAllocator {
self.variable_locations.remove(var_name);
}
/// Returns the registers that currently hold a variable and are marked in
/// use. These are the registers to save before a function call.
///
/// The result is sorted by register name. Map iteration order is otherwise
/// unspecified, which would let the emitted push/pop sequence vary between
/// compiler runs for the same input.
pub fn get_caller_saved_registers(&self) -> Vec<String> {
    let mut live: Vec<String> = self
        .register_contents
        .keys()
        // A register with no `in_use` entry counts as not in use.
        .filter(|reg| self.in_use.get(*reg).copied().unwrap_or(false))
        .cloned()
        .collect();
    live.sort_unstable();
    live
}
/// Save caller-saved registers before a function call
/// Returns assembly code to save them
pub fn save_caller_saved(&mut self) -> Vec<String> {
+139
View File
@@ -0,0 +1,139 @@
// GENERATED BY DSA-C COMPILER
// Generated at 2026-01-31 01:39:55
// Imports
include maths: "./lib/maths/core.dsa"
include print: "lib/io/print.dsa"
// Globals & Reserved Memory
// Entry Point
dw stack: 0x10000
db message: "Process Exited with code:"
_init:
ldw stack, bpr
mov bpr, spr
push zero
call main
call print::print_newline
lwi message, rg0
push rg0
call print::print
pop zero
call print::print_hex_word
pop zero
hlt
// Function return boilerplate
_ret:
mov bpr, spr
pop bpr
return
factorial:
push bpr
mov spr, bpr
ldw bpr, rg0, 8
lli 1, rg1
cmp rg0, rg1
lli 0, rg2
jgt _cmp_end_1
lli 1, rg2
_cmp_end_1:
cmp rg2, zero
jeq _else_3
_then_2:
lli 1, rg1
stw rg1, bpr, 8
jmp _ret
jmp _end_4
_else_3:
nop
_end_4:
push rg0
lli 1, rg1
sub rg0, rg1, rg2
push rg2
call factorial
pop rg1
pop rg0
push rg1
push rg0
call maths::multiply
pop rg2
pop zero
stw rg2, bpr, 8
jmp _ret
add_:
push bpr
mov spr, bpr
ldw bpr, rg0, 8
ldw bpr, rg1, 12
add rg0, rg1, rg2
stw rg2, bpr, 8
jmp _ret
greater:
push bpr
mov spr, bpr
ldw bpr, rg0, 8
ldw bpr, rg1, 12
add rg0, rg0, rg2
add rg1, rg1, rg3
cmp rg2, rg3
lli 0, rg4
jle _cmp_end_5
lli 1, rg4
_cmp_end_5:
cmp rg4, zero
jeq _else_7
_then_6:
stw rg0, bpr, 8
jmp _ret
jmp _end_8
_else_7:
add rg1, rg0, rg2
stw rg2, bpr, 8
jmp _ret
_end_8:
jmp _ret
main:
push bpr
mov spr, bpr
lli 5, rg0
push rg0
lli 5, rg1
push rg1
call add_
pop rg2
pop zero
push rg2
lli 5, rg0
push rg0
call greater
pop rg1
pop zero
push rg1
call print::print_num
pop rg0
lli 5, rg0
push rg0
call factorial
pop rg1
push rg1
call print::print_num
pop rg0
lli 0, rg0
stw rg0, bpr, 8
jmp _ret
+7 -45
View File
@@ -16,55 +16,17 @@ init:
dw string: "hello world"
start:
lwi 37, rg0
lwi 12, rg1
push rg0
push rg1
call maths::divmod
pop rg0 // result
pop rg1 // remainder
lwi 1, rg0
lwi 2, rg1
push rg1
push rg0
call print::print_hex_byte
call print::print_whitespace
push rg1
call maths::multiply
pop rg0
pop zero
call print::print_hex_byte
call print::print_newline
lwi string, rg0
//lwi 10, rg0
pusha 4
push rg0
call print::print
//call fib::fib_n
pop zero
call print::print_newline
popa 4
pusha 4
push rg0
call print::print
//call fib::fib_n
pop zero
call print::print_newline
popa 4
pusha 4
push rg0
call print::print
//call fib::fib_n
pop zero
call print::print_newline
popa 4
pusha 4
push rg0
call print::print
//call fib::fib_n
pop zero
call print::print_newline
popa 4
call print::print_num
pop zero
hlt