Continued the register allocator rewrite; progress is slow because scoping is proving to be a challenge.
This commit is contained in:
2026-02-14 02:46:29 +00:00
parent d66baf6f99
commit 201b18069b
10 changed files with 1153 additions and 790 deletions
+1
View File
@@ -7,3 +7,4 @@ authors.workspace = true
[dependencies]
chrono = "0.4.43"
common = { path = "../common" }
uuid = { version = "1.20.0", features = ["v4"] }
File diff suppressed because it is too large Load Diff
+283 -124
View File
@@ -1,58 +1,6 @@
use std::fmt;
use crate::{
backend::dsa::registers::Register,
model::{CompilerError, Expression},
};
pub struct CodeGen {
// For building the final program
program: InsBlock,
// For generating temporary blocks
label_counter: usize,
stack_offset: i32,
}
impl CodeGen {
pub fn new() -> Self {
Self {
program: InsBlock::new(),
label_counter: 0,
stack_offset: 0,
}
}
/// Emit directly to program (for top-level constructs)
pub fn emit(&mut self, instr: Instruction) {
self.program.push(instr);
}
/// Emit a block to program
pub fn emit_block(&mut self, block: InsBlock) {
self.program.append(block);
}
/// Build expression (returns block for composition)
pub fn build_expr(&mut self, expr: &Expression) -> Result<InsBlock, CompilerError> {
// ... returns InstrBlock
todo!()
}
/// Get final output
pub fn finish(mut self) -> String {
// Optimize before final output
// self.program.remove_dead_code();
// self.program.optimize_peephole();
self.program
.instructions
.iter()
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join("\n")
}
}
use crate::backend::dsa::registers::Register;
pub struct InsBlock {
instructions: Vec<Instruction>,
@@ -65,6 +13,10 @@ impl InsBlock {
}
}
pub fn insert(&mut self, index: usize, instr: Instruction) {
self.instructions.insert(index, instr);
}
pub fn push(&mut self, instr: Instruction) {
self.instructions.push(instr);
}
@@ -90,11 +42,56 @@ impl InsBlock {
}
}
/// Wraps an already-built list of instructions in an `InsBlock`.
/// The vector is moved into the block, so the instruction order is kept as given.
impl From<Vec<Instruction>> for InsBlock {
fn from(instructions: Vec<Instruction>) -> Self {
Self { instructions }
}
}
/// Builds a block that contains exactly one instruction.
impl From<Instruction> for InsBlock {
    fn from(instr: Instruction) -> Self {
        let instructions = vec![instr];
        Self { instructions }
    }
}
#[derive(Debug, Clone)]
pub enum Instruction {
// Labels and comments
Label(Label),
Comment(String),
// Data Directives
Db {
label: String,
data: Vec<u8>,
},
Dh {
label: String,
data: Vec<u16>,
},
Dw {
label: String,
data: Vec<u32>,
},
DString {
// alias for db.
label: String,
data: String,
},
Resx {
label: String,
size: u32,
},
// Include
Include {
name: String,
path: String,
},
// Data movement
Mov {
src: Register,
@@ -107,28 +104,28 @@ pub enum Instruction {
// Memory operations
Ldb {
addr: MemOperand,
src: MemOperand,
dest: Register,
},
Ldh {
addr: MemOperand,
src: MemOperand,
dest: Register,
},
Ldw {
addr: MemOperand,
src: MemOperand,
dest: Register,
},
Stb {
src: Register,
addr: MemOperand,
dest: MemOperand,
},
Sth {
src: Register,
addr: MemOperand,
dest: MemOperand,
},
Stw {
src: Register,
addr: MemOperand,
dest: MemOperand,
},
// Immediate loads
@@ -140,6 +137,14 @@ pub enum Instruction {
imm: Imm,
dest: Register,
},
Lwi {
imm: Imm,
dest: Register,
},
LwiLabel {
label: String,
dest: Register,
},
// Arithmetic
Add {
@@ -214,8 +219,8 @@ pub enum Instruction {
},
Shr {
src1: Register,
rsh: Register,
ish: u16,
r_shamt: Register,
i_shamt: u16,
dest: Register,
},
@@ -270,23 +275,67 @@ pub enum Instruction {
},
}
/// Payload of a data directive, with one variant per supported element type.
///
/// NOTE(review): nothing visible in this diff references this enum; presumably it is
/// meant to back the `Db`/`Dh`/`Dw`/`DString` instruction variants — confirm.
pub enum DataDirective {
/// A list of 8-bit values.
U8(Vec<u8>),
/// A list of 16-bit values.
U16(Vec<u16>),
/// A list of 32-bit values.
U32(Vec<u32>),
/// A text payload.
String(String),
/// A single character.
Char(char),
}
impl fmt::Display for Instruction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Label(l) => write!(f, "{}:", l),
Self::Comment(c) => write!(f, "; {}", c),
Self::Comment(c) => write!(f, "// {}", c),
Self::Include { name, path } => write!(f, "include {name}: \"{}\"", path),
Self::Db { label, data } => write!(
f,
"db {}: {}",
label,
data.iter()
.map(|&b| format!("{:#04X}", b))
.collect::<Vec<String>>()
.join(", ")
),
Self::Dh { label, data } => write!(
f,
"dh {}: {}",
label,
data.iter()
.map(|&b| format!("{:#06X}", b))
.collect::<Vec<String>>()
.join(", ")
),
Self::Dw { label, data } => write!(
f,
"dw {}: {}",
label,
data.iter()
.map(|&b| format!("{:#08X}", b))
.collect::<Vec<String>>()
.join(", ")
),
Self::DString { label, data } => write!(f, "db {}: \"{}\"", label, data),
Self::Resx { label, size } => write!(f, "resx {}: {}", label, size),
Self::Mov { src, dest } => write!(f, " mov {}, {}", src, dest),
Self::Movs { src, dest } => write!(f, " movs {}, {}", src, dest),
Self::Ldb { addr, dest } => {
write!(f, " ldb {}, {}", format_mem_operand(addr), dest)
Self::Ldb { src: addr, dest } => {
let (reg, offset) = reg_and_offset(addr);
write!(f, " ldb {}, {}, {}", reg, dest, offset)
}
Self::Ldh { addr, dest } => {
write!(f, " ldh {}, {}", format_mem_operand(addr), dest)
Self::Ldh { src: addr, dest } => {
let (reg, offset) = reg_and_offset(addr);
write!(f, " ldh {}, {}, {}", reg, dest, offset)
}
Self::Ldw { addr, dest } => {
write!(f, " ldw {}, {}", format_mem_operand(addr), dest)
Self::Ldw { src, dest } => {
let (reg, offset) = reg_and_offset(src);
write!(f, " ldw {}, {}, {}", reg, dest, offset)
}
// Self::Ldbs { addr, dest } => {
// write!(f, " ldbs {}, {}", format_mem_operand(addr), dest)
@@ -297,18 +346,23 @@ impl fmt::Display for Instruction {
// Self::Ldws { addr, dest } => {
// write!(f, " ldws {}, {}", format_mem_operand(addr), dest)
// }
Self::Stb { src, addr } => {
write!(f, " stb {}, {}", src, format_mem_operand(addr))
Self::Stb { src, dest: addr } => {
let (reg, offset) = reg_and_offset(addr);
write!(f, " stb {}, {}, {}", src, reg, offset)
}
Self::Sth { src, addr } => {
write!(f, " sth {}, {}", src, format_mem_operand(addr))
Self::Sth { src, dest: addr } => {
let (reg, offset) = reg_and_offset(addr);
write!(f, " sth {}, {}, {}", src, reg, offset)
}
Self::Stw { src, addr } => {
write!(f, " stw {}, {}", src, format_mem_operand(addr))
Self::Stw { src, dest: addr } => {
let (reg, offset) = reg_and_offset(addr);
write!(f, " stw {}, {}, {}", src, reg, offset)
}
Self::Lli { imm, dest } => write!(f, " lli {}, {}", imm, dest),
Self::Lui { imm, dest } => write!(f, " lui {}, {}", imm, dest),
Self::Lwi { imm, dest } => write!(f, " lwi {}, {}", imm, dest),
Self::LwiLabel { label, dest } => write!(f, " lwi {}, {}", label, dest),
// arithmetic
Self::Add { src1, src2, dest } => {
@@ -340,16 +394,16 @@ impl fmt::Display for Instruction {
}
Self::IAdd { src, imm, dest } => {
if let Some(d) = dest {
write!(f, " iadd {}, {}, {}", src, imm, d)
write!(f, " addi {}, {}, {}", src, imm, d)
} else {
write!(f, " iadd {}, {}", src, imm)
write!(f, " addi {}, {}", src, imm)
}
}
Self::ISub { src, imm, dest } => {
if let Some(d) = dest {
write!(f, " isub {}, {}, {}", src, imm, d)
write!(f, " subi {}, {}, {}", src, imm, d)
} else {
write!(f, " isub {}, {}", src, imm)
write!(f, " subi {}, {}", src, imm)
}
}
@@ -364,8 +418,8 @@ impl fmt::Display for Instruction {
}
Self::Shr {
src1,
rsh: r_shamt,
ish: i_shamt,
r_shamt,
i_shamt,
dest,
} => {
// `Shr` must print its own mnemonic; it previously printed `shl`, which made
// every right shift assemble as a left shift.
write!(f, " shr {}, {}, {}, {}", src1, r_shamt, i_shamt, dest)
@@ -401,47 +455,94 @@ impl fmt::Display for Instruction {
}
}
}
impl Instruction {
// data directives
/// Builds a `DString` data directive that defines `label` with the text `data`.
pub fn db_string(label: impl Into<String>, data: impl Into<String>) -> Self {
    let (label, data) = (label.into(), data.into());
    Self::DString { label, data }
}
/// Builds a `Dw` data directive that defines `label` with the single 32-bit value `data`.
pub fn db_word(label: impl Into<String>, data: u32) -> Self {
    let label = label.into();
    let data = vec![data];
    Self::Dw { label, data }
}
/// Builds a `Db` data directive that defines `label` with a copy of the bytes in `data`.
pub fn db_bytes(label: impl Into<String>, data: &[u8]) -> Self {
    let label = label.into();
    let data = Vec::from(data);
    Self::Db { label, data }
}
// Movement
pub fn mov(src: Register, dest: Register) -> Self {
Self::Mov { src, dest }
pub fn mov<R1, R2>(src: R1, dest: R2) -> Self
where
R1: Into<Register>,
R2: Into<Register>,
{
Self::Mov {
src: src.into(),
dest: dest.into(),
}
}
// Memory loads
pub fn ldw_reg(base: Register, dest: Register) -> Self {
pub fn ldw_reg<R>(base: R, dest: Register) -> Self
where
R: Into<Register>,
{
Self::Ldw {
addr: MemOperand::RegIndirect(base),
src: MemOperand::RegIndirect(base.into()),
dest,
}
}
pub fn ldw_reg_offset(base: Register, offset: i32, dest: Register) -> Self {
pub fn ldw_reg_offset<R>(base: R, dest: Register, offset: i32) -> Self
where
R: Into<Register>,
{
Self::Ldw {
addr: MemOperand::RegOffset(base, offset),
src: MemOperand::RegOffset(base.into(), offset),
dest,
}
}
pub fn ldw_label(label: impl Into<Label>, dest: Register) -> Self {
Self::Ldw {
addr: MemOperand::Label(label.into()),
src: MemOperand::Label(label.into()),
dest,
}
}
// Memory stores
pub fn stw_reg(src: Register, base: Register) -> Self {
pub fn stw_reg<R>(src: Register, base: R) -> Self
where
R: Into<Register>,
{
Self::Stw {
src,
addr: MemOperand::RegIndirect(base),
dest: MemOperand::RegIndirect(base.into()),
}
}
pub fn stw_reg_offset(src: Register, base: Register, offset: i32) -> Self {
pub fn stw_reg_offset<R>(src: Register, base: R, offset: i32) -> Self
where
R: Into<Register>,
{
Self::Stw {
src,
addr: MemOperand::RegOffset(base, offset),
dest: MemOperand::RegOffset(base.into(), offset),
}
}
/// Builds an `Stw` instruction that stores `src` to the location named by `label`.
pub fn stw_label(src: Register, label: impl Into<Label>) -> Self {
    let dest = MemOperand::Label(label.into());
    Self::Stw { src, dest }
}
@@ -454,20 +555,74 @@ impl Instruction {
Self::Sub { src1, src2, dest }
}
pub fn iadd(src: Register, imm: i32) -> Self {
Self::IAdd {
src,
imm: Imm(imm),
dest: None,
/// Builds an `And` instruction from two source registers and a destination register.
pub fn and(src1: Register, src2: Register, dest: Register) -> Self {
Self::And { src1, src2, dest }
}
/// Builds an `Or` instruction from two source registers and a destination register.
pub fn or(src1: Register, src2: Register, dest: Register) -> Self {
Self::Or { src1, src2, dest }
}
/// Builds an `Xor` instruction from two source registers and a destination register.
pub fn xor(src1: Register, src2: Register, dest: Register) -> Self {
Self::Xor { src1, src2, dest }
}
/// Builds a `Not` instruction from a source register and a destination register.
pub fn not(src: Register, dest: Register) -> Self {
Self::Not { src, dest }
}
/// Builds a `Shl` instruction.
///
/// NOTE(review): the instruction carries both a register shift amount (`r_shamt`) and
/// an immediate shift amount (`i_shamt`); how the target combines them is not visible
/// here — confirm against the ISA reference.
pub fn shl(src1: Register, r_shamt: Register, i_shamt: u16, dest: Register) -> Self {
Self::Shl {
src1,
r_shamt,
i_shamt,
dest,
}
}
pub fn iadd_dest(src: Register, imm: i32, dest: Register) -> Self {
/// Builds a `Shr` instruction.
///
/// NOTE(review): the instruction carries both a register shift amount (`r_shamt`) and
/// an immediate shift amount (`i_shamt`); how the target combines them is not visible
/// here — confirm against the ISA reference.
pub fn shr(src1: Register, r_shamt: Register, i_shamt: u16, dest: Register) -> Self {
Self::Shr {
src1,
r_shamt,
i_shamt,
dest,
}
}
pub fn iadd(src: Register, value: i64) -> Self {
let imm = Imm(value.abs() as u32);
if value < 0 {
Self::ISub {
src,
imm,
dest: None,
}
} else {
Self::IAdd {
src,
imm: Imm(imm),
imm,
dest: None,
}
}
}
/// Builds an immediate add of the signed `value` to `src`, writing the result to `dest`.
///
/// The immediate operand is unsigned, so a negative `value` is emitted as an `ISub`
/// of its magnitude and a non-negative `value` as an `IAdd`.
pub fn iadd_dest(src: Register, value: i32, dest: Register) -> Self {
    // `unsigned_abs` is defined for every input. `i32::MIN.abs()` overflows
    // (a panic in debug builds) even though its magnitude fits in a `u32`.
    let imm = Imm(value.unsigned_abs());
    let dest = Some(dest);
    if value < 0 {
        Self::ISub { src, imm, dest }
    } else {
        Self::IAdd { src, imm, dest }
    }
}
pub fn inc(reg: Register) -> Self {
@@ -479,20 +634,25 @@ impl Instruction {
}
// Immediate loads
pub fn load_imm32(value: u32, dest: Register) -> Vec<Self> {
let lower = (value & 0xFFFF) as i32;
let upper = ((value >> 16) & 0xFFFF) as i32;
vec![
pub fn lwi(value: u32, dest: Register) -> Self {
if value > 0xFFFF {
Self::Lwi {
imm: Imm(value),
dest,
}
} else {
Self::Lli {
imm: Imm(lower),
imm: Imm(value),
dest,
},
Self::Lui {
imm: Imm(upper),
}
}
}
pub fn lwi_label(label: impl Into<String>, dest: Register) -> Self {
Self::LwiLabel {
label: label.into(),
dest,
},
]
}
}
// Control flow
@@ -544,6 +704,13 @@ impl Instruction {
/// Builds a `Comment` instruction carrying `text`.
pub fn comment(text: impl Into<String>) -> Self {
Self::Comment(text.into())
}
/// Builds an `Include` directive that binds `name` to the file at `path`.
pub fn include(name: impl Into<String>, path: impl Into<String>) -> Self {
    let (name, path) = (name.into(), path.into());
    Self::Include { name, path }
}
}
// Convenience trait for Label conversion
@@ -559,20 +726,12 @@ impl From<&str> for Label {
}
}
fn format_mem_operand(op: &MemOperand) -> String {
fn reg_and_offset(op: &MemOperand) -> (String, i32) {
match op {
MemOperand::RegIndirect(reg) => format!("{}", reg),
MemOperand::RegOffset(reg, offset) => {
if *offset >= 0 {
format!("{}, {}", reg, offset)
} else {
format!("{}, {}", reg, offset)
}
}
MemOperand::Label(label) => format!("{}", label),
MemOperand::LabelOffset(label, offset) => {
format!("{}, {}", label, offset)
}
MemOperand::RegIndirect(reg) => (reg.to_string(), 0),
MemOperand::RegOffset(reg, offset) => (reg.to_string(), *offset),
MemOperand::Label(label) => (label.to_string(), 0),
MemOperand::LabelOffset(label, offset) => (label.to_string(), *offset),
}
}
@@ -591,7 +750,7 @@ pub enum MemOperand {
/// Immediate value (16-bit or 32-bit)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Imm(pub i32);
pub struct Imm(pub u32);
impl fmt::Display for Imm {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+40 -84
View File
@@ -1,12 +1,14 @@
use std::{collections::HashMap, fmt};
use crate::model::CompilerError;
use crate::{
backend::dsa::instruction::{InsBlock, Instruction},
model::CompilerError,
};
/// Register allocator for DSA assembly generation
/// Manages general-purpose registers (rg0-rgf) and handles stack spilling
pub struct RegisterAllocator {
/// Available general-purpose registers
/// Maps variable names to their current location (register or stack offset)
variable_locations: HashMap<String, Location>,
@@ -79,14 +81,14 @@ impl RegisterAllocator {
/// Allocate a temporary register for expression evaluation
/// Returns the register name and optionally assembly code to save it
pub fn alloc_temp(&mut self) -> Result<(Register, Vec<String>), CompilerError> {
pub fn alloc_temp(&mut self) -> Result<(Register, InsBlock), CompilerError> {
// Try to find an unused register
// println!("finding! {:#?}", self.in_use);
if let Some(reg) = self.find_free_register() {
self.in_use[reg as usize].1 = true;
return Ok((reg, Vec::new()));
return Ok((reg, InsBlock::new()));
}
// All registers in use - need to spill one
@@ -156,11 +158,11 @@ impl RegisterAllocator {
pub fn alloc_var(
&mut self,
var_name: &str,
) -> Result<(Register, Vec<String>), CompilerError> {
) -> Result<(Register, InsBlock), CompilerError> {
if let Some(mut location) = self.variable_locations.get(var_name).cloned() {
// if the var is in a register we can use it already.
if let Some(reg) = location.register {
return Ok((reg, Vec::new()));
return Ok((reg, InsBlock::new()));
}
// if the variable is on the stack only, we need to get it in a register.
@@ -174,12 +176,11 @@ impl RegisterAllocator {
// Load from bpr + offset (offset is negative)
// code.push(format!("\tsubi bpr {} {}", -(offset + 4), reg));
code.push(format!(
"\tldw spr, {}, {} // spr+{}: {}",
code.push(Instruction::ldw_reg_offset(
Register::Spr,
reg,
offset - self.stack_offset,
offset - self.stack_offset,
var_name
));
// Update location to register
@@ -212,34 +213,33 @@ impl RegisterAllocator {
pub fn load_var(
&mut self,
var_name: &str,
) -> Result<(Register, Vec<String>), CompilerError> {
) -> Result<(Register, InsBlock), CompilerError> {
self.alloc_var(var_name)
}
/// Store a value from a register into a variable
/// Updates tracking and returns any necessary assembly code
pub fn store_var(&mut self, var_name: &str, source_reg: &Register) -> Vec<String> {
pub fn store_var(&mut self, var_name: &str, source_reg: &Register) -> InsBlock {
let mut block = InsBlock::new();
// Check if variable already has a location
if let Some(location) = self.variable_locations.get(var_name) {
// if the variable exists in a register we write to that.
if let Some(reg) = location.register {
if reg == *source_reg {
return vec![format!(
"\tmov {}, {} // save var:{} reg:{}",
source_reg, reg, var_name, reg
)];
block.push(Instruction::mov(*source_reg, reg));
return block;
}
}
// if the variable exists on the stack but not a register we write here.
if let Some(offset) = location.stack {
return vec![format!(
"\tstw {}, spr, {} // save var:{} offset:{}",
source_reg,
block.push(Instruction::stw_reg_offset(
*source_reg,
Register::Spr,
offset - self.stack_offset,
var_name,
offset
)];
));
return block;
}
}
@@ -254,7 +254,7 @@ impl RegisterAllocator {
.insert(*source_reg, var_name.to_string());
self.in_use[*source_reg as usize].1 = true;
return Vec::new();
return block;
}
// if current register isn't free, (eg is another variable) we assign somewhere
@@ -266,7 +266,8 @@ impl RegisterAllocator {
.insert(free_reg.clone(), var_name.to_string());
self.in_use[free_reg as usize].1 = true;
return vec![format!("\tmov {}, {}", source_reg, free_reg)];
block.push(Instruction::mov(*source_reg, free_reg));
return block;
}
// No free registers - allocate on stack
@@ -280,11 +281,8 @@ impl RegisterAllocator {
/// spill a register to the stack (WITHOUT FREEING)
/// DO NOT USE this if it's for a pointer!!!!
pub fn _spill_register(
&mut self,
reg: &Register,
) -> Result<Vec<String>, CompilerError> {
let mut code = Vec::new();
pub fn _spill_register(&mut self, reg: &Register) -> Result<InsBlock, CompilerError> {
let mut code = InsBlock::new();
// check if the variable is declared.
if let Some(var_name) = self.register_contents.get(reg).cloned()
@@ -292,13 +290,10 @@ impl RegisterAllocator {
{
// check if var is on the stack
if let Some(offset) = location.stack {
// ensure stack value is up to date with register value.
code.push(format!(
"\tstw {}, spr, {} // save var:{} offset:{}",
reg,
code.push(Instruction::stw_reg_offset(
*reg,
Register::Spr,
offset - self.stack_offset,
var_name,
offset
));
return Ok(code);
}
@@ -309,10 +304,7 @@ impl RegisterAllocator {
// if the variable is not on the stack:
// push register to stack (spr decrements automatically)
let offset = self.stack_offset;
code.push(format!(
"\tpush {} // free var:{} offset:{}",
reg, var_name, offset
));
code.push(Instruction::push(*reg));
// Update variable location - it's now at current spr
// Note: We track offset from bpr for consistency
@@ -332,9 +324,7 @@ impl RegisterAllocator {
pub fn free_register(
&mut self,
reg: &Register,
) -> Result<(i32, Vec<String>), CompilerError> {
let mut code = Vec::new();
) -> Result<(i32, Instruction), CompilerError> {
// check if the variable is declared.
if let Some(var_name) = self.register_contents.get(reg).cloned()
&& let Some(location) = self.variable_locations.get_mut(&var_name)
@@ -342,13 +332,11 @@ impl RegisterAllocator {
// check if var name is on the stack
if let Some(offset) = location.stack {
// store current register value in stack location
code.push(format!(
"\tstw {}, spr, {} // save var:{} offset:{}",
reg,
let code = Instruction::stw_reg_offset(
*reg,
Register::Spr,
offset - self.stack_offset,
var_name,
offset
));
);
// free the register.
location.register = None;
@@ -360,10 +348,7 @@ impl RegisterAllocator {
self.stack_offset -= 4;
let offset = self.stack_offset;
code.push(format!(
"\tpush {} // free var:{} offset:{}",
reg, var_name, offset
));
let code = Instruction::push(*reg);
// Update variable location
// Note: We track offset from bpr for consistency
@@ -390,15 +375,15 @@ impl RegisterAllocator {
}
/// Spill all registers to stack (useful before function calls)
pub fn _spill_all(&mut self) -> Vec<String> {
let mut code = Vec::new();
pub fn _spill_all(&mut self) -> InsBlock {
let mut code = InsBlock::new();
let regs_to_spill: Vec<Register> =
self.register_contents.keys().cloned().collect();
for reg in regs_to_spill {
if let Ok(spill_code) = self.free_register(&reg) {
code.extend(spill_code.1);
code.push(spill_code.1);
}
}
@@ -468,35 +453,6 @@ impl RegisterAllocator {
.map(|(reg, _)| reg.clone())
.collect()
}
/// Save caller-saved registers before a function call
/// Returns assembly code to save them
// pub fn _save_caller_saved(&mut self) -> Vec<String> {
// let mut code = Vec::new();
// // For simplicity, save all currently used registers
// // In a more sophisticated compiler, you'd only save registers that are live
// for (reg, _) in self.register_contents.clone() {
// if *self.in_use.get(reg as usize).unwrap_or(&false) {
// code.push(format!("\tpush {}", reg));
// }
// }
// code
// }
/// Restore caller-saved registers after a function call
/// Returns assembly code to restore them
pub fn _restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec<String> {
let mut code = Vec::new();
// Restore in reverse order (LIFO)
for reg in saved_regs.iter().rev() {
code.push(format!("\tpop {}", reg));
}
code
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+284 -4
View File
@@ -1,7 +1,287 @@
use std::{cell::RefCell, collections::HashMap, ops::Deref, rc::Rc};
use uuid::Uuid;
use crate::{
backend::dsa::{
instruction::{InsBlock, Instruction},
registers::{Register, RegisterAllocator},
variable::Variable,
},
model::CompilerError,
};
/// Tracks stack depth and register usage while code is generated.
pub struct Allocator {
/// Current stack offset. It starts at 0, is decreased by `allocate_stack_slot`,
/// and is reset to a scope's entry value by `destroy_scope`.
stack_offset: i32,
/// One `(register, in use)` entry per tracked register; the free functions index
/// it with `register as usize`.
in_use: [(Register, bool); 16],
}
/// A register handed out by `Allocator::allocate_temp`; release it with `Allocator::free_temp`.
pub struct TempReg(Register);
/// A register handed out by `Allocator::allocate_var` to hold a variable.
pub struct AssignedReg(Register);
/// A stack slot handed out by `Allocator::allocate_stack_slot`; holds the allocator's
/// stack offset immediately after the slot was reserved.
pub struct StackSlot(i32);
impl Deref for TempReg {
type Target = Register;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Deref for AssignedReg {
type Target = Register;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Deref for StackSlot {
type Target = i32;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Allocator {
    /// Creates an allocator with a zero stack offset and every tracked register free.
    pub fn new() -> Self {
        let mut in_use = [(Register::Null, false); 16];
        in_use.copy_from_slice(&Register::get_gp().map(|r| (r, false))[0..16]);
        Self {
            stack_offset: 0,
            in_use,
        }
    }

    /// Returns the stack offset currently tracked by this allocator.
    pub fn get_stack_offset(&self) -> i32 {
        self.stack_offset
    }

    /// Tears down `scope`: resets the stack offset to the value recorded when the
    /// scope was entered and releases every register held by one of its variables.
    pub fn destroy_scope(&mut self, scope: &mut Scope) {
        self.stack_offset = scope.entry_stack_offset;
        for (_, var) in scope.variables.drain() {
            if let Some(assigned) = var.register {
                self.free_assigned(&assigned);
            }
        }
    }

    // what we need:
    // - create var in register from temporary register. free temp and use it.
    //
    // - create var on stack from struct/array literal. return stack offset to write to.
    //
    // - spill var from register to stack. return stack offset to write to.
    //
    // - read/write var from stack+offset into register to use while preserving the stack
    //   slot.
    //
    // - read / write bytes from the stack+offset in a larger variable into a register.

    /// Emits the load that brings `var` from its stack slot into a register.
    ///
    /// If `var` has no register yet, one is allocated and recorded on `var`.
    /// The stack slot is left untouched.
    ///
    /// # Errors
    /// Returns `CompilerError::Generic` when `var` has no stack slot, or when a
    /// register is needed and none is free.
    pub fn read_var(&mut self, var: &mut Variable) -> Result<InsBlock, CompilerError> {
        let Some(slot) = &var.stack_slot else {
            return Err(CompilerError::Generic(format!(
                "Tried to read var {} from stack but var was not assigned a stack slot",
                var.name
            )));
        };
        // Slot offsets are recorded against the allocator's running stack offset,
        // so the distance from the current `Spr` is the difference of the two.
        let offset = **slot - self.stack_offset;

        let dest = match var.register.as_deref().copied() {
            Some(reg) => reg,
            None => {
                let assigned = self.allocate_var()?;
                let reg = *assigned;
                var.register = Some(assigned);
                reg
            }
        };

        // `ldw_reg_offset` takes (base, dest, offset). The base is the stack
        // pointer and the destination is the variable's register; the previous
        // code passed them the other way round and so loaded into `Spr`.
        Ok(InsBlock::from(Instruction::ldw_reg_offset(
            Register::Spr,
            dest,
            offset,
        )))
    }

    /// Emits the store that writes `var`'s register back to its stack slot.
    ///
    /// # Errors
    /// Returns `CompilerError::Generic` unless `var` has both a register and a
    /// stack slot.
    pub fn write_var(&mut self, var: &mut Variable) -> Result<InsBlock, CompilerError> {
        if let (Some(slot), Some(reg)) = (&var.stack_slot, &var.register) {
            return Ok(InsBlock::from(Instruction::stw_reg_offset(
                **reg,
                Register::Spr,
                **slot - self.stack_offset,
            )));
        }

        Err(CompilerError::Generic(format!(
            "Tried to write var {} to stack but var was not assigned a reg and/or stack slot",
            var.name
        )))
    }

    /// Moves `var` out of its register onto the stack and frees the register.
    ///
    /// A variable that already owns a stack slot is written back to it; otherwise a
    /// new slot is reserved and the register is pushed.
    ///
    /// # Errors
    /// Returns `CompilerError::Generic` when `var` holds no register.
    pub fn spill_var(&mut self, var: &mut Variable) -> Result<InsBlock, CompilerError> {
        if var.stack_slot.is_some() {
            let block = self.write_var(var)?;
            if let Some(reg) = var.register.take() {
                self.free_assigned(&reg);
            }
            return Ok(block);
        }

        // var doesn't have a stack slot so we need to create one
        let Some(reg) = var.register.take() else {
            return Err(CompilerError::Generic(
                "spill_var called on a variable without a register".to_string(),
            ));
        };
        // NOTE(review): the tracked offset moves by `var.size` while `push` stores a
        // whole register (the older allocator subtracted a fixed 4) — confirm the
        // two agree for variables smaller than a word.
        let slot = self.allocate_stack_slot(var.size);
        let block = InsBlock::from(Instruction::push(*reg));
        self.free_assigned(&reg);
        var.stack_slot = Some(slot);
        Ok(block)
    }

    /// Reserves `size` bytes by lowering the stack offset and returns the new offset
    /// as a slot.
    pub fn allocate_stack_slot(&mut self, size: usize) -> StackSlot {
        self.stack_offset -= size as i32;
        StackSlot(self.stack_offset)
    }

    /// Claims the first free register for a variable.
    ///
    /// # Errors
    /// Returns `CompilerError::Generic` when every register is in use.
    pub fn allocate_var(&mut self) -> Result<AssignedReg, CompilerError> {
        match self.find_free_register() {
            Some(reg) => Ok(AssignedReg(reg)),
            None => Err(CompilerError::Generic(
                "No free registers available".to_string(),
            )),
        }
    }

    /// Claims the first free register as a temporary.
    ///
    /// # Panics
    /// Spilling is not implemented yet, so this hits `todo!` when no register is free.
    pub fn allocate_temp(&mut self) -> Result<TempReg, CompilerError> {
        // allocates a temporary register
        if let Some(reg) = self.find_free_register() {
            Ok(TempReg(reg))
        } else {
            todo!("an efficient stack spilling algorithm. needs scope awareness.");
        }
    }

    /// Marks the register behind `temp` as free again.
    pub fn free_temp(&mut self, temp: &TempReg) {
        self.in_use[**temp as usize].1 = false;
    }

    /// Marks the register behind `reg` as free again.
    fn free_assigned(&mut self, reg: &AssignedReg) {
        self.in_use[**reg as usize].1 = false;
    }

    /// Returns the first register that is not in use and marks it as used, or `None`
    /// when all of them are taken.
    fn find_free_register(&mut self) -> Option<Register> {
        self.in_use.iter_mut().find_map(|(reg, used)| {
            if !*used {
                *used = true;
                Some(*reg)
            } else {
                None
            }
        })
    }
}
pub struct FunctionContext {
name: String,
stack_offset: i32,
registers: [(Register, bool); 16],
allocator: RefCell<Allocator>,
}
impl FunctionContext {
pub fn new(name: String) -> Self {
Self {
name,
allocator: RefCell::new(Allocator::new()),
}
}
pub fn get_stack_offset(&self) -> i32 {
self.allocator.borrow().get_stack_offset()
}
}
/// A lexical scope: the variables declared in it plus the bookkeeping needed to undo
/// its allocations when it ends.
pub struct Scope<'a> {
/// outer scope, for a function this will be the global scope.
parent: Option<&'a mut Scope<'a>>,
/// Function-level state; `Scope::new` clones the parent's `Rc`, so a child scope
/// shares its parent's context.
context: Rc<FunctionContext>,
/// is the scope a function body or just a loop?
/// depending on the type, ending a scope will have different behaviour
r#type: ScopeType,
/// Variables declared in this scope.
/// NOTE(review): presumably keyed by `Variable::uuid` — confirm at the insertion site.
variables: HashMap<Uuid, Variable>,
/// Stack offset reported by the context when this scope was created;
/// `Allocator::destroy_scope` resets the allocator to this value.
entry_stack_offset: i32,
}
impl<'a> Scope<'a> {
/// Opens a child scope of `parent`.
///
/// The child shares the parent's function context and records the context's
/// current stack offset as its entry offset.
pub fn new(parent: &'a mut Scope<'a>, r#type: ScopeType) -> Scope<'a> {
Self {
entry_stack_offset: parent.context.get_stack_offset(),
context: Rc::clone(&parent.context),
parent: Some(parent),
r#type,
variables: HashMap::new(),
}
}
/// Closes the scope.
///
/// Not implemented yet: the loop body is `todo!()`, so this panics as soon as the
/// scope holds at least one variable and returns `Ok(())` only for an empty scope.
pub fn close(&mut self) -> Result<(), CompilerError> {
// closing a scope means we need to drop all variables in scope and free
// registers.
for (name, var) in self.variables.iter() {
todo!()
// if let Some(reg) = var.allocated_register {}
// if let Some(offset) = var.bpr_offset {
// self.stack_offset -= offset;
// }
}
Ok(())
}
/// Stub: not implemented yet, always panics via `todo!()`.
pub fn alloc_temp_reg(&mut self) -> Result<(Register, InsBlock), CompilerError> {
todo!()
}
/// Stub: not implemented yet, always panics via `todo!()`.
pub fn alloc_var_reg(&mut self) -> Result<(Register, InsBlock), CompilerError> {
todo!()
}
/// Stub: not implemented yet, always panics via `todo!()`.
pub fn alloc_var_stack(&mut self) -> Result<(Register, InsBlock), CompilerError> {
todo!()
}
/// Stub: not implemented yet, always panics via `todo!()`.
pub fn free_var_stack(&mut self) -> Result<(Register, InsBlock), CompilerError> {
todo!()
}
/// Stub: not implemented yet, always panics via `todo!()`.
pub fn free_temp_reg(&mut self) -> Result<(Register, InsBlock), CompilerError> {
todo!()
}
}
/// The kind of construct a `Scope` belongs to.
#[derive(PartialEq, Copy, Clone, Debug)]
pub enum ScopeType {
/// The scope is a function body.
Function,
/// The scope is the body of an `if` block.
IfBlock,
/// The scope is the body of a loop.
LoopBlock,
}
+32 -125
View File
@@ -1,43 +1,59 @@
use std::{collections::HashMap, hash::Hash};
use std::{collections::HashMap, hash::Hash, rc::Rc};
use uuid::Uuid;
use crate::{
backend::dsa::{
instruction::{InsBlock, Reg},
instruction::InsBlock,
registers::Register,
scope::{AssignedReg, FunctionContext, Scope, StackSlot},
},
model::{CompilerError, TypeId},
};
pub struct Variable {
name: String,
pub name: String,
pub uuid: Uuid,
/// the type of the variable.
r#type: TypeId,
/// size taken up in bytes.
/// if size > 4, value must be stored on the stack.
size: usize,
pub size: usize,
// location
/// this must be None if it cannot be stored in a register.
allocated_register: Option<Register>,
/// represents the offset from the base pointer (Bpr) of the stack frame.
/// needs to be signed as offset is positive for function args and negative for local
/// variables. as we can't access values at negative offsets, we use the following
/// formula: addr = Spr + offset - (Spr - Bpr) where we know (Spr-Bpr) at compile
/// time.
bpr_offset: Option<isize>,
pub stack_slot: Option<StackSlot>,
pub register: Option<AssignedReg>,
}
impl Variable {
pub fn new_uninit(name: String, r#type: TypeId) -> Self {
Self {
name,
uuid: Uuid::new_v4(),
size: r#type.size(),
r#type,
allocated_register: None,
bpr_offset: None,
stack_slot: None,
register: None,
}
}
/// Creates a variable and immediately gives it its default storage in `scope`.
///
/// # Errors
/// Returns the allocation error when no stack slot or register could be assigned.
/// Previously this panicked, because `alloc_default` unwrapped the allocation
/// result even though this constructor returns `Result`.
pub fn new(
    name: String,
    r#type: TypeId,
    scope: &'_ mut Scope,
) -> Result<Self, CompilerError> {
    let mut var = Self::new_uninit(name, r#type);
    var.alloc_default(scope)?;
    Ok(var)
}

/// Chooses the default storage for this variable: values larger than 4 bytes go on
/// the stack, everything else goes in a register.
///
/// # Errors
/// Propagates the error from the chosen allocation.
fn alloc_default(&mut self, scope: &'_ mut Scope) -> Result<(), CompilerError> {
    if self.size > 4 {
        self.alloc_stack(scope)?;
    } else {
        self.alloc_register(scope)?;
    }
    Ok(())
}
@@ -67,35 +83,7 @@ impl Variable {
todo!("load var from stack to reg (if possible)")
}
pub fn new_local(
name: String,
r#type: TypeId,
scope: &'_ mut Scope,
) -> Result<Self, CompilerError> {
let mut var = Self::new_uninit(name, r#type);
var.alloc_register(scope)?;
Ok(var)
}
pub fn new_stack(
name: String,
r#type: TypeId,
scope: &'_ mut Scope,
) -> Result<Self, CompilerError> {
let mut init = Self::new_uninit(name, r#type);
init.alloc_stack(scope)?;
Ok(init)
}
pub fn drop(&mut self, scope: &'_ mut Scope) -> Result<(), CompilerError> {
if let Some(reg) = self.allocated_register {
todo!("dealloc reg in current function")
}
if let Some(offset) = self.bpr_offset {
todo!("free stack slot in current function")
}
Ok(())
}
@@ -103,84 +91,3 @@ impl Variable {
todo!()
}
}
/// scope object
pub struct Scope<'a> {
/// outer scope, for a function this will be the global scope.
parent: Option<&'a mut Scope<'a>>,
/// is the scope a function body or just a loop?
/// depending on the type, ending a scope will have different behaviour
r#type: ScopeType,
/// variables
variables: HashMap<String, Variable>,
/// tells us if a given register is being used or not.
/// this can be an array as registers have u8 representation.
in_use: [(Register, bool); 16],
stack_offset: i32,
}
impl<'a> Scope<'a> {
pub fn new(parent: &'a mut Scope<'a>, r#type: ScopeType) -> Scope<'a> {
Self {
stack_offset: parent.stack_offset,
parent: Some(parent),
r#type,
variables: HashMap::new(),
in_use: Register::get_gp().map(|reg| (reg, false)),
}
}
pub fn stack_offset(&self) -> i32 {
self.stack_offset
}
pub fn stack_offset_mut(&mut self) -> &mut i32 {
&mut self.stack_offset
}
pub fn close(&mut self) -> Result<(), CompilerError> {
// closing a scope means we need to drop all variables in scope and free registers.
for (name, var) in self.variables {
if let Some(reg) = var.allocated_register {
}
if let Some(off)
}
for reg in self.in_use
Ok(())
}
pub fn alloc_temp_reg(&mut self) -> Result<(Register, InsBlock), CompilerError> {
todo!()
}
pub fn alloc_var_reg(&mut self) -> Result<(Register, InsBlock), CompilerError> {
todo!()
}
pub fn alloc_var_stack(&mut self) -> Result<(Register, InsBlock), CompilerError> {
todo!()
}
pub fn free_var_stack(&mut self) -> Result<(Register, InsBlock), CompilerError> {
todo!()
}
pub fn free_temp_reg(&mut self) -> Result<(Register, InsBlock), CompilerError> {
todo!()
}
}
#[derive(PartialEq, Copy, Clone, Debug)]
enum ScopeType {
Function,
IfBlock,
LoopBlock,
}
+168 -22
View File
@@ -23,8 +23,9 @@ pub enum Token {
// Identifiers and literals
Identifier(Name),
String(String),
Integer(u64),
Char(char),
SignedInt(i32, Option<TypeId>),
UnsignedInt(u32, Option<TypeId>),
// Delimiters
LeftParen, // (
@@ -86,7 +87,7 @@ pub enum Token {
Eof,
}
use crate::model::Name;
use crate::model::{Name, TypeId};
use std::fmt;
impl fmt::Display for Name {
@@ -118,7 +119,8 @@ impl Token {
Token::As => "As",
Token::Identifier(_) => "Identifier",
Token::String(_) => "String",
Token::Integer(_) => "UnsignedInt",
Token::UnsignedInt(_, _) => "UnsignedInt",
Token::SignedInt(_, _) => "SignedInt",
Token::Char(_) => "Char",
Token::LeftParen => "LeftParen",
Token::RightParen => "RightParen",
@@ -388,8 +390,126 @@ impl<'a> Lexer<'a> {
// ========================================================================
fn scan_number(&mut self) -> Token {
// Check if number is negative
let is_negative = self.current == Some('-');
if is_negative {
self.advance(); // consume '-'
}
match self.read_number() {
Ok(num) => Token::Integer(num),
Ok((value, type_suffix)) => {
// Validate and construct appropriate token
if let Some(type_id) = type_suffix {
match type_id {
TypeId::I8 => {
let signed_val = if is_negative {
-(value as i32)
} else {
value as i32
};
if signed_val < i8::MIN as i32 || signed_val > i8::MAX as i32
{
self.error(&format!(
"Value {} out of range for i8",
signed_val
));
return Token::SignedInt(0, Some(TypeId::I8));
}
Token::SignedInt(signed_val, Some(TypeId::I8))
}
TypeId::I16 => {
let signed_val = if is_negative {
-(value as i32)
} else {
value as i32
};
if signed_val < i16::MIN as i32
|| signed_val > i16::MAX as i32
{
self.error(&format!(
"Value {} out of range for i16",
signed_val
));
return Token::SignedInt(0, Some(TypeId::I16));
}
Token::SignedInt(signed_val, Some(TypeId::I16))
}
TypeId::I32 => {
let signed_val = if is_negative {
if value > i32::MAX as u64 + 1 {
self.error(&format!(
"Value -{} out of range for i32",
value
));
return Token::SignedInt(0, Some(TypeId::I32));
}
-(value as i32)
} else {
if value > i32::MAX as u64 {
self.error(&format!(
"Value {} out of range for i32",
value
));
return Token::SignedInt(0, Some(TypeId::I32));
}
value as i32
};
Token::SignedInt(signed_val, Some(TypeId::I32))
}
TypeId::U8 => {
if is_negative {
self.error("Unsigned type u8 cannot be negative");
return Token::UnsignedInt(0, Some(TypeId::U8));
}
if value > u8::MAX as u64 {
self.error(&format!(
"Value {} out of range for u8",
value
));
return Token::UnsignedInt(0, Some(TypeId::U8));
}
Token::UnsignedInt(value as u32, Some(TypeId::U8))
}
TypeId::U16 => {
if is_negative {
self.error("Unsigned type u16 cannot be negative");
return Token::UnsignedInt(0, Some(TypeId::U16));
}
if value > u16::MAX as u64 {
self.error(&format!(
"Value {} out of range for u16",
value
));
return Token::UnsignedInt(0, Some(TypeId::U16));
}
Token::UnsignedInt(value as u32, Some(TypeId::U16))
}
TypeId::U32 => {
if is_negative {
self.error("Unsigned type u32 cannot be negative");
return Token::UnsignedInt(0, Some(TypeId::U32));
}
if value > u32::MAX as u64 {
self.error(&format!(
"Value {} out of range for u32",
value
));
return Token::UnsignedInt(0, Some(TypeId::U32));
}
Token::UnsignedInt(value as u32, Some(TypeId::U32))
}
_ => unreachable!(),
}
} else {
// No type suffix - decide based on sign
if is_negative {
let signed_val = -(value as i32);
Token::SignedInt(signed_val, None)
} else {
Token::UnsignedInt(value as u32, None)
}
}
}
Err(e) => {
self.error(&e);
// Skip the invalid number
@@ -399,31 +519,66 @@ impl<'a> Lexer<'a> {
}
self.advance();
}
Token::Integer(0)
Token::SignedInt(0, None)
}
}
}
fn read_number(&mut self) -> Result<u64, String> {
fn read_number(&mut self) -> Result<(u64, Option<TypeId>), String> {
// Check for hex (0x) or binary (0b) prefix
if self.current == Some('0') {
match self.peek() {
Some('x') | Some('X') => {
self.advance(); // consume '0'
self.advance(); // consume 'x'
return self.read_hex_number();
let value = self.read_hex_number()?;
let type_suffix = self.read_type_suffix()?;
return Ok((value, type_suffix));
}
Some('b') | Some('B') => {
self.advance(); // consume '0'
self.advance(); // consume 'b'
return self.read_binary_number();
let value = self.read_binary_number()?;
let type_suffix = self.read_type_suffix()?;
return Ok((value, type_suffix));
}
_ => {}
}
}
// Read decimal number
self.read_decimal_number()
let value = self.read_decimal_number()?;
let type_suffix = self.read_type_suffix()?;
Ok((value, type_suffix))
}
/// Reads an optional integer type suffix such as `_i32` or `_u8`.
///
/// Returns `Ok(None)` without consuming anything when the next character
/// (as reported by `peek`) is not `_`. Otherwise the `_` and every
/// following ASCII alphanumeric character are consumed, and the collected
/// name is mapped to a `TypeId`.
///
/// # Errors
///
/// Returns `Err` with an "Invalid type suffix" message when the collected
/// name is not one of `i8`, `i16`, `i32`, `u8`, `u16`, `u32` (this includes
/// an empty name, i.e. a `_` followed by a non-alphanumeric character).
fn read_type_suffix(&mut self) -> Result<Option<TypeId>, String> {
    // Guard: nothing to do unless an underscore follows the literal.
    if self.peek() != Some('_') {
        return Ok(None);
    }
    self.advance(); // consume '_'

    // Gather the run of alphanumeric characters that forms the suffix name.
    let mut name = String::new();
    while let Some(ch) = self.peek().filter(|c| c.is_ascii_alphanumeric()) {
        self.advance();
        name.push(ch);
    }

    let type_id = match name.as_str() {
        "i8" => TypeId::I8,
        "i16" => TypeId::I16,
        "i32" => TypeId::I32,
        "u8" => TypeId::U8,
        "u16" => TypeId::U16,
        "u32" => TypeId::U32,
        _ => return Err(format!("Invalid type suffix: {}", name)),
    };
    Ok(Some(type_id))
}
fn read_decimal_number(&mut self) -> Result<u64, String> {
@@ -437,8 +592,10 @@ impl<'a> Lexer<'a> {
if c.is_ascii_digit() {
self.advance();
num_str.push(c);
} else if c == '_' {
// Allow underscores as separators (like Rust)
} else if c == '_'
&& self.peek_second().map_or(false, |ch| ch.is_ascii_digit())
{
// Allow underscores as separators only between digits
self.advance();
} else {
break;
@@ -883,17 +1040,6 @@ mod tests {
}
}
#[test]
fn test_numbers() {
let input = "42 0x2A 0b101010 123_456";
let mut lexer = Lexer::new(input);
assert_eq!(lexer.next_token(), Token::Integer(42));
assert_eq!(lexer.next_token(), Token::Integer(42));
assert_eq!(lexer.next_token(), Token::Integer(42));
assert_eq!(lexer.next_token(), Token::Integer(123456));
}
#[test]
fn test_namespaced_identifier() {
let input = "print::println std::io::read";
+12 -10
View File
@@ -1,8 +1,8 @@
use super::lexer::Token;
use crate::model::{
AssignmentOperator, BinaryOperator, Block, Call, CompilerError, ConstExpr,
Declaration, Dependency, Expression, Program, Statement, TypeId, UnaryOperator,
Variable,
Declaration, Dependency, Expression, Number, Program, Statement, TypeId,
UnaryOperator, Variable,
};
use crate::{expect_tt, expect_value};
use std::ops::{ControlFlow, FromResidual, Try};
@@ -83,7 +83,8 @@ impl Parser {
let value = self.next()?;
let init = match value {
Token::String(x) => Some(ConstExpr::String(x)),
Token::Integer(x) => Some(ConstExpr::Number(x as i32)),
Token::SignedInt(x, _) => Some(ConstExpr::Number(x)),
Token::UnsignedInt(x, _) => Some(ConstExpr::Number(x as i32)),
_ => {
return ParseResult::Reject(CompilerError::UnexpectedToken(
value.tt().to_string(),
@@ -699,12 +700,13 @@ impl Parser {
fn parse_primary(&mut self) -> ParseResult<Expression, CompilerError> {
match self.peek_next()? {
Token::Integer(value) => {
Token::UnsignedInt(value, type_id) => {
self.next()?;
ParseResult::Accept(Expression::Number {
value: value as isize,
type_id: None,
})
ParseResult::Accept(Expression::Number(Number::Unsigned(value, type_id)))
}
Token::SignedInt(value, type_id) => {
self.next()?;
ParseResult::Accept(Expression::Number(Number::Signed(value, type_id)))
}
Token::String(value) => {
self.next()?;
@@ -812,7 +814,7 @@ impl Parser {
let internal_type = self.parse_type()?;
let _ = expect_tt!(self.next()?, Semicolon)?;
let size = expect_value!(self.next()?, Integer)?;
let size = expect_value!(self.next()?, UnsignedInt)?;
let _ = expect_tt!(self.next()?, RightBracket)?;
@@ -976,7 +978,7 @@ macro_rules! expect_value {
($expr:expr, $variant:ident) => {{
let tok = $expr;
match tok.clone() {
Token::$variant(value) => ParseResult::Accept(value),
Token::$variant(first, ..) => ParseResult::Accept(first),
_ => {
ParseResult::Reject(CompilerError::UnexpectedToken(tok.tt().to_string()))
}
+10 -9
View File
@@ -289,12 +289,7 @@ pub enum Expression {
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
Number {
value: isize,
// Post-Semantic Analysis
type_id: Option<TypeId>,
},
Number(Number),
StringLiteral(String),
CharLiteral(char),
ArrayLiteral {
@@ -308,6 +303,12 @@ pub enum Expression {
},
}
/// Integer literal payload carried by `Expression::Number`.
///
/// Each variant pairs the literal's value with an optional `TypeId`; the
/// type is `None` when no type has been attached to the literal.
#[derive(Clone, Debug)]
pub enum Number {
    /// A signed literal: the `i32` value and its type, if known.
    Signed(i32, Option<TypeId>),
    /// An unsigned literal: the `u32` value and its type, if known.
    Unsigned(u32, Option<TypeId>),
}
#[derive(Debug, Clone)]
pub struct Call {
pub name: Name,
@@ -342,9 +343,9 @@ impl Expression {
pub fn type_id(&self) -> Result<TypeId, CompilerError> {
match self {
Expression::Number { type_id, .. } => {
type_id.clone().ok_or(CompilerError::UnknownType)
}
Expression::Number(
Number::Signed(_, type_id) | Number::Unsigned(_, type_id),
) => type_id.clone().ok_or(CompilerError::UnknownType),
Expression::StringLiteral(_) => Ok(TypeId::Ptr(Box::new(TypeId::Char))),
Expression::CharLiteral(_) => Ok(TypeId::Char),
Expression::Call { type_id, .. } => {
+21 -21
View File
@@ -1,9 +1,9 @@
// GENERATED BY DSC COMPILER
// Generated at 2026-02-10 19:26:10
// Generated at 2026-02-14 02:44:56
// Imports
include print: "./lib/io/print.dsa"
include arena: "./lib/memory/arena_alloc.dsa"
include print: "./lib/io/print.dsa"
//
// Globals & Reserved Memory
//
@@ -11,7 +11,7 @@ include arena: "./lib/memory/arena_alloc.dsa"
dw stack: 0x010000
db message: "Process Exited with code:"
_init:
ldw stack, bpr 0
ldw stack, bpr, 0
mov bpr, spr
push zero
call main
@@ -52,9 +52,9 @@ main:
pop rg3
pop zero
lli 32, rg0
ldw spr, rg2 0
stw rg2, spr 0
ldw spr, rg2, 0
push rg3
stw rg2, spr, 4
// push arg 1
push rg0
// push arg 0
@@ -62,48 +62,48 @@ main:
call arena::alloc
pop rg4
pop zero
ldw spr, rg0 4
ldw spr, rg0, 4
stw rg0, spr, 4
push rg4
stw rg0, spr 8
// push arg 0
push rg0
call print::print_hex_word
pop zero
call print::print_newline
ldw spr, rg0 4
stw rg0, spr 4
ldw spr, rg0, 4
stw rg0, spr, 4
// push arg 0
push rg0
call print::print_hex_word
pop zero
call print::print_newline
ldw spr, rg0 0
stw rg0, spr 0
ldw spr, rg0, 0
stw rg0, spr, 0
// push arg 0
push rg0
call print::print_hex_word
pop zero
call print::print_newline
ldw spr, rg0 0
ldw rg0, rg2 0
stw rg0, spr 0
ldw spr, rg0, 0
ldw rg0, rg2, 0
stw rg0, spr, 0
// push arg 0
push rg2
call print::print_num
pop zero
call print::print_newline
lli 42, rg2
ldw spr, rg5 0
stw rg2, rg5 0
stw rg5, spr 0
ldw spr, rg5, 0
stw rg2, rg5, 0
stw rg5, spr, 0
// push arg 0
push rg5
call print::print_hex_word
pop zero
call print::print_newline
ldw spr, rg2 0
ldw rg2, rg5 0
stw rg2, spr 0
ldw spr, rg2, 0
ldw rg2, rg5, 0
stw rg2, spr, 0
// push arg 0
push rg5
call print::print_num
@@ -115,5 +115,5 @@ main:
call print::println
pop zero
lli 0, rg5
stw rg5, bpr 8
stw rg5, bpr, 8
jmp _ret