Compare commits

6 Commits

29 changed files with 742 additions and 2555 deletions
Generated
+1
View File
@@ -270,6 +270,7 @@ dependencies = [
"common", "common",
"num_cpus", "num_cpus",
"threadpool", "threadpool",
"uuid",
] ]
[[package]] [[package]]
+1
View File
@@ -17,3 +17,4 @@ clap = { version = "4.5.40", features = ["derive"] }
common = { path = "../common" } common = { path = "../common" }
num_cpus = "1.17.0" num_cpus = "1.17.0"
threadpool = "1.8.1" threadpool = "1.8.1"
uuid = { version = "1.17.0", features = ["v4"] }
-264
View File
@@ -1,264 +0,0 @@
use std::{
collections::HashSet,
fs,
path::{self, Path, PathBuf},
sync::{Arc, Mutex},
thread::{self, JoinHandle},
};
use crate::assembler::{AssembleError, Token, expand_pseudo_ops, lexer, quick_hash};
use crate::assembler::{Node, Parser, resolve_dependencies};
use crate::util::logging::Logger;
// pub fn new_assemble(path: &Path) {
// let program = Program::new();
// let program_ref = ProgramRef::new(program);
// let task = Module::build(path.to_path_buf(), program_ref.clone());
// program_ref.add_task(task);
// // wait on all tasks to finish
// for task in program_ref.get_tasks() {
// let module = task.module.join().unwrap();
// program_ref.add_module(module);
// }
// }
pub struct Program {
pub main_path: PathBuf,
registry: HashSet<u64>,
modules: Vec<Module>,
tasks: Vec<Task>,
logger: Logger,
}
impl Program {
#[must_use]
pub fn new() -> Self {
Self {
registry: HashSet::new(),
modules: Vec::new(),
tasks: Vec::new(),
main_path: PathBuf::new(),
logger: Logger::new(),
}
}
pub fn add_task(&mut self, task: Task) {
self.tasks.push(task);
}
}
impl Default for Program {
fn default() -> Self {
Self::new()
}
}
pub struct ProgramRef {
program: Arc<Mutex<Program>>,
}
impl ProgramRef {
#[must_use]
pub fn new(program: Program) -> Self {
Self {
program: Arc::new(Mutex::new(program)),
}
}
pub fn register(&self, path: &Path) {
self.program
.lock()
.expect("Failed to acquire program lock")
.registry
.insert(quick_hash(path));
}
#[must_use]
pub fn is_registered(&self, path: &Path) -> bool {
self.program
.lock()
.expect("Failed to acquire program lock")
.registry
.contains(&quick_hash(path))
}
// pub fn get_tasks(&self) -> Vec<&Task> {
// self.program.lock().unwrap().tasks.iter().collect()
// }
pub fn add_task(&self, task: Task) {
self.program
.lock()
.expect("Failed to acquire program lock")
.add_task(task);
}
pub fn add_module(&self, module: Module) {
self.program
.lock()
.expect("Failed to acquire program lock")
.modules
.push(module);
}
pub fn log(&self, message: &str) {
self.program
.lock()
.expect("Failed to acquire program lock")
.logger
.log(message);
}
}
impl Clone for ProgramRef {
fn clone(&self) -> Self {
Self {
program: self.program.clone(),
}
}
}
pub struct Module {
pub path: PathBuf,
pub hash: u64,
pub nodes: Vec<Node>,
program: ProgramRef,
}
impl Module {
#[must_use]
pub const fn new(
path: PathBuf,
hash: u64,
nodes: Vec<Node>,
program: ProgramRef,
) -> Self {
Self {
path,
hash,
nodes,
program,
}
}
pub fn build(path: PathBuf, program: ProgramRef) -> Result<Task, AssembleError> {
// Spawn a thread that creates the main function and executes the lexer and parser.
let handle = thread::spawn(move || {
let mut module =
Self::new(path.clone(), quick_hash(&path), Vec::new(), program.clone());
match module.lex() {
Ok(tokens) => {
module.parse(tokens);
module.expand();
module.prepare_dependencies();
module
}
Err(why) => {
eprintln!(
"Error building program at path `{}`: {why}",
path.display()
);
// TODO: Find a way to make this work without panicking.
unreachable!()
}
}
});
Ok(Task { module: handle })
}
fn lex(&self) -> Result<Vec<Token>, AssembleError> {
if let Ok(path) = self.path.canonicalize() {
self.program.log(&format!(
"{:20} {:20} [{}]",
"Building",
self.get_filename(),
path.display()
));
}
let src = fs::read_to_string(&self.path)
.map_err(|_| AssembleError::InvalidFile(self.path.clone()))?;
let file_hash = quick_hash(&self.path);
self.program
.log(&format!("{:20} {:20}", "Tokenising", self.get_filename()));
lexer::lexer(src, file_hash)
}
fn parse(&mut self, tokens: Vec<Token>) -> Result<(), AssembleError> {
self.program
.log(&format!("{:20} {:20}", "Parsing", self.get_filename()));
let parsed = Parser::parse_nodes(tokens)?;
self.nodes = parsed;
Ok(())
}
fn expand(&mut self) -> Result<(), AssembleError> {
self.program
.log(&format!("{:20} {:20}", "Expanding", self.get_filename()));
let expanded = expand_pseudo_ops(self.nodes.clone(), self.hash)?;
self.nodes = expanded;
Ok(())
}
fn prepare_dependencies(&self) -> Result<(), AssembleError> {
let nodes = resolve_dependencies(
self.nodes.clone(),
self.path.parent().expect("File should have a parent path!"),
)?;
let dependencies = Parser::get_dependencies(&nodes, &self.path)?;
for dep in dependencies {
if self.program.is_registered(&dep) {
// we have already built this module!
continue;
}
self.program.register(&dep);
// create new module
// add the task to the program
match Self::build(dep, self.program.clone()) {
Ok(task) => self.program.add_task(task),
Err(why) => {
eprintln!("Error building program: {why}");
}
}
}
Ok(())
}
/// Gets the filename from a [`PathBuf`].
fn get_filename(&self) -> &str {
self.path
.file_name()
.and_then(|f| f.to_str())
.unwrap_or_default()
}
/// Gets the parent filepath from a [`PathBuf`].
fn get_parent(&self) -> &str {
self.path
.parent()
.and_then(|f| f.to_str())
.unwrap_or_default()
}
}
pub struct Task {
module: JoinHandle<Module>,
}
-351
View File
@@ -1,351 +0,0 @@
use common::{args, prelude::*};
use crate::assembler::model::{Node, Opcode};
use crate::{assembler::AssembleError, expect_token};
fn log(message: &str) {
println!("\x1b[32mINFO:\x1b[0m {message}");
}
pub fn codegen(nodes: Vec<Node>) -> Result<Vec<Instruction>, AssembleError> {
let mut instructions = vec![];
for node in nodes {
instructions.push(
build_instruction(&node)
.unwrap_or_else(|_| panic!("Failed to build instruction: {node:?}")),
);
}
println!("------------------------");
log("Compilation Success ✅");
Ok(instructions)
}
fn build_instruction(node: &Node) -> Result<Instruction, AssembleError> {
let opcode = node.opcode();
let args = node.args();
match opcode {
Opcode::Nop => Ok(Instruction::Nop),
Opcode::Mov | Opcode::Movs => build_mov_instruction(opcode, &args),
Opcode::Ldb
| Opcode::Ldw
| Opcode::Ldh
| Opcode::Ldbs
| Opcode::Ldhs
| Opcode::Stb
| Opcode::Stw
| Opcode::Sth => build_memory_instruction(opcode, &args),
Opcode::Lli | Opcode::Lui => build_load_immediate_instruction(opcode, &args),
Opcode::Jmp
| Opcode::Jeq
| Opcode::Jne
| Opcode::Jgt
| Opcode::Jge
| Opcode::Jlt
| Opcode::Jle => build_jump_instruction(opcode, &args),
Opcode::Cmp => build_compare_instruction(&args),
Opcode::Inc | Opcode::Dec => build_inc_dec_instruction(opcode, &args),
Opcode::Shl | Opcode::Shr => build_shift_instruction(opcode, &args),
Opcode::Add
| Opcode::Sub
| Opcode::And
| Opcode::Or
| Opcode::Xor
| Opcode::Nand
| Opcode::Nor
| Opcode::Xnor => build_arithmetic_instruction(opcode, &args),
Opcode::AddI | Opcode::SubI => {
build_arithmetic_immediate_instruction(opcode, &args)
}
Opcode::Not => build_not_instruction(&args),
Opcode::Int => build_interrupt_instruction(&args),
Opcode::Irt => Ok(Instruction::IntReturn),
Opcode::Hlt => Ok(Instruction::Halt),
Opcode::Data => build_data_instruction(&args),
Opcode::Segment => build_segment_instruction(&args),
// These pseudo-instructions should have been expanded!
Opcode::Db
| Opcode::Dh
| Opcode::Dw
| Opcode::Resb
| Opcode::Resh
| Opcode::Resw
| Opcode::Push
| Opcode::Pop
| Opcode::Lwi
| Opcode::Include
| Opcode::Call
| Opcode::Return
| Opcode::Pusha
| Opcode::Popa => Err(AssembleError::InvalidArg),
}
}
fn build_mov_instruction(
opcode: Opcode,
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(src_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let Some(dest_token) = args.get(1) else {
return Err(AssembleError::MissingArgument(1));
};
let src = expect_token!(src_token, Register)?;
let dest = expect_token!(dest_token, Register)?;
match opcode {
Opcode::Mov => Ok(Instruction::Mov(args!(R, sr1: src, dr: dest))),
Opcode::Movs => Ok(Instruction::MovSigned(args!(R, sr1: src, dr: dest))),
_ => unreachable!(),
}
}
fn build_memory_instruction(
opcode: Opcode,
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(base_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let Some(dest_token) = args.get(1) else {
return Err(AssembleError::MissingArgument(1));
};
let Some(offset_token) = args.get(2) else {
return Err(AssembleError::MissingArgument(2));
};
let base = expect_token!(base_token, Register)?;
let dest = expect_token!(dest_token, Register)?;
let offset = expect_token!(offset_token, Immediate)?;
let instruction_args = args!(I, immediate: offset as u16, r1: base, r2: dest);
match opcode {
Opcode::Ldb => Ok(Instruction::LoadByte(instruction_args)),
Opcode::Ldw => Ok(Instruction::LoadWord(instruction_args)),
Opcode::Ldh => Ok(Instruction::LoadHalfword(instruction_args)),
Opcode::Ldbs => Ok(Instruction::LoadByteSigned(instruction_args)),
Opcode::Ldhs => Ok(Instruction::LoadHalfwordSigned(instruction_args)),
Opcode::Stb => Ok(Instruction::StoreByte(instruction_args)),
Opcode::Stw => Ok(Instruction::StoreWord(instruction_args)),
Opcode::Sth => Ok(Instruction::StoreHalfword(instruction_args)),
_ => unreachable!(),
}
}
fn build_load_immediate_instruction(
opcode: Opcode,
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(value_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let Some(dest_token) = args.get(1) else {
return Err(AssembleError::MissingArgument(1));
};
let value = expect_token!(value_token, Immediate)?;
let dest = expect_token!(dest_token, Register)?;
match opcode {
Opcode::Lli => {
let instruction_args = args!(I, immediate: value as u16, r1: dest);
Ok(Instruction::LoadLowerImmediate(instruction_args))
}
Opcode::Lui => {
let upper_value = value >> 16;
let instruction_args = args!(I, immediate: upper_value as u16, r1: dest);
Ok(Instruction::LoadUpperImmediate(instruction_args))
}
_ => unreachable!(),
}
}
fn build_jump_instruction(
opcode: Opcode,
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(address_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let Some(offset_token) = args.get(1) else {
return Err(AssembleError::MissingArgument(1));
};
let address = expect_token!(address_token, Immediate)?;
let offset = expect_token!(offset_token, Register)?;
let instruction_args = args!(I, immediate: address as u16, r1: offset);
match opcode {
Opcode::Jmp => Ok(Instruction::Jump(instruction_args)),
Opcode::Jeq => Ok(Instruction::JumpEq(instruction_args)),
Opcode::Jne => Ok(Instruction::JumpNeq(instruction_args)),
Opcode::Jgt => Ok(Instruction::JumpGt(instruction_args)),
Opcode::Jge => Ok(Instruction::JumpGe(instruction_args)),
Opcode::Jlt => Ok(Instruction::JumpLt(instruction_args)),
Opcode::Jle => Ok(Instruction::JumpLe(instruction_args)),
_ => unreachable!(),
}
}
fn build_compare_instruction(
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(left_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let Some(right_token) = args.get(1) else {
return Err(AssembleError::MissingArgument(1));
};
let left = expect_token!(left_token, Register)?;
let right = expect_token!(right_token, Register)?;
Ok(Instruction::Compare(args!(R, sr1: left, sr2: right)))
}
fn build_inc_dec_instruction(
opcode: Opcode,
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(reg_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let reg = expect_token!(reg_token, Register)?;
match opcode {
Opcode::Inc => Ok(Instruction::Increment(args!(R, sr1: reg))),
Opcode::Dec => Ok(Instruction::Decrement(args!(R, sr1: reg))),
_ => unreachable!(),
}
}
fn build_shift_instruction(
opcode: Opcode,
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(reg_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let Some(amount_token) = args.get(1) else {
return Err(AssembleError::MissingArgument(1));
};
let reg = expect_token!(reg_token, Register)?;
let amount = expect_token!(amount_token, Immediate)? as u8;
match opcode {
Opcode::Shl => Ok(Instruction::ShiftLeft(args!(R, sr1: reg, shamt: amount))),
Opcode::Shr => Ok(Instruction::ShiftRight(args!(R, sr1: reg, shamt: amount))),
_ => unreachable!(),
}
}
fn build_arithmetic_instruction(
opcode: Opcode,
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(left_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let Some(right_token) = args.get(1) else {
return Err(AssembleError::MissingArgument(1));
};
let Some(dest_token) = args.get(2) else {
return Err(AssembleError::MissingArgument(2));
};
let left = expect_token!(left_token, Register)?;
let right = expect_token!(right_token, Register)?;
let dest = expect_token!(dest_token, Register)?;
let instruction_args = args!(R, sr1: left, sr2: right, dr: dest);
match opcode {
Opcode::Add => Ok(Instruction::Add(instruction_args)),
Opcode::Sub => Ok(Instruction::Sub(instruction_args)),
Opcode::And => Ok(Instruction::And(instruction_args)),
Opcode::Or => Ok(Instruction::Or(instruction_args)),
Opcode::Xor => Ok(Instruction::Xor(instruction_args)),
Opcode::Nand => Ok(Instruction::Nand(instruction_args)),
Opcode::Nor => Ok(Instruction::Nor(instruction_args)),
Opcode::Xnor => Ok(Instruction::Xnor(instruction_args)),
_ => unreachable!(),
}
}
fn build_arithmetic_immediate_instruction(
opcode: Opcode,
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(reg_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let Some(immediate_token) = args.get(1) else {
return Err(AssembleError::MissingArgument(1));
};
let Some(dest_token) = args.get(2) else {
return Err(AssembleError::MissingArgument(2));
};
let reg = expect_token!(reg_token, Register)?;
let immediate = expect_token!(immediate_token, Immediate)? as u16;
let dest = expect_token!(dest_token, Register)?;
let instruction_args = args!(I, immediate: immediate, r1: reg, r2: dest);
match opcode {
Opcode::AddI => Ok(Instruction::AddImmediate(instruction_args)),
Opcode::SubI => Ok(Instruction::SubImmediate(instruction_args)),
_ => unreachable!(),
}
}
fn build_not_instruction(
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(reg_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let Some(dest_token) = args.get(1) else {
return Err(AssembleError::MissingArgument(1));
};
let reg = expect_token!(reg_token, Register)?;
let dest = expect_token!(dest_token, Register)?;
Ok(Instruction::Not(args!(R, sr1: reg, dr: dest)))
}
fn build_interrupt_instruction(
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(code_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let code = expect_token!(code_token, Immediate)? as u8;
Ok(Instruction::Interrupt(Interrupt::Software(code)))
}
fn build_data_instruction(
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(immediate_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let immediate = expect_token!(immediate_token, Immediate)?;
Ok(Instruction::Data(immediate))
}
fn build_segment_instruction(
args: &[crate::assembler::model::Token],
) -> Result<Instruction, AssembleError> {
let Some(immediate_token) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let immediate = expect_token!(immediate_token, Immediate)?;
Ok(Instruction::Segment(immediate))
}
-368
View File
@@ -1,368 +0,0 @@
use common::prelude::Register;
use crate::assembler::model::{Node, Opcode, Token};
use crate::{assembler::AssembleError, expect_token, expect_type, node};
pub fn expand_pseudo_ops(
mut nodes: Vec<Node>,
module: u64,
) -> Result<Vec<Node>, AssembleError> {
let mut result = Vec::<Node>::with_capacity(nodes.len());
for node in &mut nodes {
if try_expand(node.clone(), &mut result, module).is_err() {
result.push(node.clone());
}
}
Ok(result)
}
fn try_expand(
node: Node,
result: &mut Vec<Node>,
_module: u64,
) -> Result<(), AssembleError> {
match node.opcode() {
Opcode::Push => expand_push(&node, result)?,
Opcode::Pop => expand_pop(&node, result)?,
Opcode::Pusha => expand_pusha(&node, result)?,
Opcode::Popa => expand_popa(&node, result)?,
Opcode::Call => expand_call(&node, result)?,
Opcode::Return => expand_return(&node, result),
Opcode::Ldb | Opcode::Ldbs | Opcode::Ldh | Opcode::Ldhs | Opcode::Ldw => {
expand_ldx(&node, result)?;
}
Opcode::Stb | Opcode::Sth | Opcode::Stw => expand_stx(&node, result)?,
Opcode::Lwi => expand_lwi(&node, result)?,
Opcode::Resb | Opcode::Resh | Opcode::Resw => expand_resx(&node, result)?,
Opcode::Db | Opcode::Dh | Opcode::Dw => expand_dx(&node, result)?,
_ => result.push(node),
}
Ok(())
}
fn expand_push(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let label = current.label();
let Ok(arg0) = current.arg(0) else {
return Err(AssembleError::Generic);
};
let reg = expect_type!(arg0, Register)?;
let spr = Token::Register(Register::Spr);
nodes.extend(vec![
node!(label, Opcode::SubI, spr, 4, spr),
node!(None, Opcode::Stw, reg, spr, 0),
]);
Ok(())
}
fn expand_pusha(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let label = current.label();
let Ok(arg0) = current.arg(0) else {
return Err(AssembleError::Generic);
};
let count = expect_token!(arg0, Immediate)?;
let spr = Token::Register(Register::Spr);
let registers: Vec<Register> = Register::general();
nodes.push(node!(
label,
Opcode::SubI,
spr,
Token::Immediate(count * 4),
spr
));
nodes.extend((0..count).rev().map(|i| {
node!(
None,
Opcode::Stw,
Token::Register(registers[i as usize]),
spr,
Token::Immediate(i * 4)
)
}));
Ok(())
}
fn expand_popa(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let label = current.label();
let Ok(arg0) = current.arg(0) else {
return Err(AssembleError::Generic);
};
let count = expect_token!(arg0, Immediate)?;
let spr = Token::Register(Register::Spr);
let registers: Vec<Register> = Register::general();
nodes.extend((0..count).rev().map(|i| {
node!(
{ if i == 0 { label.clone() } else { None } },
Opcode::Ldw,
spr,
Token::Register(registers[i as usize]),
Token::Immediate(i * 4)
)
}));
nodes.push(node!(
None,
Opcode::AddI,
spr,
Token::Immediate(count * 4),
spr
));
Ok(())
}
fn expand_call(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let label = current.label();
let Ok(arg0) = current.arg(0) else {
return Err(AssembleError::Generic);
};
let addr = expect_type!(arg0, Symbol)?;
let spr = Token::Register(Register::Spr);
let pcx = Token::Register(Register::Pcx);
let zero = Token::Register(Register::Zero);
nodes.extend(vec![
node!(label, Opcode::SubI, spr, 4, spr),
node!(None, Opcode::Stw, pcx, spr, 0),
node!(None, Opcode::Jmp, addr, zero),
]);
Ok(())
}
fn expand_return(current: &Node, nodes: &mut Vec<Node>) {
let label = current.label();
let spr = Token::Register(Register::Spr);
let ret = Token::Register(Register::Ret);
nodes.extend(vec![
node!(label, Opcode::Ldw, spr, ret, 0),
node!(None, Opcode::AddI, spr, 4, spr),
node!(None, Opcode::Jmp, 4, ret),
]);
}
fn expand_pop(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let label = current.label();
let Ok(arg0) = current.arg(0) else {
return Err(AssembleError::Generic);
};
let reg = expect_type!(arg0, Register)?;
let spr = Token::Register(Register::Spr);
nodes.extend(vec![
node!(label, Opcode::Ldw, spr, reg, 0),
node!(None, Opcode::AddI, spr, 4, spr),
]);
Ok(())
}
fn expand_ldx(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let opcode = current.opcode();
let args: Vec<Token> = current.args().into_iter().take(3).collect();
let Some(name) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let Some(reg) = args.get(1) else {
return Err(AssembleError::MissingArgument(1));
};
let Some(offset) = args.get(2) else {
return Err(AssembleError::MissingArgument(2));
};
let name = expect_type!(name, Symbol)?;
let reg = expect_type!(reg, Register)?;
let offset = expect_type!(offset, Immediate)?;
nodes.extend(vec![
node!(current.label(), Opcode::Lli, name, reg),
node!(None, Opcode::Lui, name, reg),
node!(None, opcode, reg, reg, offset),
]);
Ok(())
}
fn expand_stx(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let opcode = current.opcode();
let args: Vec<Token> = current.args().into_iter().take(3).collect();
let Some(base) = args.first() else {
return Err(AssembleError::MissingArgument(0));
};
let Some(dest) = args.get(1) else {
return Err(AssembleError::MissingArgument(1));
};
let Some(offset) = args.get(2) else {
return Err(AssembleError::MissingArgument(2));
};
let base = expect_type!(base, Register)?;
let dest = expect_type!(dest, Symbol)?;
let offset = expect_type!(offset, Immediate)?;
let temp = Token::Register(Register::Acc);
nodes.extend(vec![
node!(current.label(), Opcode::Lli, dest, temp),
node!(None, Opcode::Lui, dest, temp),
node!(None, opcode, base, temp, offset),
]);
Ok(())
}
fn expand_lwi(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let Ok(val) = current.arg(0) else {
return Err(AssembleError::MissingArgument(0));
};
let Ok(reg) = current.arg(1) else {
return Err(AssembleError::MissingArgument(1));
};
let val = expect_type!(val, Symbol, Immediate)?;
let reg = expect_type!(reg, Register)?;
nodes.extend(vec![
node!(current.label(), Opcode::Lli, val, reg),
node!(None, Opcode::Lui, val, reg),
]);
Ok(())
}
fn expand_resx(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let Ok(region_label) = current.arg(0) else {
return Err(AssembleError::MissingArgument(0));
};
let Ok(size) = current.arg(1) else {
return Err(AssembleError::MissingArgument(1));
};
let region_label = expect_token!(region_label, Symbol)?;
let size = expect_token!(size, Immediate)?;
let units_per = match current.opcode() {
Opcode::Resb => 4,
Opcode::Resh => 2,
Opcode::Resw => 1,
_ => unreachable!(),
};
let mut buffer = vec![];
// push the inital node with the label
for _ in 0..size.div_ceil(units_per) {
// push the rest of the nodes
buffer.push(node!(None, Opcode::Data, 0));
}
buffer[0].symbol = Some(region_label);
nodes.extend(buffer);
Ok(())
}
fn expand_dx(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let Ok(region_label) = current.arg(0) else {
return Err(AssembleError::MissingArgument(0));
};
let region_label = expect_token!(region_label, Symbol)?;
let size = match current.opcode() {
Opcode::Db => 4,
Opcode::Dh => 2,
Opcode::Dw => 1,
_ => unreachable!(),
};
let mut buffer = vec![];
let mut args = current.args();
let _label = args.remove(0);
for word in process_dx_data(args, size)? {
buffer.push(node!(None, Opcode::Data, Token::Immediate(word)));
}
buffer[0].symbol = Some(region_label);
nodes.extend(buffer);
Ok(())
}
fn process_dx_data(args: Vec<Token>, size: usize) -> Result<Vec<u32>, AssembleError> {
assert!(matches!(size, 1 | 2 | 4));
let mut buffer = Vec::<u8>::new();
// Process each token
for token in args {
match token {
Token::StringLit(mut s) => {
s.push('\0');
// Split string into chars and write as bytes
for ch in s.chars() {
// Convert char to bytes (UTF-8 encoding)
let mut char_buf = [0u8; 4];
let char_bytes = ch.encode_utf8(&mut char_buf);
buffer.extend_from_slice(char_bytes.as_bytes());
}
}
Token::Immediate(value) => {
// Split u32 into bytes (little-endian)
buffer.extend_from_slice(&value.to_be_bytes());
}
_ => {
return Err(AssembleError::Generic);
}
}
// Pad buffer to alignment boundary with zeros
let remainder = buffer.len() % size;
if remainder != 0 {
let padding = size - remainder;
buffer.resize(buffer.len() + padding, 0);
}
}
// Convert byte buffer to u32 chunks
// Pad final buffer to u32 boundary if needed
let remainder = buffer.len() % 4;
if remainder != 0 {
let padding = 4 - remainder;
buffer.resize(buffer.len() + padding, 0);
}
// Convert bytes to u32s efficiently using chunks_exact
let result = buffer
.chunks_exact(4)
.map(|chunk| {
// Convert 4 bytes to u32 (little-endian)
u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])
})
.collect();
Ok(result)
}
-167
View File
@@ -1,167 +0,0 @@
use std::str::FromStr;
use crate::assembler::AssembleError;
use crate::assembler::model::{Module, Opcode, Symbol, Token};
use common::prelude::Register;
pub fn lexer(mut program: String, module: u64) -> Result<Vec<Token>, AssembleError> {
let mut tokens = Vec::new();
program = program.replace(',', "");
let lines = program.lines();
let mut literal = String::new();
for line in lines {
for (i, token) in line.split_whitespace().enumerate() {
if token.starts_with("//") {
break;
}
if let Some(stripped) = token.strip_prefix('"') {
literal.push_str(stripped);
}
if !literal.is_empty() {
if !token.starts_with('"') {
if i > 0 {
literal.push(' ');
}
literal.push_str(token);
}
if token.ends_with('"') {
literal.pop(); // remove the closing quote
tokens.push(Token::StringLit(literal));
literal = String::new();
}
continue;
}
if let Some(token) = parse_register(token)? {
tokens.push(token);
} else if let Some(token) = parse_opcode(token)? {
tokens.push(token);
} else if let Some(token) = parse_hex(token)? {
tokens.push(token);
} else if let Some(token) = parse_octal(token)? {
tokens.push(token);
} else if let Some(token) = parse_binary(token)? {
tokens.push(token);
} else if let Some(token) = parse_decimal(token)? {
tokens.push(token);
} else if let Some(token) = parse_label(token, module)? {
tokens.push(token);
} else if let Some(token) = parse_symbol(token, module)? {
tokens.push(token);
} else {
return Err(AssembleError::Generic);
}
}
}
Ok(tokens)
}
pub fn parse_register(token: &str) -> Result<Option<Token>, AssembleError> {
Ok(Register::try_from(token).map(Token::Register).ok())
}
pub fn parse_opcode(token: &str) -> Result<Option<Token>, AssembleError> {
if Opcode::OPCODES.contains(&token) {
Ok(Some(Token::Opcode(Opcode::from_str(token).expect(
"Opcode::from_str failed for a valid opcode token",
))))
} else {
Ok(None)
}
}
pub fn parse_hex(token: &str) -> Result<Option<Token>, AssembleError> {
if (token.len() < 3) | !token.starts_with("0x") {
return Ok(None);
}
let Some(lit) = &token.get(2..) else {
return Err(AssembleError::InvalidArg);
};
u32::from_str_radix(lit, 16).map_or(Err(AssembleError::Generic), |value| {
Ok(Some(Token::Immediate(value)))
})
}
pub fn parse_octal(token: &str) -> Result<Option<Token>, AssembleError> {
if (token.len() < 3) | !token.starts_with("0o") {
return Ok(None);
}
let Some(lit) = &token.get(2..) else {
return Err(AssembleError::InvalidArg);
};
u32::from_str_radix(lit, 8).map_or(Err(AssembleError::Generic), |value| {
Ok(Some(Token::Immediate(value)))
})
}
pub fn parse_binary(token: &str) -> Result<Option<Token>, AssembleError> {
if (token.len() < 3) | !token.starts_with("0b") {
return Ok(None);
}
let Some(lit) = &token.get(2..) else {
return Err(AssembleError::InvalidArg);
};
u32::from_str_radix(lit, 2).map_or(Err(AssembleError::Generic), |value| {
Ok(Some(Token::Immediate(value)))
})
}
pub fn parse_decimal(token: &str) -> Result<Option<Token>, AssembleError> {
let Ok(tok) = token.parse::<u32>() else {
return Ok(None);
};
Ok(Some(Token::Immediate(tok)))
}
pub fn parse_label(token: &str, module: u64) -> Result<Option<Token>, AssembleError> {
if token.ends_with(':') {
Ok(Some(Token::Symbol(Symbol {
name: token[0..token.len() - 1].to_string(),
module: Module::Resolved(module),
})))
} else {
Ok(None)
}
}
pub fn parse_symbol(token: &str, module: u64) -> Result<Option<Token>, AssembleError> {
let Some(tokc) = token.chars().next() else {
return Err(AssembleError::Generic); // TODO: What is this error?
};
if tokc.is_numeric() {
return Ok(None);
}
let mut split = token.splitn(2, "::");
let Some(symbol1) = split.next() else {
return Err(AssembleError::InvalidArg);
};
let symbol1 = symbol1.to_string();
if let Some(symbol2) = split.next() {
Ok(Some(Token::Symbol(Symbol {
name: symbol2.to_string(),
module: Module::Unresolved(symbol1),
})))
} else {
Ok(Some(Token::Symbol(Symbol {
name: symbol1,
module: Module::Resolved(module),
})))
}
}
-139
View File
@@ -1,139 +0,0 @@
//! Macros used throughout the assembler
use crate::assembler::model::{Node, Opcode, Symbol, Token};
/// Parse DSA assembly code with optional formatting
///
/// # Examples
/// ```
/// // With formatting:
/// let nodes = dsa!(hash, "mov r1, {}", 42)?;
///
/// // Without formatting:
/// let nodes = dsa!(hash, "mov r1, 42")?;
/// ```
#[macro_export]
macro_rules! dsa {
// Version with formatting arguments
($hash:expr, $input:expr, $($args:expr),+) => {{
let input = format!($input, $($args),+);
let tokens = $crate::lexer::lexer(input, $hash)?;
let parsed = $crate::parser::Parser::parse_nodes(tokens)?;
parsed
}};
// Version without formatting
($hash:expr, $input:expr) => {{
let input = String::from($input);
let tokens = $crate::lexer::lexer(input, $hash)?;
let parsed = $crate::parser::Parser::parse_nodes(tokens)?;
parsed
}};
}
/// Creates a new Node with the given symbol, opcode, and tokens
#[macro_export]
macro_rules! node {
($symbol: expr, $opcode: expr, args: $tokens: expr) => {
$crate::assembler::model::Node::new($symbol.clone(), $opcode.clone(), $tokens.clone())
};
($symbol: expr, $opcode: expr, $($tokens: expr),+) => {
$crate::assembler::model::Node::new(
$symbol.clone(),
$opcode.clone(),
vec![$(node!(@convert_token $tokens)),+]
)
};
($symbol: expr, $opcode: expr) => {
$crate::assembler::model::Node::new(
$symbol.clone(),
$opcode.clone(),
Vec::new()
)
};
(@convert_token $token: literal) => {
$crate::assembler::model::Token::Immediate($token)
};
(@convert_token $token: expr) => {
$token.clone()
};
}
/// Extracts a specific token type from a token
#[macro_export]
macro_rules! expect_token {
($token:expr, Symbol) => {
match $token {
$crate::assembler::model::Token::Symbol(value) => Ok(value.clone()),
other => Err($crate::assembler::AssembleError::UnexpectedToken(
other.clone(),
$crate::assembler::model::TokenType::Symbol,
)),
}
};
($token:expr, Register) => {
match $token {
$crate::assembler::model::Token::Register(value) => Ok(value.clone()),
other => Err($crate::assembler::AssembleError::UnexpectedToken(
other.clone(),
$crate::assembler::model::TokenType::Register,
)),
}
};
($token:expr, Immediate) => {
match $token {
$crate::assembler::model::Token::Immediate(value) => Ok(value.clone()),
other => Err($crate::assembler::AssembleError::UnexpectedToken(
other.clone(),
$crate::assembler::model::TokenType::Immediate,
)),
}
};
($token:expr, StringLit) => {
match $token {
$crate::assembler::model::Token::StringLit(value) => Ok(value.clone()),
other => Err($crate::assembler::AssembleError::UnexpectedToken(
other.clone(),
$crate::assembler::model::TokenType::StringLit,
)),
}
};
($token:expr, Opcode) => {
match $token {
$crate::assembler::model::Token::Opcode(value) => Ok(value.clone()),
other => Err($crate::assembler::AssembleError::UnexpectedToken(
other.clone(),
$crate::assembler::model::TokenType::Opcode,
)),
}
};
}
/// Checks if a token matches any of the specified types
#[macro_export]
macro_rules! expect_type {
($token:expr, $($variant:ident),+) => {{
let token = $token;
match &token {
$(
$crate::assembler::model::Token::$variant(_) => Ok(token.clone()),
)+
other => {
let expected_type = expect_type!(@get_first_type $($variant),+);
Err($crate::assembler::AssembleError::UnexpectedToken(
other.clone().clone(),
expected_type,
))
}
}
}};
(@get_first_type Symbol $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Symbol };
(@get_first_type Register $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Register };
(@get_first_type Immediate $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Immediate };
(@get_first_type StringLit $(, $rest:ident)*) => { $crate::assembler::model::TokenType::StringLit };
(@get_first_type Opcode $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Opcode };
}
-266
View File
@@ -1,266 +0,0 @@
#![allow(dead_code, unused)]
use std::{
collections::HashSet,
fmt, fs,
hash::{DefaultHasher, Hash, Hasher},
path::{Path, PathBuf},
sync::{Arc, Mutex, mpsc},
thread,
};
use common::prelude::Instruction;
// TODO: Use an actual logging or tracing library for pretty (scoped) output.
fn log(message: &str) {
println!("\x1b[32mINFO:\x1b[0m {message}");
}
// Module declarations
#[macro_use]
pub mod macros;
#[allow(clippy::module_inception)]
pub mod assembler;
pub mod codegen;
pub mod expand;
pub mod lexer;
pub mod model;
pub mod parser;
pub mod resolver;
// Re-exports
pub use self::{
codegen::codegen,
expand::expand_pseudo_ops,
lexer::lexer,
model::{Module, Node, Opcode, Symbol, Token, TokenType},
parser::{Parser, Program},
resolver::{create_sections, resolve_dependencies, resolve_symbols},
};
use crate::util::logging::{Entry, Logger};
pub struct CompilerEngine {
result_tx: mpsc::Sender<Result<Vec<Instruction>, AssembleError>>,
result_rx: Option<mpsc::Receiver<Result<Vec<Instruction>, AssembleError>>>,
is_running: bool,
}
impl CompilerEngine {
#[must_use]
pub fn new() -> Self {
let (tx, rx) = mpsc::channel();
Self {
result_tx: tx,
result_rx: Some(rx),
is_running: false,
}
}
/// Start the compilation process in a separate thread
pub fn start_compilation(&mut self, src: &Path) {
if self.is_running {
return;
}
let src = src.to_path_buf();
let tx = self.result_tx.clone();
thread::spawn(move || {
let result = assemble(&src);
tx.send(result)
.expect("Failed to send compilation result from worker thread");
});
self.is_running = true;
}
/// Check if compilation is complete and get the result
pub fn try_get_result(&mut self) -> Option<Result<Vec<Instruction>, AssembleError>> {
if !self.is_running {
return None;
}
match self
.result_rx
.as_ref()
.expect("result_rx should be Some while compilation is running")
.try_recv()
{
Ok(result) => {
self.is_running = false;
Some(result)
}
Err(mpsc::TryRecvError::Empty) => None,
Err(mpsc::TryRecvError::Disconnected) => {
self.is_running = false;
Some(Err(AssembleError::Generic))
}
}
}
/// Block until compilation is complete and return the result
pub fn wait_for_result(&mut self) -> Result<Vec<Instruction>, AssembleError> {
if !self.is_running {
return Err(AssembleError::Generic);
}
if let Ok(result) = self
.result_rx
.take()
.expect("result_rx should be Some while waiting for compilation result")
.recv()
{
self.is_running = false;
result
} else {
self.is_running = false;
Err(AssembleError::Generic)
}
}
}
fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
let mut modules = HashSet::new();
let mut program = Program::new();
let hash = quick_hash(src);
if modules.contains(&hash) {
return Ok(vec![]);
}
prepare_dependency(src, &mut modules, &mut program)?;
let mut nodes = program.nodes.clone();
create_sections(&mut nodes)?;
resolve_symbols(&mut nodes)?;
let instructions = codegen(nodes)?;
Ok(instructions)
}
impl Default for CompilerEngine {
fn default() -> Self {
Self::new()
}
}
fn prepare_dependency(
path: &Path,
modules: &mut HashSet<u64>,
program: &mut Program,
) -> Result<(), AssembleError> {
let filename = path
.file_name()
.and_then(|n| n.to_str())
.expect("Failed to get file name from path");
if let Ok(path) = path.canonicalize() {
log(&format!(
"{:20} {:20} [{}]",
"Building",
filename,
path.display()
));
}
let src = fs::read_to_string(path)
.map_err(|_| AssembleError::InvalidFile(path.to_path_buf()))?;
let file_hash = quick_hash(path);
log(&format!("{:20} {:20}", "Tokenising", filename));
let tokens = lexer::lexer(src, file_hash)?;
log(&format!("{:20} {:20}", "Parsing", filename));
let parsed = Parser::parse_nodes(tokens)?;
log(&format!("{:20} {:20}", "Resolving Deps", filename));
// Get the parent directory of the source file to use as the base directory
let base_dir = path
.parent()
.ok_or_else(|| AssembleError::InvalidFile(path.to_path_buf()))?;
let mut nodes = expand_pseudo_ops(parsed, file_hash)?;
nodes = resolve_dependencies(nodes, base_dir)?;
let deps = Parser::get_dependencies(&nodes, path)?;
log(&format!(
"{:20} {:20}",
"Expanding PseudoInstructions", filename
));
// add a section instruction
nodes.insert(
0,
node!(None, Opcode::Segment, Token::Immediate(file_hash as u32)),
);
for n in &nodes {
println!("{n}");
}
program.add_module(nodes);
for dep in deps {
log(&format!(
"{:20} {:20}",
"Including",
dep.file_name()
.and_then(|f| f.to_str())
.expect("Dependency path has no file name or is not valid UTF-8")
));
let dep_hash = quick_hash(&dep);
if modules.insert(dep_hash) {
prepare_dependency(dep.as_path(), modules, program)?;
}
}
Ok(())
}
#[derive(Debug, Clone)]
pub enum AssembleError {
Generic,
UnexpectedEof,
InvalidFile(PathBuf),
UnexpectedToken(Token, TokenType),
InvalidArg,
UndefinedSymbol(Symbol),
/// Contains the nth element missing from the instruction.
MissingArgument(u8),
}
impl fmt::Display for AssembleError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Generic => write!(f, "Generic error"),
Self::UnexpectedToken(tok, expected) => {
write!(f, "Unexpected token {tok:?}, expected {expected:?}")
}
Self::UnexpectedEof => write!(f, "Unexpected end of file"),
Self::InvalidFile(path) => write!(f, "Invalid file `{}`", path.display()),
Self::InvalidArg => write!(f, "Invalid argument"),
Self::UndefinedSymbol(symbol) => {
write!(f, "Undefined symbol {symbol}")
}
Self::MissingArgument(n) => {
write!(f, "Missing argument #{n} from instruction arguments.")
}
}
}
}
fn quick_hash(value: &Path) -> u64 {
let mut hasher = DefaultHasher::new();
value
.canonicalize()
.expect("Failed to canonicalize path for quick_hash")
.to_str()
.hash(&mut hasher);
hasher.finish()
}
-419
View File
@@ -1,419 +0,0 @@
use std::{fmt, str::FromStr};
use common::prelude::Register;
use crate::assembler::AssembleError;
#[derive(Debug, Clone)]
pub struct Node {
pub symbol: Option<Symbol>,
pub opcode: Opcode,
pub tokens: Vec<Token>,
}
impl Node {
#[must_use]
pub const fn new(symbol: Option<Symbol>, opcode: Opcode, tokens: Vec<Token>) -> Self {
Self {
symbol,
opcode,
tokens,
}
}
#[must_use]
pub fn label(&self) -> Option<Symbol> {
self.symbol.clone()
}
#[must_use]
pub const fn opcode(&self) -> Opcode {
self.opcode
}
#[must_use]
pub fn args(&self) -> Vec<Token> {
self.tokens.clone()
}
pub fn arg(&self, index: usize) -> Result<Token, AssembleError> {
self.args()
.get(index)
.cloned()
.ok_or(AssembleError::InvalidArg)
}
}
impl fmt::Display for Node {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let symbol = self
.label()
.as_ref()
.map_or_else(String::new, |symbol| format!("{symbol}:\n"));
write!(
f,
"\x1b[93m{} \t\x1b[94m{} \x1b[37m{:?} \x1b[0m",
symbol,
self.opcode(),
self.args()
)
}
}
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} ( module: {})", self.name, self.module)
}
}
impl fmt::Display for Module {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Unresolved(name) => write!(f, "{name}"),
Self::Resolved(name) => write!(f, "{name}"),
}
}
}
impl fmt::Display for Opcode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Nop => write!(f, "nop"),
Self::Mov => write!(f, "mov"),
Self::Movs => write!(f, "movs"),
Self::Ldb => write!(f, "ldb"),
Self::Ldbs => write!(f, "ldbs"),
Self::Ldh => write!(f, "ldh"),
Self::Ldhs => write!(f, "ldhs"),
Self::Ldw => write!(f, "ldw"),
Self::Stb => write!(f, "stb"),
Self::Sth => write!(f, "sth"),
Self::Stw => write!(f, "stw"),
Self::Lli => write!(f, "lli"),
Self::Lui => write!(f, "lui"),
Self::Jmp => write!(f, "jmp"),
Self::Jeq => write!(f, "jeq"),
Self::Jne => write!(f, "jne"),
Self::Jgt => write!(f, "jgt"),
Self::Jge => write!(f, "jge"),
Self::Jlt => write!(f, "jlt"),
Self::Jle => write!(f, "jle"),
Self::Cmp => write!(f, "cmp"),
Self::Inc => write!(f, "inc"),
Self::Dec => write!(f, "dec"),
Self::Shl => write!(f, "shl"),
Self::Shr => write!(f, "shr"),
Self::Add => write!(f, "add"),
Self::Sub => write!(f, "sub"),
Self::And => write!(f, "and"),
Self::Or => write!(f, "or"),
Self::Not => write!(f, "not"),
Self::Xor => write!(f, "xor"),
Self::Nand => write!(f, "nand"),
Self::Nor => write!(f, "nor"),
Self::Xnor => write!(f, "xnor"),
Self::Int => write!(f, "int"),
Self::Irt => write!(f, "irt"),
Self::Hlt => write!(f, "hlt"),
Self::AddI => write!(f, "addi"),
Self::SubI => write!(f, "subi"),
Self::Db => write!(f, "db"),
Self::Dh => write!(f, "dh"),
Self::Dw => write!(f, "dw"),
Self::Resb => write!(f, "resb"),
Self::Resh => write!(f, "resh"),
Self::Resw => write!(f, "resw"),
Self::Push => write!(f, "push"),
Self::Pop => write!(f, "pop"),
Self::Lwi => write!(f, "lwi"),
Self::Call => write!(f, "call"),
Self::Return => write!(f, "return"),
Self::Pusha => write!(f, "pusha"),
Self::Popa => write!(f, "popa"),
// meta instructions
Self::Include => write!(f, "include"),
Self::Data => write!(f, "data"),
Self::Segment => write!(f, "[SEGMENT]"),
}
}
}
#[derive(Debug, Clone, Eq)]
pub struct Symbol {
pub name: String,
pub module: Module,
}
impl std::hash::Hash for Symbol {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.name.hash(state);
self.module.hash(state);
}
}
impl PartialEq for Symbol {
fn eq(&self, other: &Self) -> bool {
self.name == other.name && self.module == other.module
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Module {
Resolved(u64),
Unresolved(String),
}
#[derive(Debug, Clone)]
pub enum Token {
Symbol(Symbol),
Register(Register),
Immediate(u32),
StringLit(String),
Opcode(Opcode),
}
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum TokenType {
Symbol,
Register,
Immediate,
StringLit,
Opcode,
}
impl TokenType {
#[must_use]
pub const fn from_token(token: &Token) -> Self {
match token {
Token::Symbol(_) => Self::Symbol,
Token::Register(_) => Self::Register,
Token::Immediate(_) => Self::Immediate,
Token::StringLit(_) => Self::StringLit,
Token::Opcode(_) => Self::Opcode,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Opcode {
// Real instructions (0x00-0x26)
Nop,
Mov,
Movs,
Ldb,
Ldbs,
Ldh,
Ldhs,
Ldw,
Stb,
Sth,
Stw,
Lli,
Lui,
Jmp,
Jeq,
Jne,
Jgt,
Jge,
Jlt,
Jle,
Cmp,
Inc,
Dec,
Shl,
Shr,
Add,
Sub,
And,
Or,
Not,
Xor,
Nand,
Nor,
Xnor,
Int,
Irt,
Hlt,
AddI,
SubI,
// Pseudo-instructions
Db,
Dh,
Dw,
Resb,
Resh,
Resw,
Push,
Pop,
Pusha,
Popa,
Lwi,
Call,
Return,
// meta instructions (these aren't present in the binary as instructions)
Include,
Data,
Segment,
}
#[derive(Debug)]
pub enum OpcodeFromStrError {
InvalidRegister(&'static str),
InvalidOpcode(String),
}
impl std::fmt::Display for OpcodeFromStrError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::InvalidRegister(reg) => write!(f, "register does not exist: {reg}"),
Self::InvalidOpcode(op) => write!(f, "instruction does not exist: {op}"),
}
}
}
impl std::error::Error for OpcodeFromStrError {}
impl FromStr for Opcode {
type Err = OpcodeFromStrError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"nop" => Ok(Self::Nop),
"mov" => Ok(Self::Mov),
"movs" => Ok(Self::Movs),
"ldb" => Ok(Self::Ldb),
"ldbs" => Ok(Self::Ldbs),
"ldh" => Ok(Self::Ldh),
"ldhs" => Ok(Self::Ldhs),
"ldw" => Ok(Self::Ldw),
"stb" => Ok(Self::Stb),
"sth" => Ok(Self::Sth),
"stw" => Ok(Self::Stw),
"lli" => Ok(Self::Lli),
"lui" => Ok(Self::Lui),
"jmp" => Ok(Self::Jmp),
"jeq" => Ok(Self::Jeq),
"jne" => Ok(Self::Jne),
"jgt" => Ok(Self::Jgt),
"jge" => Ok(Self::Jge),
"jlt" => Ok(Self::Jlt),
"jle" => Ok(Self::Jle),
"cmp" => Ok(Self::Cmp),
"inc" => Ok(Self::Inc),
"dec" => Ok(Self::Dec),
"shl" => Ok(Self::Shl),
"shr" => Ok(Self::Shr),
"add" => Ok(Self::Add),
"sub" => Ok(Self::Sub),
"and" => Ok(Self::And),
"or" => Ok(Self::Or),
"not" => Ok(Self::Not),
"xor" => Ok(Self::Xor),
"nand" => Ok(Self::Nand),
"nor" => Ok(Self::Nor),
"xnor" => Ok(Self::Xnor),
"int" => Ok(Self::Int),
"irt" => Ok(Self::Irt),
"hlt" => Ok(Self::Hlt),
"addi" => Ok(Self::AddI),
"subi" => Ok(Self::SubI),
"db" => Ok(Self::Db),
"dh" => Ok(Self::Dh),
"dw" => Ok(Self::Dw),
"resb" => Ok(Self::Resb),
"resh" => Ok(Self::Resh),
"resw" => Ok(Self::Resw),
"push" => Ok(Self::Push),
"pop" => Ok(Self::Pop),
"lwi" => Ok(Self::Lwi),
"include" => Ok(Self::Include),
"call" => Ok(Self::Call),
"return" => Ok(Self::Return),
"pusha" => Ok(Self::Pusha),
"popa" => Ok(Self::Popa),
_ => Err(OpcodeFromStrError::InvalidOpcode(s.to_string())),
}
}
}
impl Opcode {
pub const OPCODES: &[&str] = &[
// Real instructions (0x00-0x26)
"nop", "mov", "movs", "ldb", "ldbs", "ldh", "ldhs", "ldw", "stb", "sth", "stw",
"lli", "lui", "jmp", "jeq", "jne", "jgt", "jge", "jlt", "jle", "cmp", "inc",
"dec", "shl", "shr", "add", "sub", "and", "or", "not", "xor", "nand", "nor",
"xnor", "int", "irt", "hlt", "addi", "subi", // Pseudo-instructions
"db", "dh", "dw", "resb", "resh", "resw", "push", "pop", "lwi", "call", "return",
"pusha", "popa", // meta instructions
"include",
];
#[must_use]
pub const fn to_opcode_value(&self) -> Option<u8> {
match self {
Self::Nop => Some(0x00),
Self::Mov => Some(0x01),
Self::Movs => Some(0x02),
Self::Ldb => Some(0x03),
Self::Ldbs => Some(0x04),
Self::Ldh => Some(0x05),
Self::Ldhs => Some(0x06),
Self::Ldw => Some(0x07),
Self::Stb => Some(0x08),
Self::Sth => Some(0x09),
Self::Stw => Some(0x0A),
Self::Lli => Some(0x0B),
Self::Lui => Some(0x0C),
Self::Jmp => Some(0x0D),
Self::Jeq => Some(0x0E),
Self::Jne => Some(0x0F),
Self::Jgt => Some(0x10),
Self::Jge => Some(0x11),
Self::Jlt => Some(0x12),
Self::Jle => Some(0x13),
Self::Cmp => Some(0x14),
Self::Inc => Some(0x15),
Self::Dec => Some(0x16),
Self::Shl => Some(0x17),
Self::Shr => Some(0x18),
Self::Add => Some(0x19),
Self::Sub => Some(0x1A),
Self::And => Some(0x1B),
Self::Or => Some(0x1C),
Self::Not => Some(0x1D),
Self::Xor => Some(0x1E),
Self::Nand => Some(0x1F),
Self::Nor => Some(0x20),
Self::Xnor => Some(0x21),
Self::Int => Some(0x22),
Self::Irt => Some(0x23),
Self::Hlt => Some(0x24),
Self::AddI => Some(0x25),
Self::SubI => Some(0x26),
Self::Segment => Some(0x27),
// Pseudo-instructions don't have opcode values
_ => None,
}
}
#[must_use]
pub const fn is_pseudo_instruction(&self) -> bool {
matches!(
self,
Self::Db
| Self::Dh
| Self::Dw
| Self::Resb
| Self::Resh
| Self::Resw
| Self::Push
| Self::Pop
| Self::Lwi
)
}
}
-368
View File
@@ -1,368 +0,0 @@
use std::path::{Path, PathBuf};
use crate::{assembler::AssembleError, expect_token, expect_type, node};
use crate::assembler::model::{Node, Opcode, Token};
use common::prelude::*;
pub struct Parser {
tokens: Vec<Token>,
nodes: Vec<Node>,
}
#[derive(Debug)]
pub struct Program {
pub nodes: Vec<Node>,
}
impl Program {
#[must_use]
pub const fn new() -> Self {
Self { nodes: vec![] }
}
pub fn add_module(&mut self, module: Vec<Node>) {
self.nodes.extend(module);
}
pub fn parser(&mut self) -> Parser {
Parser {
tokens: vec![],
nodes: self.nodes.clone(),
}
}
}
impl Default for Program {
fn default() -> Self {
Self::new()
}
}
impl Parser {
pub fn parse_nodes(tokens: Vec<Token>) -> Result<Vec<Node>, AssembleError> {
let mut self_ = Self {
tokens: tokens.into_iter().rev().collect(),
nodes: vec![],
};
while !self_.tokens.is_empty() {
let ins = self_.parse_instruction()?;
self_.nodes.push(ins);
}
Ok(self_.nodes.clone())
}
pub fn get_dependencies(
nodes: &Vec<Node>,
source_path: &Path,
) -> Result<Vec<PathBuf>, AssembleError> {
let mut dependencies = Vec::new();
// Get the parent directory of the source file to use as the base directory
let base_dir = source_path
.parent()
.ok_or_else(|| AssembleError::InvalidFile(source_path.to_path_buf()))?;
for node in nodes {
if node.opcode() == Opcode::Include {
let path_str = expect_token!(
node.args().get(1).ok_or(AssembleError::Generic)?,
StringLit
)?;
let path = PathBuf::from(path_str);
// If the path is not absolute, make it relative to the base directory
let full_path = if path.is_absolute() {
path
} else {
base_dir.join(path)
};
dependencies.push(full_path);
}
}
Ok(dependencies)
}
#[expect(clippy::too_many_lines, clippy::cognitive_complexity)]
fn parse_instruction(&mut self) -> Result<Node, AssembleError> {
if self.tokens.is_empty() {
unreachable!();
}
// check if the Node starts with a label
let label = expect_token!(self.peek_next()?, Symbol).ok();
if label.is_some() {
self.tokens.pop();
}
let opcode = expect_token!(self.next()?, Opcode)?;
let args: Vec<Token>;
match opcode {
// R-type instructions
Opcode::Mov | Opcode::Movs => {
let reg1 = expect_type!(self.next()?, Register, Symbol)?;
let reg2 = expect_type!(self.next()?, Register, Symbol)?;
args = vec![reg1, reg2];
}
Opcode::Ldb | Opcode::Ldbs | Opcode::Ldh | Opcode::Ldhs | Opcode::Ldw => {
let base = expect_type!(self.next()?, Register, Symbol)?;
let dest = expect_type!(self.next()?, Register)?;
let mut offset = Token::Immediate(0);
if let Ok(next) = self.peek_next()
&& expect_type!(next, Immediate).is_ok() {
offset = self.next()?;
}
args = vec![base, dest, offset];
}
Opcode::Stb | Opcode::Sth | Opcode::Stw => {
let base = expect_type!(self.next()?, Register)?;
let dest = expect_type!(self.next()?, Register, Symbol)?;
let mut offset = Token::Immediate(0);
if let Ok(next) = self.peek_next()
&& expect_type!(next, Immediate).is_ok() {
offset = self.next()?;
}
args = vec![base, dest, offset];
}
Opcode::Add
| Opcode::Sub
| Opcode::And
| Opcode::Or
| Opcode::Xor
| Opcode::Nand
| Opcode::Nor
| Opcode::Xnor => {
let src1 = expect_type!(self.next()?, Register, Symbol)?;
let src2 = expect_type!(self.next()?, Register, Symbol)?;
let dest = expect_type!(self.next()?, Register, Symbol)?;
args = vec![src1, src2, dest];
}
Opcode::Not | Opcode::Cmp => {
let reg1 = expect_type!(self.next()?, Register, Symbol)?;
let reg2 = expect_type!(self.next()?, Register, Symbol)?;
args = vec![reg1, reg2];
}
Opcode::Shl | Opcode::Shr => {
let reg = expect_type!(self.next()?, Register, Symbol)?;
let num = expect_type!(self.next()?, Immediate)?;
args = vec![reg, num];
}
Opcode::Inc | Opcode::Dec => {
let reg = expect_type!(self.next()?, Register, Symbol)?;
args = vec![reg];
}
Opcode::Include => {
let mod_name = expect_type!(self.next()?, Symbol)?;
let path = expect_type!(self.next()?, StringLit)?;
args = vec![mod_name, path];
}
// J-type instructions
Opcode::Jmp
| Opcode::Jeq
| Opcode::Jne
| Opcode::Jgt
| Opcode::Jge
| Opcode::Jlt
| Opcode::Jle => {
let imm = expect_type!(self.next()?, Immediate, Symbol)?;
let offset = match self.peek_next() {
Ok(token) => {
if expect_type!(token, Register).is_ok() {
self.next()?
} else {
Token::Register(Register::Zero)
}
}
Err(_) => Token::Register(Register::Zero),
};
args = vec![imm, offset];
}
Opcode::Call => {
let addr = expect_type!(self.next()?, Symbol)?;
args = vec![addr];
}
// I-type instructions
Opcode::Lui | Opcode::Lli | Opcode::Lwi => {
let imm = expect_type!(self.next()?, Immediate, Symbol)?;
let reg = expect_type!(self.next()?, Register)?;
args = vec![imm, reg];
}
// Immediate Arithmetic
Opcode::AddI | Opcode::SubI => {
let reg = expect_type!(self.next()?, Register)?;
let imm = expect_type!(self.next()?, Immediate)?;
let reg2 = if expect_type!(self.peek_next()?, Register).is_ok() {
self.next()?
} else {
reg.clone()
};
args = vec![reg, imm, reg2];
}
// D-type pseudoinstructions (data definition)
Opcode::Resb | Opcode::Resh | Opcode::Resw => {
let name = expect_type!(self.next()?, Symbol)?;
let num = expect_type!(self.next()?, Immediate)?;
args = vec![name, num];
}
Opcode::Db | Opcode::Dh | Opcode::Dw => {
args = self.parse_data_definition(opcode)?;
}
// E-type pseudoinstructions (stack operations)
Opcode::Push | Opcode::Pop => {
let reg = expect_type!(self.next()?, Register, Symbol)?;
args = vec![reg];
}
Opcode::Pusha | Opcode::Popa => {
let count =
expect_type!(self.next()?, Immediate).unwrap_or(Token::Immediate(8));
args = vec![count];
}
// Special instructions
Opcode::Int => {
let val = expect_type!(self.next()?, Immediate)?;
args = vec![val];
}
// Instructions with no arguments
Opcode::Hlt | Opcode::Nop | Opcode::Irt | Opcode::Return => {
args = vec![];
}
Opcode::Data | Opcode::Segment => {
return Err(AssembleError::Generic);
}
}
Ok(node!(label, opcode, args: args))
}
fn parse_data_definition(
&mut self,
opcode: Opcode,
) -> Result<Vec<Token>, AssembleError> {
let mut values = Vec::new();
let name = expect_type!(self.next()?, Symbol)?;
values.push(name);
match opcode {
Opcode::Db => {
// db can take string literals or u8 immediates
while !self.tokens.is_empty() {
let token = self
.tokens
.last()
.expect("Expected a token for data definition, but found none");
match token {
Token::StringLit(_) => {
values.push(self.tokens.pop().expect(
"Expected a token for data definition, but found none",
));
}
Token::Immediate(val) if u8::try_from(*val).is_ok() => {
values.push(self.tokens.pop().expect(
"Expected a token for data definition, but found none",
));
}
_ => break,
}
}
}
Opcode::Dh => {
// dh can take u16 immediates
while !self.tokens.is_empty() {
let token = self
.tokens
.last()
.expect("Expected a token for data definition, but found none");
match token {
Token::StringLit(_) => {
values.push(self.tokens.pop().expect(
"Expected a token for data definition, but found none",
));
}
Token::Immediate(val) if u16::try_from(*val).is_ok() => {
values.push(self.tokens.pop().expect(
"Expected a token for data definition, but found none",
));
}
_ => break,
}
}
}
Opcode::Dw => {
// dw can take u32 immediates
while !self.tokens.is_empty() {
match self
.tokens
.last()
.expect("Expected a token for data definition, but found none")
{
Token::StringLit(_) => {
values.push(self.tokens.pop().expect(
"Expected a token for data definition, but found none",
));
}
Token::Immediate(val) => {
values.push(self.tokens.pop().expect(
"Expected a token for data definition, but found none",
));
}
_ => break,
}
}
}
_ => unreachable!(),
}
Ok(values)
}
fn next(&mut self) -> Result<Token, AssembleError> {
if self.tokens.is_empty() {
Err(AssembleError::UnexpectedEof)
} else {
Ok(self
.tokens
.pop()
.expect("tokens vector was unexpectedly empty in next()"))
}
}
fn peek_next(&self) -> Result<Token, AssembleError> {
if self.tokens.is_empty() {
Err(AssembleError::UnexpectedEof)
} else {
Ok(self
.tokens
.last()
.expect("peek_next called on empty tokens vector")
.clone())
}
}
}
-156
View File
@@ -1,156 +0,0 @@
use std::{
collections::HashMap,
fs::canonicalize,
path::{Path, PathBuf},
};
use common::prelude::Register;
use crate::assembler::quick_hash;
use crate::assembler::{
log,
model::{Module, Node, Opcode, Symbol, Token},
};
use crate::{assembler::AssembleError, node};
pub fn resolve_symbols(nodes: &mut [Node]) -> Result<(), AssembleError> {
let symbol_table = generate_symbol_table(nodes);
for node in nodes.iter_mut() {
match node.opcode() {
Opcode::Jmp
| Opcode::Jeq
| Opcode::Jne
| Opcode::Jgt
| Opcode::Jge
| Opcode::Jlt
| Opcode::Jle
| Opcode::Lli
| Opcode::Lui => {
if let Token::Symbol(symbol) = node
.arg(0)
.expect("Expected argument 0 for jump-like opcode")
{
if let Some(address) = symbol_table.get(&symbol) {
node.tokens[0] = Token::Immediate(*address);
} else {
return Err(AssembleError::UndefinedSymbol(symbol));
}
}
}
_ => (),
}
}
Ok(())
}
fn generate_symbol_table(nodes: &[Node]) -> HashMap<Symbol, u32> {
let mut table = HashMap::new();
for (i, node) in nodes.iter().enumerate() {
if let Some(symbol) = node.label() {
table.insert(symbol, 4 * i as u32);
}
}
table
}
pub fn resolve_dependencies(
mut nodes: Vec<Node>,
base_dir: &Path,
) -> Result<Vec<Node>, AssembleError> {
// First we get a list of imports.
let mut dependencies = Vec::new();
for node in &nodes {
if node.opcode() == Opcode::Include {
// we want the path, and the name
let name = if let Token::Symbol(name) = node
.arg(0)
.expect("Expected argument #0 for Include directive.")
{
name.name.clone()
} else {
unreachable!()
}; //node.2.get(0).unwrap()
let Ok(Token::StringLit(path)) = node.arg(1) else {
unreachable!()
};
let full_path = base_dir.join(path);
let canonical_path = full_path
.canonicalize()
.map_err(|_| AssembleError::InvalidFile(full_path.clone()))?;
let hash = quick_hash(&canonical_path);
dependencies.push((name, hash));
}
}
let mut changes = Vec::<(u32, u32, Symbol)>::new();
// now we resolve the symbols on all the nodes
// we need to check all operands for unresolved signals
for (i, node) in nodes.clone().iter().enumerate() {
let Node {
tokens: operands, ..
} = node;
for (j, token) in operands.iter().enumerate() {
if let Token::Symbol(symbol) = token {
for d in &dependencies {
if let Module::Unresolved(name) = symbol.module.clone() {
if name != d.0 {
continue;
}
let symbol = Symbol {
name: symbol.name.clone(),
module: Module::Resolved(d.1),
};
changes.push((i as u32, j as u32, symbol));
}
}
}
}
}
for (i, j, symbol) in changes {
nodes[i as usize].tokens[j as usize] = Token::Symbol(symbol);
}
Ok(nodes)
}
pub fn create_sections(nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
let mut res = Vec::<Node>::with_capacity(nodes.len());
res.push(node!(None, Opcode::Segment, Token::Immediate(0)));
for n in nodes.iter() {
if n.opcode() == Opcode::Data {
res.push(n.clone());
}
}
let start = res.len() + 1;
res.insert(
0,
node!(
None,
Opcode::Jmp,
Token::Immediate(start as u32 * 4),
Token::Register(Register::Zero)
),
);
for n in nodes.iter() {
if !matches!(n.opcode(), Opcode::Data | Opcode::Include) {
res.push(n.clone());
}
}
*nodes = res;
Ok(())
}
+27
View File
@@ -0,0 +1,27 @@
//! This module contains the global asembler context to be passed to functions that need
//! it.
use std::sync::RwLock;
use crate::{model::module_registry::ModuleRegistry, symtab::SymbolTable};
/// Global state to be passed around.
pub struct AssemblerContext {
pub symbol_table: RwLock<SymbolTable>,
pub module_registry: RwLock<ModuleRegistry>,
}
impl Default for AssemblerContext {
fn default() -> Self {
Self::new()
}
}
impl AssemblerContext {
#[must_use] pub fn new() -> Self {
Self {
symbol_table: RwLock::new(SymbolTable::new()),
module_registry: RwLock::new(ModuleRegistry::new()),
}
}
}
+82
View File
@@ -0,0 +1,82 @@
//! This module contains code for various types of errors that may occur when assembling a
//! set of source DSA files.
use std::fmt::{Debug, Display};
use crate::source::{source_info::SourceInfo, tokeniser::error::TokeniserError};
/// An error that may occur during the assembly of a set of source files.
#[derive(Debug)]
pub struct AssembleError {
/// Display implementation can handle when the source code information is shown or
/// not.
source_info: Option<SourceInfo>,
/// The type of assembly error that occurred.
kind: AssembleErrorKind,
}
impl AssembleError {
#[must_use]
pub const fn new_source_error(
source_info: SourceInfo,
kind: AssembleErrorKind,
) -> Self {
Self {
source_info: Some(source_info),
kind,
}
}
#[must_use]
pub const fn new_other_error(kind: AssembleErrorKind) -> Self {
Self {
source_info: None,
kind,
}
}
}
impl Display for AssembleError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(info) = &self.source_info {
write!(f, "at {info}")?;
}
write!(f, "{}", self.kind)?;
Ok(())
}
}
/// Marker trait.
impl std::error::Error for AssembleError {}
/// Different types of errors that may occur when assembling a set of input source files.
#[non_exhaustive]
#[derive(Debug)]
pub enum AssembleErrorKind {
/// Usually unexpected I/O errors. Not normally recoverable.
IO(std::io::Error),
/// Errors emitted from the [`Tokeniser`].
Tokenise(TokeniserError),
}
impl Display for AssembleErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Tokenise(why) => write!(f, "tokeniser error: {why}"),
_ => write!(
f,
"unhandled error type in Display implementation! See error.rs!"
),
}
}
}
impl From<std::io::Error> for AssembleErrorKind {
fn from(err: std::io::Error) -> Self {
Self::IO(err)
}
}
pub mod conversions;
+7
View File
@@ -0,0 +1,7 @@
use crate::error::AssembleError;
impl From<std::io::Error> for AssembleError {
fn from(err: std::io::Error) -> Self {
Self::new_other_error(err.into())
}
}
+9 -5
View File
@@ -13,16 +13,20 @@
)] )]
pub mod args; pub mod args;
pub mod assembler;
pub mod image_builder; pub mod image_builder;
pub mod tooling; // pub mod tooling;
pub mod context;
pub mod error;
pub mod model;
pub mod source;
pub mod symtab;
mod util; mod util;
pub mod prelude { pub mod prelude {
pub use crate::assembler::CompilerEngine;
pub use crate::image_builder; pub use crate::image_builder;
pub use crate::tooling::brainf; // pub use crate::tooling::brainf;
pub use crate::tooling::project; // pub use crate::tooling::project;
} }
use num_cpus as _; use num_cpus as _;
+54 -59
View File
@@ -2,67 +2,62 @@ use common as _;
use num_cpus as _; use num_cpus as _;
use threadpool as _; use threadpool as _;
use assembler::{ // use clap::Parser;
prelude::*, // use std::{fs, io::Write, path::PathBuf};
tooling::{brainf, project},
};
use clap::Parser;
use std::{fs, io::Write, path::PathBuf};
fn main() { fn main() {
// Parse command line arguments // // Parse command line arguments
let args: Vec<String> = std::env::args().collect(); // let args: Vec<String> = std::env::args().collect();
let _clap_args = assembler::args::Args::parse(); // let _clap_args = assembler::args::Args::parse();
if args.len() == 2 && args[1] == "init" { // if args.len() == 2 && args[1] == "init" {
project::tool_libcreate(); // // project::tool_libcreate();
std::process::exit(0); // std::process::exit(0);
} // }
if args.len() == 2 && args[1] == "brainf" { // if args.len() == 2 && args[1] == "brainf" {
let src = PathBuf::from("brainf.bf"); // let src = PathBuf::from("brainf.bf");
let result = brainf::build(&src); // // let result = brainf::build(&src);
let mut file = match fs::File::create("brainf.dsb") { // let mut file = match fs::File::create("brainf.dsb") {
Err(e) => { // Err(e) => {
eprintln!("Failed to create output file: {e}"); // eprintln!("Failed to create output file: {e}");
std::process::exit(1); // std::process::exit(1);
} // }
Ok(file) => file, // Ok(file) => file,
}; // };
for instruction in result { // // for instruction in result {
if let Err(e) = file.write(&instruction.encode().to_be_bytes()) { // // if let Err(e) = file.write(&instruction.encode().to_be_bytes()) {
eprintln!("Failed to write to output file: {e}"); // // eprintln!("Failed to write to output file: {e}");
std::process::exit(1); // // std::process::exit(1);
} // // }
} // // }
std::process::exit(0); // std::process::exit(0);
} // }
if args.len() != 5 || args[1] != "-i" || args[3] != "-o" { // if args.len() != 5 || args[1] != "-i" || args[3] != "-o" {
eprintln!("Usage: {} -i input_path -o output_path", args[0]); // eprintln!("Usage: {} -i input_path -o output_path", args[0]);
std::process::exit(1); // std::process::exit(1);
} // }
let input_path = &args[2]; // let input_path = &args[2];
let output_path = &args[4]; // let output_path = &args[4];
let src = PathBuf::from(input_path); // let src = PathBuf::from(input_path);
// Initialize the compiler engine // // Initialize the compiler engine
let mut compiler = CompilerEngine::new(); // let mut compiler = CompilerEngine::new();
compiler.start_compilation(&src); // compiler.start_compilation(&src);
// Or block until done // // Or block until done
let result = compiler.wait_for_result().unwrap(); // let result = compiler.wait_for_result().unwrap();
for instruction in result { // for instruction in result {
if let Err(e) = fs::write(output_path, instruction.encode().to_be_bytes()) { // if let Err(e) = fs::write(output_path, instruction.encode().to_be_bytes()) {
eprintln!("Failed to write to output file: {e}"); // eprintln!("Failed to write to output file: {e}");
std::process::exit(1); // std::process::exit(1);
} // }
} // }
} }
+5
View File
@@ -0,0 +1,5 @@
//! This module contains the underlying data models and enums used by the Assembler.
pub mod module;
pub mod module_registry;
pub mod symbol;
+68
View File
@@ -0,0 +1,68 @@
//! This module contains the [`Module`] type and associated types. Each compilation unit
//! (file) is represented by a module which is used to namespace "function" calls and
//! accesses to global variables.
//!
//! They have unique identifiers in the form of UUIDs.
use std::path::{Path, PathBuf};
use uuid::Uuid;
use crate::model::module_registry::ModuleRegistry;
/// The ID for a module. A tuple struct for type safety.
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
pub struct ModuleId(Uuid);
impl ModuleId {
#[must_use]
pub const fn from_module(module: &Module) -> Self {
module.id
}
/// Convenience method to get the [`Module`] from a [`ModuleId`].
#[must_use]
pub fn to_module<'m>(&self, registry: &'m ModuleRegistry) -> Option<&'m Module> {
registry.get(self)
}
/// Convenience method to get the [`Module`] name from a [`ModuleId`].
#[must_use]
pub fn to_module_name(self, registry: &ModuleRegistry) -> Option<&str> {
self.to_module(registry).map(|module| module.name.as_str())
}
}
impl std::fmt::Display for ModuleId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
/// A single source file or compilation unit. Stores its own symbol table.
#[derive(Debug)]
pub struct Module {
/// The name of the module. This is typically the name of the file, less the `.dsa`
/// extension.
pub name: String,
/// The file path to the module. This is an absolute path.
pub path: PathBuf,
/// A unique ID for this module.
pub id: ModuleId,
}
impl std::hash::Hash for Module {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.id.0.hash(state);
}
}
impl Module {
pub fn new<P: AsRef<Path>>(name: String, path: P) -> Self {
Self {
name,
path: path.as_ref().to_path_buf(),
id: ModuleId(Uuid::new_v4()),
}
}
}
+42
View File
@@ -0,0 +1,42 @@
//! This module contains the code for the module registry. This is a singleton storing all
//! the modules being assembled.
use std::collections::HashMap;
use super::module::{Module, ModuleId};
/// Stores all the [`Module`]'s to be assembled.
pub struct ModuleRegistry {
modules: HashMap<ModuleId, Module>,
}
impl Default for ModuleRegistry {
fn default() -> Self {
Self::new()
}
}
impl ModuleRegistry {
#[must_use] pub fn new() -> Self {
Self {
modules: HashMap::new(),
}
}
/// Gets a [`Module`] by ID.
#[must_use] pub fn get(&self, module_id: &ModuleId) -> Option<&Module> {
self.modules.get(module_id)
}
/// Adds a [`Module`] and returns its [`ModuleId`].
pub fn add(&mut self, module: Module) -> ModuleId {
let id = module.id;
self.modules.insert(id, module);
id
}
/// Returns an iterator of modules.
pub fn modules(&self) -> impl Iterator<Item = &Module> {
self.modules.values()
}
}
+165
View File
@@ -0,0 +1,165 @@
//! This module contains the definitions for a Symbol.
use std::collections::HashSet;
use uuid::Uuid;
use crate::{model::module::ModuleId, symtab::SymbolTable};
/// Tuple struct for type safety. Has methods for fetching symbols by ID.
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub struct SymbolId(Uuid);
impl From<Symbol> for SymbolId {
fn from(sym: Symbol) -> Self {
sym.id
}
}
impl Default for SymbolId {
fn default() -> Self {
Self::new()
}
}
impl SymbolId {
#[must_use]
pub fn new() -> Self {
Self(Uuid::new_v4())
}
/// Convenience method to get the [`Module`] from a [`ModuleId`].
#[must_use]
pub fn to_module<'s>(&self, registry: &'s SymbolTable) -> Option<&'s Symbol> {
registry.get(self)
}
/// Convenience method to get the [`Module`] name from a [`ModuleId`].
#[must_use]
pub fn to_module_name(self, registry: &SymbolTable) -> Option<&str> {
self.to_module(registry).map(|module| module.name.as_str())
}
}
/// A symbol is a named reference that may be resolved later to an address by a linker.
#[derive(Debug)]
pub struct Symbol {
/// Stored cheaply instead of the name. Shall be stored in the symbol table under
/// this key.
pub id: SymbolId,
/// The human-readable name for the symbol.
pub name: String,
pub visibility: Visibility,
pub symbol_type: SymbolType,
/// The id of the module the symbol is defined in. This will be different for symbols
/// in different objects.
pub module_id: ModuleId,
/// Whether or not the symbol requires relocating.
pub needs_relocation: bool,
/// A list of the symbol's dependencies.
///
/// e.g.
///
/// ```dsa
/// main:
/// call another_func
///
/// another_func:
/// // Code goes here
/// ret
/// ```
///
/// Where `main` depends on `another_func`.
pub dependencies: HashSet<SymbolId>,
/// The address of the symbol.
pub address: Option<u32>,
/// The section the symbol is in.
/// TODO: Perhaps make this a proper type?
pub section: Option<String>,
pub size: Option<u32>,
}
impl Symbol {
#[must_use]
pub fn new(
name: String,
module_id: ModuleId,
visibility: Visibility,
symbol_type: SymbolType,
) -> Self {
Self {
id: SymbolId::new(),
name,
module_id,
address: None,
section: None,
size: None,
visibility,
symbol_type,
needs_relocation: false,
dependencies: HashSet::new(),
}
}
/// Adds a dependency on another [`Symbol`].
pub fn add_dependency(&mut self, dep: SymbolId) {
if self.id == dep {
return;
}
// We can resolve a lot of addresses at assembly time, but not really foreign
// ones, since we aren't certain of their position.
//
/* TODO: Handle this for flat binary case i.e. no linker required. This may be
* done using a similar method to before, such as just concatenating all
* of the files together and handling jumps and halts.
*
* > Ask Harry or read the initial code.
*/
if self.dependencies.insert(dep) {
self.needs_relocation = true;
}
}
/// Returns whether a [`Symbol`] depends on `symbol_id`.
#[must_use]
pub fn depends_on(&self, symbol_id: &SymbolId) -> bool {
self.dependencies.contains(symbol_id)
}
/// Removes a [`Symbol`] from the dependency set.
pub fn remove_dependency(&mut self, symbol_id: &SymbolId) {
self.dependencies.remove(symbol_id);
if self.dependencies.is_empty() {
self.needs_relocation = false;
}
}
}
#[derive(Debug, Copy, Clone)]
/// The visibility of the symbol in different object files.
pub enum Visibility {
/// `STB_PUBLIC` under the ELF spec. Visible in all other object files. Shall be used
/// for labels. Remember labels are namespaced in different files so they won't clash
/// with one another.
Public,
/// Only visible within this object file. `STB_LOCAL` under ELF spec. Shall be used
/// for data definitions unless they are marked public.
Local,
/// `STB_WEAK` under the ELF spec. Potentially unused.
Weak,
}
#[derive(Debug)]
pub enum SymbolType {
LabelOrFunction,
Variable,
}
+18
View File
@@ -0,0 +1,18 @@
//! This module contains anything within the first stage of assembly, i.e. the
//! tokenisation stage, or utility functions for reading input files.
use std::path::Path;
use crate::error::AssembleError;
pub mod source_info;
pub mod token;
pub mod tokeniser;
/// Attempts to load and open a source file, returning a [`Vec<u8>`] or an
/// [`AssembleError`].
pub fn load_source_bytes<P: AsRef<Path>>(p: P) -> Result<Vec<u8>, AssembleError> {
let path = p.as_ref();
Ok(std::fs::read(path)?)
}
+25
View File
@@ -0,0 +1,25 @@
//! This file contains information on where a [`Token`] or [`Node`] is within the source
//! code for more informative errors.
//!
//! This will likely be attached to a [`Token`] which will in turn be attached to an AST
//! [`Node`].
use std::fmt::Display;
use crate::model::module::Module;
/// Information on where the token is within the source.
#[derive(Debug)]
pub struct SourceInfo {
/// The line number within the source file underpinned by `module_id`.
pub line_no: usize,
pub module: Module,
/// The indexes where this token may be found (line-local).
pub span: std::ops::Range<usize>,
}
impl Display for SourceInfo {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.module.name)
}
}
+106
View File
@@ -0,0 +1,106 @@
//! Contains [`TokenType`] and [`Token`]'s. Adapted from Harry's old lexer since it was
//! easier to build from scratch and edit his code than it would be to try and wrangle it
//! into shape.
use crate::source::source_info::SourceInfo;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TokenType {
/// Symbol reference (e.g., `loop_start`, `my_data`).
Symbol(SymbolToken),
/// CPU register (e.g., `r1`, `r2`, `sp`).
Register(RegisterToken),
/// Immediate value (e.g., `42`, `0xFF`).
Immediate(u32),
/// String literal (e.g., `"hello world"`).
String(String),
/// Assembly instruction (e.g., `add`, `jmp`, `nop`).
Instruction(InstructionToken),
/// Label definition (e.g., `loop_start:`).
Label(LabelToken),
/// Assembler directive (e.g., `.global`, `.section`, `.dw`).
Directive(DirectiveToken),
/// End of line.
Newline,
/// End of file.
Eof,
}
#[derive(Debug)]
pub struct Token {
/// The type of the token.
token_type: TokenType,
/// Where in the source code is this [`Token`]?
source_info: SourceInfo,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SymbolToken {
pub name: String,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct LabelToken {
pub name: String,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DirectiveToken {
pub directive: String,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RegisterToken {
pub name: String,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct InstructionToken {
pub mnemonic: String,
}
impl Token {
#[must_use]
pub const fn new(token_type: TokenType, source_info: SourceInfo) -> Self {
Self {
token_type,
source_info,
}
}
#[must_use]
pub const fn symbol(name: String, source_info: SourceInfo) -> Self {
Self::new(TokenType::Symbol(SymbolToken { name }), source_info)
}
#[must_use]
pub const fn label(name: String, source_info: SourceInfo) -> Self {
Self::new(TokenType::Label(LabelToken { name }), source_info)
}
#[must_use]
pub const fn instruction(mnemonic: String, source_info: SourceInfo) -> Self {
Self::new(
TokenType::Instruction(InstructionToken { mnemonic }),
source_info,
)
}
#[must_use]
pub const fn register(name: String, source_info: SourceInfo) -> Self {
Self::new(TokenType::Register(RegisterToken { name }), source_info)
}
#[must_use]
pub const fn immediate(value: u32, source_info: SourceInfo) -> Self {
Self::new(TokenType::Immediate(value), source_info)
}
#[must_use]
pub const fn directive(directive: String, source_info: SourceInfo) -> Self {
Self::new(
TokenType::Directive(DirectiveToken { directive }),
source_info,
)
}
}
+7
View File
@@ -0,0 +1,7 @@
//! This file contains the [`Tokeniser`], which consumes a [`Vec`] of input bytes and
//! outputs a [`Vec<Token>`].
/// Consumes a [`Vec<u8>`] and outputs a [`Vec`] of [Token]'s.
pub struct Tokeniser {}
pub mod error;
+10
View File
@@ -0,0 +1,10 @@
//! This module contains the error types for the tokeniser.
#[derive(Debug)]
pub enum TokeniserError {}
impl std::fmt::Display for TokeniserError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "TODO!!!!!!")
}
}
+121
View File
@@ -0,0 +1,121 @@
//! This module contains the code for the Symbol Table, which can be written into object
//! files to support deferred relocations when using ELF files.
//!
//! It is also required for detection of duplicate symbols, and resolution in the flat
//! binary output type.
use crate::{
error::AssembleError,
model::{
module::ModuleId,
symbol::{Symbol, SymbolId, Visibility},
},
};
use std::collections::HashMap;
/// Global symbol table - single source of truth for all symbols.
/// Much simpler than per-module tables.
#[derive(Debug)]
pub struct SymbolTable {
/// All symbols by their ID - O(1) lookup
symbols: HashMap<SymbolId, Symbol>,
/// Name to ID mapping for human-readable lookups - O(1) lookup
name_to_id: HashMap<String, SymbolId>,
/// Module to symbols mapping for module-specific queries
module_symbols: HashMap<ModuleId, Vec<SymbolId>>,
}
impl SymbolTable {
#[must_use]
pub fn new() -> Self {
Self {
symbols: HashMap::new(),
name_to_id: HashMap::new(),
module_symbols: HashMap::new(),
}
}
/// Adds a symbol to the global table
pub fn add_symbol(&mut self, symbol: Symbol) -> Result<SymbolId, AssembleError> {
let id = symbol.id;
let module_id = symbol.module_id;
let name = symbol.name.clone();
// Check for duplicate names in the same module
if let Some(&existing_id) = self.name_to_id.get(&name)
&& let Some(existing) = self.symbols.get(&existing_id)
&& existing.module_id == module_id
{
return Err(AssembleError::new_other_error(
crate::error::AssembleErrorKind::IO(std::io::Error::new(
std::io::ErrorKind::AlreadyExists,
format!("Symbol '{name}' already defined in module"),
)),
));
}
// Add to all mappings
self.name_to_id.insert(name, id);
self.symbols.insert(id, symbol);
self.module_symbols.entry(module_id).or_default().push(id);
Ok(id)
}
/// Gets the [`Symbol`] by its [`SymbolId`].
#[must_use] pub fn get(&self, id: &SymbolId) -> Option<&Symbol> {
self.symbols.get(id)
}
/// Gets the [`Symbol`] by its name.
#[must_use] pub fn get_by_name(&self, name: &str) -> Option<&Symbol> {
self.name_to_id
.get(name)
.and_then(|id| self.symbols.get(id))
}
/// Gets all [`Symbol`]s in a module.
#[must_use] pub fn get_module_symbols(&self, module_id: &ModuleId) -> Vec<&Symbol> {
self.module_symbols
.get(module_id)
.map(|ids| ids.iter().filter_map(|id| self.symbols.get(id)).collect())
.unwrap_or_default()
}
/// Gets all the public symbols.
#[must_use] pub fn get_public_symbols(&self) -> Vec<&Symbol> {
self.symbols
.values()
.filter(|sym| matches!(sym.visibility, Visibility::Public))
.collect()
}
/// Updates symbol address (during resolution). Used for flat binaries or symbols with
/// no relocations.
pub fn update_symbol_address(
&mut self,
id: &SymbolId,
address: u32,
) -> Result<(), AssembleError> {
if let Some(symbol) = self.symbols.get_mut(id) {
symbol.address = Some(address);
if symbol.dependencies.is_empty() {
symbol.needs_relocation = false;
}
Ok(())
} else {
Err(AssembleError::new_other_error(
crate::error::AssembleErrorKind::IO(std::io::Error::new(
std::io::ErrorKind::NotFound,
"Symbol not found",
)),
))
}
}
}
impl Default for SymbolTable {
fn default() -> Self {
Self::new()
}
}
+1
View File
@@ -2,6 +2,7 @@
#![allow(unused)] #![allow(unused)]
use std::{fmt, sync::mpsc::Sender}; use std::{fmt, sync::mpsc::Sender};
#[derive(Debug, PartialEq, Eq)]
pub struct Logger {} pub struct Logger {}
impl Logger { impl Logger {
+1 -1
View File
@@ -38,7 +38,7 @@ pub enum InstructionType {
Immediate, Immediate,
} }
#[derive(Copy, Clone, Debug, PartialEq, Eq)] #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
#[non_exhaustive] #[non_exhaustive]
pub enum Register { pub enum Register {
// general purpose registers // general purpose registers
+1 -1
View File
@@ -16,7 +16,7 @@ use crate::emulator::{
ui::interface::Component, ui::interface::Component,
}; };
use assembler::prelude::*; // use assembler::prelude::*;
#[derive(Default)] #[derive(Default)]
pub struct Editor { pub struct Editor {