diff --git a/assembler/src/assembler/assembler.rs b/assembler/src/assembler/assembler.rs deleted file mode 100644 index f4f5ad5..0000000 --- a/assembler/src/assembler/assembler.rs +++ /dev/null @@ -1,203 +0,0 @@ -use std::{ - collections::HashSet, - fs, - path::{self, Path, PathBuf}, - sync::{Arc, Mutex}, - thread::{self, JoinHandle}, -}; - -use crate::assembler::{Node, Parser, ProgramRef, Task, resolve_dependencies}; -use crate::assembler::{ - Token, error::AssembleError, expand_pseudo_ops, lexer, quick_hash, -}; -use crate::util::logging::Logger; - -// pub fn new_assemble(path: &Path) { -// let program = Program::new(); -// let program_ref = ProgramRef::new(program); - -// let task = Module::build(path.to_path_buf(), program_ref.clone()); -// program_ref.add_task(task); - -// // wait on all tasks to finish -// for task in program_ref.get_tasks() { -// let module = task.module.join().unwrap(); -// program_ref.add_module(module); -// } -// } - -pub struct Program { - pub main_path: PathBuf, - registry: HashSet, - modules: Vec, - tasks: Vec, - logger: Logger, -} - -impl Program { - #[must_use] - pub fn new() -> Self { - Self { - registry: HashSet::new(), - modules: Vec::new(), - tasks: Vec::new(), - main_path: PathBuf::new(), - logger: Logger::new(), - } - } - - pub fn add_task(&mut self, task: Task) { - self.tasks.push(task); - } -} - -impl Default for Program { - fn default() -> Self { - Self::new() - } -} - -pub struct Module { - pub path: PathBuf, - pub hash: u64, - pub nodes: Vec, - program: ProgramRef, -} - -impl Module { - #[must_use] - pub const fn new( - path: PathBuf, - hash: u64, - nodes: Vec, - program: ProgramRef, - ) -> Self { - Self { - path, - hash, - nodes, - program, - } - } - - pub fn build(path: PathBuf, program: ProgramRef) -> Result { - // Spawn a thread that creates the main function and executes the lexer and - // parser. - let handle = thread::spawn(move || { - let mut module = - Self::new(path.clone(), quick_hash(&path), Vec::new(), program.clone()); - - match module.lex() { - Ok(tokens) => { - module.parse(tokens); - module.expand(); - module.prepare_dependencies(); - - Ok(module) - } - Err(why) => { - eprintln!( - "Error building program at path `{}`: {why}", - path.display() - ); - - Err(why) - } - } - }); - - Ok(Task::new(path, program)?) - } - - fn lex(&self) -> Result, AssembleError> { - if let Ok(path) = self.path.canonicalize() { - self.program.log(&format!( - "{:20} {:20} [{}]", - "Building", - self.get_filename(), - path.display() - )); - } - - let src = fs::read_to_string(&self.path).map_err(|e| { - AssembleError::Io(format!( - "Failed to read file '{}': {}", - self.path.display(), - e - )) - })?; - - let file_hash = quick_hash(&self.path); - - self.program - .log(&format!("{:20} {:20}", "Tokenising", self.get_filename())); - - lexer::lexer(src, file_hash) - } - - fn parse(&mut self, tokens: Vec) -> Result<(), AssembleError> { - self.program - .log(&format!("{:20} {:20}", "Parsing", self.get_filename())); - - let parsed = Parser::parse_nodes(tokens)?; - self.nodes = parsed; - - Ok(()) - } - - fn expand(&mut self) -> Result<(), AssembleError> { - self.program - .log(&format!("{:20} {:20}", "Expanding", self.get_filename())); - - let expanded = expand_pseudo_ops(self.nodes.clone(), self.hash)?; - self.nodes = expanded; - - Ok(()) - } - - fn prepare_dependencies(&self) -> Result<(), AssembleError> { - let nodes = resolve_dependencies( - self.nodes.clone(), - self.path.parent().expect("File should have a parent path!"), - )?; - - let dependencies = Parser::get_dependencies(&nodes, &self.path)?; - - for dep in dependencies { - if self.program.is_registered(&dep) { - // we have already built this module! - continue; - } - - self.program.register(&dep); - - // create new module - // add the task to the program - - match Self::build(dep, self.program.clone()) { - Ok(task) => self.program.add_task(task), - Err(why) => { - eprintln!("Error building program: {why}"); - } - } - } - - Ok(()) - } - - /// Gets the filename from a [`PathBuf`]. - fn get_filename(&self) -> &str { - self.path - .file_name() - .and_then(|f| f.to_str()) - .unwrap_or_default() - } - - /// Gets the parent filepath from a [`PathBuf`]. - fn get_parent(&self) -> &str { - self.path - .parent() - .and_then(|f| f.to_str()) - .unwrap_or_default() - } -} diff --git a/assembler/src/assembler/codegen.rs b/assembler/src/assembler/codegen.rs deleted file mode 100644 index 5c101a4..0000000 --- a/assembler/src/assembler/codegen.rs +++ /dev/null @@ -1,351 +0,0 @@ -use common::{args, prelude::*}; - -use crate::assembler::model::{Node, Opcode}; -use crate::{assembler::AssembleError, expect_token}; - -fn log(message: &str) { - println!("\x1b[32mINFO:\x1b[0m {message}"); -} - -pub fn codegen(nodes: Vec) -> Result, AssembleError> { - let mut instructions = vec![]; - - for node in nodes { - instructions.push( - build_instruction(&node) - .unwrap_or_else(|_| panic!("Failed to build instruction: {node:?}")), - ); - } - - println!("------------------------"); - log("Compilation Success ✅"); - - Ok(instructions) -} - -fn build_instruction(node: &Node) -> Result { - let opcode = node.opcode(); - let args = node.args(); - - match opcode { - Opcode::Nop => Ok(Instruction::Nop), - Opcode::Mov | Opcode::Movs => build_mov_instruction(opcode, &args), - Opcode::Ldb - | Opcode::Ldw - | Opcode::Ldh - | Opcode::Ldbs - | Opcode::Ldhs - | Opcode::Stb - | Opcode::Stw - | Opcode::Sth => build_memory_instruction(opcode, &args), - Opcode::Lli | Opcode::Lui => build_load_immediate_instruction(opcode, &args), - Opcode::Jmp - | Opcode::Jeq - | Opcode::Jne - | Opcode::Jgt - | Opcode::Jge - | Opcode::Jlt - | Opcode::Jle => build_jump_instruction(opcode, &args), - Opcode::Cmp => build_compare_instruction(&args), - Opcode::Inc | Opcode::Dec => build_inc_dec_instruction(opcode, &args), - Opcode::Shl | Opcode::Shr => build_shift_instruction(opcode, &args), - Opcode::Add - | Opcode::Sub - | Opcode::And - | Opcode::Or - | Opcode::Xor - | Opcode::Nand - | Opcode::Nor - | Opcode::Xnor => build_arithmetic_instruction(opcode, &args), - Opcode::AddI | Opcode::SubI => { - build_arithmetic_immediate_instruction(opcode, &args) - } - Opcode::Not => build_not_instruction(&args), - Opcode::Int => build_interrupt_instruction(&args), - Opcode::Irt => Ok(Instruction::IntReturn), - Opcode::Hlt => Ok(Instruction::Halt), - Opcode::Data => build_data_instruction(&args), - Opcode::Segment => build_segment_instruction(&args), - // These pseudo-instructions should have been expanded! - Opcode::Db - | Opcode::Dh - | Opcode::Dw - | Opcode::Resb - | Opcode::Resh - | Opcode::Resw - | Opcode::Push - | Opcode::Pop - | Opcode::Lwi - | Opcode::Include - | Opcode::Call - | Opcode::Return - | Opcode::Pusha - | Opcode::Popa => Err(AssembleError::InvalidArg), - } -} - -fn build_mov_instruction( - opcode: Opcode, - args: &[crate::assembler::model::Token], -) -> Result { - let Some(src_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - let Some(dest_token) = args.get(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - - let src = expect_token!(src_token, Register)?; - let dest = expect_token!(dest_token, Register)?; - - match opcode { - Opcode::Mov => Ok(Instruction::Mov(args!(R, sr1: src, dr: dest))), - Opcode::Movs => Ok(Instruction::MovSigned(args!(R, sr1: src, dr: dest))), - _ => unreachable!(), - } -} - -fn build_memory_instruction( - opcode: Opcode, - args: &[crate::assembler::model::Token], -) -> Result { - let Some(base_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - let Some(dest_token) = args.get(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - let Some(offset_token) = args.get(2) else { - return Err(AssembleError::MissingArgument(2)); - }; - - let base = expect_token!(base_token, Register)?; - let dest = expect_token!(dest_token, Register)?; - let offset = expect_token!(offset_token, Immediate)?; - let instruction_args = args!(I, immediate: offset as u16, r1: base, r2: dest); - - match opcode { - Opcode::Ldb => Ok(Instruction::LoadByte(instruction_args)), - Opcode::Ldw => Ok(Instruction::LoadWord(instruction_args)), - Opcode::Ldh => Ok(Instruction::LoadHalfword(instruction_args)), - Opcode::Ldbs => Ok(Instruction::LoadByteSigned(instruction_args)), - Opcode::Ldhs => Ok(Instruction::LoadHalfwordSigned(instruction_args)), - Opcode::Stb => Ok(Instruction::StoreByte(instruction_args)), - Opcode::Stw => Ok(Instruction::StoreWord(instruction_args)), - Opcode::Sth => Ok(Instruction::StoreHalfword(instruction_args)), - _ => unreachable!(), - } -} - -fn build_load_immediate_instruction( - opcode: Opcode, - args: &[crate::assembler::model::Token], -) -> Result { - let Some(value_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - let Some(dest_token) = args.get(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - - let value = expect_token!(value_token, Immediate)?; - let dest = expect_token!(dest_token, Register)?; - - match opcode { - Opcode::Lli => { - let instruction_args = args!(I, immediate: value as u16, r1: dest); - Ok(Instruction::LoadLowerImmediate(instruction_args)) - } - Opcode::Lui => { - let upper_value = value >> 16; - let instruction_args = args!(I, immediate: upper_value as u16, r1: dest); - Ok(Instruction::LoadUpperImmediate(instruction_args)) - } - _ => unreachable!(), - } -} - -fn build_jump_instruction( - opcode: Opcode, - args: &[crate::assembler::model::Token], -) -> Result { - let Some(address_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - let Some(offset_token) = args.get(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - - let address = expect_token!(address_token, Immediate)?; - let offset = expect_token!(offset_token, Register)?; - let instruction_args = args!(I, immediate: address as u16, r1: offset); - - match opcode { - Opcode::Jmp => Ok(Instruction::Jump(instruction_args)), - Opcode::Jeq => Ok(Instruction::JumpEq(instruction_args)), - Opcode::Jne => Ok(Instruction::JumpNeq(instruction_args)), - Opcode::Jgt => Ok(Instruction::JumpGt(instruction_args)), - Opcode::Jge => Ok(Instruction::JumpGe(instruction_args)), - Opcode::Jlt => Ok(Instruction::JumpLt(instruction_args)), - Opcode::Jle => Ok(Instruction::JumpLe(instruction_args)), - _ => unreachable!(), - } -} - -fn build_compare_instruction( - args: &[crate::assembler::model::Token], -) -> Result { - let Some(left_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - let Some(right_token) = args.get(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - - let left = expect_token!(left_token, Register)?; - let right = expect_token!(right_token, Register)?; - Ok(Instruction::Compare(args!(R, sr1: left, sr2: right))) -} - -fn build_inc_dec_instruction( - opcode: Opcode, - args: &[crate::assembler::model::Token], -) -> Result { - let Some(reg_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - - let reg = expect_token!(reg_token, Register)?; - match opcode { - Opcode::Inc => Ok(Instruction::Increment(args!(R, sr1: reg))), - Opcode::Dec => Ok(Instruction::Decrement(args!(R, sr1: reg))), - _ => unreachable!(), - } -} - -fn build_shift_instruction( - opcode: Opcode, - args: &[crate::assembler::model::Token], -) -> Result { - let Some(reg_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - let Some(amount_token) = args.get(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - - let reg = expect_token!(reg_token, Register)?; - let amount = expect_token!(amount_token, Immediate)? as u8; - - match opcode { - Opcode::Shl => Ok(Instruction::ShiftLeft(args!(R, sr1: reg, shamt: amount))), - Opcode::Shr => Ok(Instruction::ShiftRight(args!(R, sr1: reg, shamt: amount))), - _ => unreachable!(), - } -} - -fn build_arithmetic_instruction( - opcode: Opcode, - args: &[crate::assembler::model::Token], -) -> Result { - let Some(left_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - let Some(right_token) = args.get(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - let Some(dest_token) = args.get(2) else { - return Err(AssembleError::MissingArgument(2)); - }; - - let left = expect_token!(left_token, Register)?; - let right = expect_token!(right_token, Register)?; - let dest = expect_token!(dest_token, Register)?; - let instruction_args = args!(R, sr1: left, sr2: right, dr: dest); - - match opcode { - Opcode::Add => Ok(Instruction::Add(instruction_args)), - Opcode::Sub => Ok(Instruction::Sub(instruction_args)), - Opcode::And => Ok(Instruction::And(instruction_args)), - Opcode::Or => Ok(Instruction::Or(instruction_args)), - Opcode::Xor => Ok(Instruction::Xor(instruction_args)), - Opcode::Nand => Ok(Instruction::Nand(instruction_args)), - Opcode::Nor => Ok(Instruction::Nor(instruction_args)), - Opcode::Xnor => Ok(Instruction::Xnor(instruction_args)), - _ => unreachable!(), - } -} - -fn build_arithmetic_immediate_instruction( - opcode: Opcode, - args: &[crate::assembler::model::Token], -) -> Result { - let Some(reg_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - let Some(immediate_token) = args.get(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - let Some(dest_token) = args.get(2) else { - return Err(AssembleError::MissingArgument(2)); - }; - - let reg = expect_token!(reg_token, Register)?; - let immediate = expect_token!(immediate_token, Immediate)? as u16; - let dest = expect_token!(dest_token, Register)?; - let instruction_args = args!(I, immediate: immediate, r1: reg, r2: dest); - - match opcode { - Opcode::AddI => Ok(Instruction::AddImmediate(instruction_args)), - Opcode::SubI => Ok(Instruction::SubImmediate(instruction_args)), - _ => unreachable!(), - } -} - -fn build_not_instruction( - args: &[crate::assembler::model::Token], -) -> Result { - let Some(reg_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - let Some(dest_token) = args.get(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - - let reg = expect_token!(reg_token, Register)?; - let dest = expect_token!(dest_token, Register)?; - Ok(Instruction::Not(args!(R, sr1: reg, dr: dest))) -} - -fn build_interrupt_instruction( - args: &[crate::assembler::model::Token], -) -> Result { - let Some(code_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - - let code = expect_token!(code_token, Immediate)? as u8; - Ok(Instruction::Interrupt(Interrupt::Software(code))) -} - -fn build_data_instruction( - args: &[crate::assembler::model::Token], -) -> Result { - let Some(immediate_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - - let immediate = expect_token!(immediate_token, Immediate)?; - Ok(Instruction::Data(immediate)) -} - -fn build_segment_instruction( - args: &[crate::assembler::model::Token], -) -> Result { - let Some(immediate_token) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - - let immediate = expect_token!(immediate_token, Immediate)?; - Ok(Instruction::Segment(immediate)) -} diff --git a/assembler/src/assembler/engine.rs b/assembler/src/assembler/engine.rs deleted file mode 100644 index cfc19b2..0000000 --- a/assembler/src/assembler/engine.rs +++ /dev/null @@ -1,153 +0,0 @@ -//! Compiler engine for orchestrating the assembly process. - -use crate::assembler::{AssembleError, Program, Task}; -use common::prelude::Instruction; -use std::path::{Path, PathBuf}; - -/// Supported output formats for the assembler. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum OutputFormat { - /// Flat binary executable - Binary, - /// ELF relocatable object file - ElfObject, - /// ELF executable - ElfExecutable, -} - -/// Main compilation orchestrator that manages the assembly process. -pub struct CompilerEngine { - /// Configuration options for compilation - pub output_format: OutputFormat, - pub include_debug_info: bool, - pub optimization_level: u8, -} - -impl CompilerEngine { - /// Creates a new compiler engine with default settings. - #[must_use] - pub fn new() -> Self { - Self { - output_format: OutputFormat::Binary, - include_debug_info: false, - optimization_level: 0, - } - } - - /// Creates a new compiler engine with specified output format. - #[must_use] - pub fn with_output_format(output_format: OutputFormat) -> Self { - Self { - output_format, - include_debug_info: false, - optimization_level: 0, - } - } - - /// Sets the output format for compilation. - pub fn set_output_format(&mut self, format: OutputFormat) { - self.output_format = format; - } - - /// Enables or disables debug information generation. - pub fn set_debug_info(&mut self, enabled: bool) { - self.include_debug_info = enabled; - } - - /// Sets the optimization level (0-3). - pub fn set_optimization_level(&mut self, level: u8) { - self.optimization_level = level.min(3); - } - - /// Main assembly function that orchestrates the entire compilation process. - pub fn assemble(&self, main_path: &Path) -> Result, AssembleError> { - let program = Program::new(); - - // Set the main path in the program - program.set_main_path(main_path.to_path_buf())?; - - // Create and execute the main compilation task - let main_task = Task::new(main_path.to_path_buf(), program.clone())?; - let module = main_task.join()?; - - program.add_module(module)?; - - // Wait for all dependency compilation tasks to complete - self.wait_for_completion(&program)?; - - // Generate final instructions - let instructions = self.generate_instructions(&program)?; - - Ok(instructions) - } - - /// Waits for all compilation tasks to complete. - fn wait_for_completion(&self, program: &Program) -> Result<(), AssembleError> { - let tasks = program.get_tasks()?; - - for task in tasks { - let module = task.join()?; - program.add_module(module)?; - } - - Ok(()) - } - - /// Generates the final instruction stream from all compiled modules. - fn generate_instructions( - &self, - program: &Program, - ) -> Result, AssembleError> { - let mut all_nodes = Vec::new(); - - // Collect all nodes from all modules - for module in program.get_modules()? { - all_nodes.extend(module.nodes.clone()); - } - - // Apply resolution and code generation - crate::assembler::create_sections(&mut all_nodes)?; - crate::assembler::resolve_symbols(&mut all_nodes)?; - crate::assembler::codegen(all_nodes) - } - - /// Determines the default output path based on input path and output format. - fn default_output_path(&self, input_path: &Path) -> PathBuf { - let stem = input_path.file_stem().unwrap_or_default(); - let parent = input_path.parent().unwrap_or(Path::new(".")); - - let extension = match self.output_format { - OutputFormat::Binary => "bin", - OutputFormat::ElfObject => "o", - OutputFormat::ElfExecutable => "elf", - }; - - parent.join(format!("{}.{}", stem.to_string_lossy(), extension)) - } -} - -impl Default for CompilerEngine { - fn default() -> Self { - Self::new() - } -} - -/// Convenience function for simple assembly with default settings. -pub fn assemble(input_path: &Path) -> Result, AssembleError> { - let engine = CompilerEngine::new(); - engine.assemble(input_path) -} - -/// Convenience function for assembling to ELF object format. -pub fn assemble_to_object(input_path: &Path) -> Result, AssembleError> { - let engine = CompilerEngine::with_output_format(OutputFormat::ElfObject); - engine.assemble(input_path) -} - -/// Convenience function for assembling to ELF executable format. -pub fn assemble_to_executable( - input_path: &Path, -) -> Result, AssembleError> { - let engine = CompilerEngine::with_output_format(OutputFormat::ElfExecutable); - engine.assemble(input_path) -} diff --git a/assembler/src/assembler/error.rs b/assembler/src/assembler/error.rs deleted file mode 100644 index 22a97c8..0000000 --- a/assembler/src/assembler/error.rs +++ /dev/null @@ -1,114 +0,0 @@ -//! Error types for the DSA assembler. - -use std::fmt; - -/// Comprehensive error type for assembly operations. -#[derive(Debug)] -pub enum AssembleError { - /// IO-related errors (file not found, permission denied, etc.). - Io(std::io::Error), - - /// Lexical analysis errors - Lexer { - message: String, - line: usize, - column: usize, - }, - - /// Parsing errors - Parser { - message: String, - line: usize, - token: String, - }, - - /// Symbol resolution errors - Symbol { - message: String, - symbol_name: String, - }, - - /// Code generation errors - Codegen { - message: String, - instruction: String, - }, - - /// Dependency resolution errors - Dependency { - message: String, - module_path: String, - }, - - /// Threading and synchronization errors - Threading(String), - - /// Output generation errors - Output { message: String, format: String }, - - /// Generic assembly error - Generic(String), -} - -impl fmt::Display for AssembleError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Io(msg) => write!(f, "IO Error: {}", msg), - Self::Lexer { - message, - line, - column, - } => { - write!(f, "Lexer Error at {}:{}: {}", line, column, message) - } - Self::Parser { - message, - line, - token, - } => { - write!( - f, - "Parser Error at line {}, token '{}': {}", - line, token, message - ) - } - Self::Symbol { - message, - symbol_name, - } => { - write!(f, "Symbol Error '{}': {}", symbol_name, message) - } - Self::Codegen { - message, - instruction, - } => { - write!(f, "Codegen Error in '{}': {}", instruction, message) - } - Self::Dependency { - message, - module_path, - } => { - write!(f, "Dependency Error in '{}': {}", module_path, message) - } - Self::Threading(msg) => write!(f, "Threading Error: {}", msg), - Self::Output { message, format } => { - write!(f, "Output Error ({}): {}", format, message) - } - Self::Generic(msg) => write!(f, "Assembly Error: {}", msg), - } - } -} - -impl std::error::Error for AssembleError {} - -impl From for AssembleError { - fn from(error: std::io::Error) -> Self { - Self::Io(error) - } -} - -impl From> for AssembleError { - fn from(error: std::sync::PoisonError) -> Self { - Self::Threading(format!("Mutex poisoned: {}", error)) - } -} diff --git a/assembler/src/assembler/expand.rs b/assembler/src/assembler/expand.rs deleted file mode 100644 index ed25bd3..0000000 --- a/assembler/src/assembler/expand.rs +++ /dev/null @@ -1,368 +0,0 @@ -use common::prelude::Register; - -use crate::assembler::model::{Node, Opcode, Token}; -use crate::{assembler::AssembleError, expect_token, expect_type, node}; - -pub fn expand_pseudo_ops( - mut nodes: Vec, - module: u64, -) -> Result, AssembleError> { - let mut result = Vec::::with_capacity(nodes.len()); - - for node in &mut nodes { - if try_expand(node.clone(), &mut result, module).is_err() { - result.push(node.clone()); - } - } - - Ok(result) -} - -fn try_expand( - node: Node, - result: &mut Vec, - _module: u64, -) -> Result<(), AssembleError> { - match node.opcode() { - Opcode::Push => expand_push(&node, result)?, - Opcode::Pop => expand_pop(&node, result)?, - Opcode::Pusha => expand_pusha(&node, result)?, - Opcode::Popa => expand_popa(&node, result)?, - Opcode::Call => expand_call(&node, result)?, - Opcode::Return => expand_return(&node, result), - Opcode::Ldb | Opcode::Ldbs | Opcode::Ldh | Opcode::Ldhs | Opcode::Ldw => { - expand_ldx(&node, result)?; - } - Opcode::Stb | Opcode::Sth | Opcode::Stw => expand_stx(&node, result)?, - - Opcode::Lwi => expand_lwi(&node, result)?, - Opcode::Resb | Opcode::Resh | Opcode::Resw => expand_resx(&node, result)?, - Opcode::Db | Opcode::Dh | Opcode::Dw => expand_dx(&node, result)?, - _ => result.push(node), - } - Ok(()) -} - -fn expand_push(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { - let label = current.label(); - let Ok(arg0) = current.arg(0) else { - return Err(AssembleError::Generic); - }; - - let reg = expect_type!(arg0, Register)?; - let spr = Token::Register(Register::Spr); - - nodes.extend(vec![ - node!(label, Opcode::SubI, spr, 4, spr), - node!(None, Opcode::Stw, reg, spr, 0), - ]); - - Ok(()) -} - -fn expand_pusha(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { - let label = current.label(); - let Ok(arg0) = current.arg(0) else { - return Err(AssembleError::Generic); - }; - - let count = expect_token!(arg0, Immediate)?; - let spr = Token::Register(Register::Spr); - let registers: Vec = Register::general(); - - nodes.push(node!( - label, - Opcode::SubI, - spr, - Token::Immediate(count * 4), - spr - )); - - nodes.extend((0..count).rev().map(|i| { - node!( - None, - Opcode::Stw, - Token::Register(registers[i as usize]), - spr, - Token::Immediate(i * 4) - ) - })); - - Ok(()) -} - -fn expand_popa(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { - let label = current.label(); - - let Ok(arg0) = current.arg(0) else { - return Err(AssembleError::Generic); - }; - - let count = expect_token!(arg0, Immediate)?; - let spr = Token::Register(Register::Spr); - let registers: Vec = Register::general(); - - nodes.extend((0..count).rev().map(|i| { - node!( - { if i == 0 { label.clone() } else { None } }, - Opcode::Ldw, - spr, - Token::Register(registers[i as usize]), - Token::Immediate(i * 4) - ) - })); - - nodes.push(node!( - None, - Opcode::AddI, - spr, - Token::Immediate(count * 4), - spr - )); - - Ok(()) -} - -fn expand_call(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { - let label = current.label(); - - let Ok(arg0) = current.arg(0) else { - return Err(AssembleError::Generic); - }; - - let addr = expect_type!(arg0, Symbol)?; - let spr = Token::Register(Register::Spr); - let pcx = Token::Register(Register::Pcx); - let zero = Token::Register(Register::Zero); - - nodes.extend(vec![ - node!(label, Opcode::SubI, spr, 4, spr), - node!(None, Opcode::Stw, pcx, spr, 0), - node!(None, Opcode::Jmp, addr, zero), - ]); - - Ok(()) -} - -fn expand_return(current: &Node, nodes: &mut Vec) { - let label = current.label(); - let spr = Token::Register(Register::Spr); - let ret = Token::Register(Register::Ret); - - nodes.extend(vec![ - node!(label, Opcode::Ldw, spr, ret, 0), - node!(None, Opcode::AddI, spr, 4, spr), - node!(None, Opcode::Jmp, 4, ret), - ]); -} - -fn expand_pop(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { - let label = current.label(); - - let Ok(arg0) = current.arg(0) else { - return Err(AssembleError::Generic); - }; - - let reg = expect_type!(arg0, Register)?; - let spr = Token::Register(Register::Spr); - - nodes.extend(vec![ - node!(label, Opcode::Ldw, spr, reg, 0), - node!(None, Opcode::AddI, spr, 4, spr), - ]); - - Ok(()) -} - -fn expand_ldx(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { - let opcode = current.opcode(); - let args: Vec = current.args().into_iter().take(3).collect(); - - let Some(name) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - - let Some(reg) = args.get(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - - let Some(offset) = args.get(2) else { - return Err(AssembleError::MissingArgument(2)); - }; - - let name = expect_type!(name, Symbol)?; - let reg = expect_type!(reg, Register)?; - let offset = expect_type!(offset, Immediate)?; - - nodes.extend(vec![ - node!(current.label(), Opcode::Lli, name, reg), - node!(None, Opcode::Lui, name, reg), - node!(None, opcode, reg, reg, offset), - ]); - - Ok(()) -} - -fn expand_stx(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { - let opcode = current.opcode(); - - let args: Vec = current.args().into_iter().take(3).collect(); - - let Some(base) = args.first() else { - return Err(AssembleError::MissingArgument(0)); - }; - - let Some(dest) = args.get(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - - let Some(offset) = args.get(2) else { - return Err(AssembleError::MissingArgument(2)); - }; - - let base = expect_type!(base, Register)?; - let dest = expect_type!(dest, Symbol)?; - let offset = expect_type!(offset, Immediate)?; - let temp = Token::Register(Register::Acc); - - nodes.extend(vec![ - node!(current.label(), Opcode::Lli, dest, temp), - node!(None, Opcode::Lui, dest, temp), - node!(None, opcode, base, temp, offset), - ]); - - Ok(()) -} - -fn expand_lwi(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { - let Ok(val) = current.arg(0) else { - return Err(AssembleError::MissingArgument(0)); - }; - - let Ok(reg) = current.arg(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - - let val = expect_type!(val, Symbol, Immediate)?; - let reg = expect_type!(reg, Register)?; - - nodes.extend(vec![ - node!(current.label(), Opcode::Lli, val, reg), - node!(None, Opcode::Lui, val, reg), - ]); - - Ok(()) -} - -fn expand_resx(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { - let Ok(region_label) = current.arg(0) else { - return Err(AssembleError::MissingArgument(0)); - }; - - let Ok(size) = current.arg(1) else { - return Err(AssembleError::MissingArgument(1)); - }; - - let region_label = expect_token!(region_label, Symbol)?; - let size = expect_token!(size, Immediate)?; - - let units_per = match current.opcode() { - Opcode::Resb => 4, - Opcode::Resh => 2, - Opcode::Resw => 1, - _ => unreachable!(), - }; - - let mut buffer = vec![]; - // push the inital node with the label - for _ in 0..size.div_ceil(units_per) { - // push the rest of the nodes - buffer.push(node!(None, Opcode::Data, 0)); - } - buffer[0].symbol = Some(region_label); - nodes.extend(buffer); - - Ok(()) -} - -fn expand_dx(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { - let Ok(region_label) = current.arg(0) else { - return Err(AssembleError::MissingArgument(0)); - }; - - let region_label = expect_token!(region_label, Symbol)?; - let size = match current.opcode() { - Opcode::Db => 4, - Opcode::Dh => 2, - Opcode::Dw => 1, - _ => unreachable!(), - }; - - let mut buffer = vec![]; - - let mut args = current.args(); - let _label = args.remove(0); - - for word in process_dx_data(args, size)? { - buffer.push(node!(None, Opcode::Data, Token::Immediate(word))); - } - buffer[0].symbol = Some(region_label); - - nodes.extend(buffer); - Ok(()) -} - -fn process_dx_data(args: Vec, size: usize) -> Result, AssembleError> { - assert!(matches!(size, 1 | 2 | 4)); - - let mut buffer = Vec::::new(); - - // Process each token - for token in args { - match token { - Token::StringLit(mut s) => { - s.push('\0'); - // Split string into chars and write as bytes - for ch in s.chars() { - // Convert char to bytes (UTF-8 encoding) - let mut char_buf = [0u8; 4]; - let char_bytes = ch.encode_utf8(&mut char_buf); - buffer.extend_from_slice(char_bytes.as_bytes()); - } - } - Token::Immediate(value) => { - // Split u32 into bytes (little-endian) - buffer.extend_from_slice(&value.to_be_bytes()); - } - _ => { - return Err(AssembleError::Generic); - } - } - - // Pad buffer to alignment boundary with zeros - let remainder = buffer.len() % size; - if remainder != 0 { - let padding = size - remainder; - buffer.resize(buffer.len() + padding, 0); - } - } - - // Convert byte buffer to u32 chunks - // Pad final buffer to u32 boundary if needed - let remainder = buffer.len() % 4; - if remainder != 0 { - let padding = 4 - remainder; - buffer.resize(buffer.len() + padding, 0); - } - - // Convert bytes to u32s efficiently using chunks_exact - let result = buffer - .chunks_exact(4) - .map(|chunk| { - // Convert 4 bytes to u32 (little-endian) - u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]) - }) - .collect(); - - Ok(result) -} diff --git a/assembler/src/assembler/lexer.rs b/assembler/src/assembler/lexer.rs deleted file mode 100644 index 9b624a4..0000000 --- a/assembler/src/assembler/lexer.rs +++ /dev/null @@ -1,167 +0,0 @@ -use std::str::FromStr; - -use crate::assembler::AssembleError; -use crate::assembler::model::{Module, Opcode, Symbol, Token}; -use common::prelude::Register; - -pub fn lexer(mut program: String, module: u64) -> Result, AssembleError> { - let mut tokens = Vec::new(); - - program = program.replace(',', ""); - let lines = program.lines(); - let mut literal = String::new(); - - for line in lines { - for (i, token) in line.split_whitespace().enumerate() { - if token.starts_with("//") { - break; - } - - if let Some(stripped) = token.strip_prefix('"') { - literal.push_str(stripped); - } - - if !literal.is_empty() { - if !token.starts_with('"') { - if i > 0 { - literal.push(' '); - } - literal.push_str(token); - } - - if token.ends_with('"') { - literal.pop(); // remove the closing quote - - tokens.push(Token::StringLit(literal)); - literal = String::new(); - } - - continue; - } - - if let Some(token) = parse_register(token)? { - tokens.push(token); - } else if let Some(token) = parse_opcode(token)? { - tokens.push(token); - } else if let Some(token) = parse_hex(token)? { - tokens.push(token); - } else if let Some(token) = parse_octal(token)? { - tokens.push(token); - } else if let Some(token) = parse_binary(token)? { - tokens.push(token); - } else if let Some(token) = parse_decimal(token)? { - tokens.push(token); - } else if let Some(token) = parse_label(token, module)? { - tokens.push(token); - } else if let Some(token) = parse_symbol(token, module)? { - tokens.push(token); - } else { - return Err(AssembleError::Generic("Token not matched!".to_string())); - } - } - } - - Ok(tokens) -} -pub fn parse_register(token: &str) -> Result, AssembleError> { - Ok(Register::try_from(token).map(Token::Register).ok()) -} - -pub fn parse_opcode(token: &str) -> Result, AssembleError> { - if Opcode::OPCODES.contains(&token) { - Ok(Some(Token::Opcode(Opcode::from_str(token).expect( - "Opcode::from_str failed for a valid opcode token", - )))) - } else { - Ok(None) - } -} - -pub fn parse_hex(token: &str) -> Result, AssembleError> { - if (token.len() < 3) | !token.starts_with("0x") { - return Ok(None); - } - - let Some(lit) = &token.get(2..) else { - return Err(AssembleError::InvalidArg); - }; - - u32::from_str_radix(lit, 16).map_or(Err(AssembleError::Generic), |value| { - Ok(Some(Token::Immediate(value))) - }) -} - -pub fn parse_octal(token: &str) -> Result, AssembleError> { - if (token.len() < 3) | !token.starts_with("0o") { - return Ok(None); - } - - let Some(lit) = &token.get(2..) else { - return Err(AssembleError::InvalidArg); - }; - - u32::from_str_radix(lit, 8).map_or(Err(AssembleError::Generic), |value| { - Ok(Some(Token::Immediate(value))) - }) -} - -pub fn parse_binary(token: &str) -> Result, AssembleError> { - if (token.len() < 3) | !token.starts_with("0b") { - return Ok(None); - } - - let Some(lit) = &token.get(2..) else { - return Err(AssembleError::InvalidArg); - }; - - u32::from_str_radix(lit, 2).map_or(Err(AssembleError::Generic), |value| { - Ok(Some(Token::Immediate(value))) - }) -} - -pub fn parse_decimal(token: &str) -> Result, AssembleError> { - let Ok(tok) = token.parse::() else { - return Ok(None); - }; - - Ok(Some(Token::Immediate(tok))) -} - -pub fn parse_label(token: &str, module: u64) -> Result, AssembleError> { - if token.ends_with(':') { - Ok(Some(Token::Symbol(Symbol { - name: token[0..token.len() - 1].to_string(), - module: Module::Resolved(module), - }))) - } else { - Ok(None) - } -} - -pub fn parse_symbol(token: &str, module: u64) -> Result, AssembleError> { - let Some(tokc) = token.chars().next() else { - return Err(AssembleError::Generic); // TODO: What is this error? - }; - - if tokc.is_numeric() { - return Ok(None); - } - - let mut split = token.splitn(2, "::"); - let Some(symbol1) = split.next() else { - return Err(AssembleError::InvalidArg); - }; - let symbol1 = symbol1.to_string(); - - if let Some(symbol2) = split.next() { - Ok(Some(Token::Symbol(Symbol { - name: symbol2.to_string(), - module: Module::Unresolved(symbol1), - }))) - } else { - Ok(Some(Token::Symbol(Symbol { - name: symbol1, - module: Module::Resolved(module), - }))) - } -} diff --git a/assembler/src/assembler/macros.rs b/assembler/src/assembler/macros.rs deleted file mode 100644 index c40fc4c..0000000 --- a/assembler/src/assembler/macros.rs +++ /dev/null @@ -1,139 +0,0 @@ -//! Macros used throughout the assembler - -use crate::assembler::model::{Node, Opcode, Symbol, Token}; - -/// Parse DSA assembly code with optional formatting -/// -/// # Examples -/// ``` -/// // With formatting: -/// let nodes = dsa!(hash, "mov r1, {}", 42)?; -/// -/// // Without formatting: -/// let nodes = dsa!(hash, "mov r1, 42")?; -/// ``` -#[macro_export] -macro_rules! dsa { - // Version with formatting arguments - ($hash:expr, $input:expr, $($args:expr),+) => {{ - let input = format!($input, $($args),+); - let tokens = $crate::lexer::lexer(input, $hash)?; - let parsed = $crate::parser::Parser::parse_nodes(tokens)?; - parsed - }}; - // Version without formatting - ($hash:expr, $input:expr) => {{ - let input = String::from($input); - let tokens = $crate::lexer::lexer(input, $hash)?; - let parsed = $crate::parser::Parser::parse_nodes(tokens)?; - parsed - }}; -} - -/// Creates a new Node with the given symbol, opcode, and tokens -#[macro_export] -macro_rules! node { - ($symbol: expr, $opcode: expr, args: $tokens: expr) => { - $crate::assembler::model::Node::new($symbol.clone(), $opcode.clone(), $tokens.clone()) - }; - - ($symbol: expr, $opcode: expr, $($tokens: expr),+) => { - $crate::assembler::model::Node::new( - $symbol.clone(), - $opcode.clone(), - vec![$(node!(@convert_token $tokens)),+] - ) - }; - - ($symbol: expr, $opcode: expr) => { - $crate::assembler::model::Node::new( - $symbol.clone(), - $opcode.clone(), - Vec::new() - ) - }; - - (@convert_token $token: literal) => { - $crate::assembler::model::Token::Immediate($token) - }; - - (@convert_token $token: expr) => { - $token.clone() - }; -} - -/// Extracts a specific token type from a token -#[macro_export] -macro_rules! expect_token { - ($token:expr, Symbol) => { - match $token { - $crate::assembler::model::Token::Symbol(value) => Ok(value.clone()), - other => Err($crate::assembler::AssembleError::UnexpectedToken( - other.clone(), - $crate::assembler::model::TokenType::Symbol, - )), - } - }; - ($token:expr, Register) => { - match $token { - $crate::assembler::model::Token::Register(value) => Ok(value.clone()), - other => Err($crate::assembler::AssembleError::UnexpectedToken( - other.clone(), - $crate::assembler::model::TokenType::Register, - )), - } - }; - ($token:expr, Immediate) => { - match $token { - $crate::assembler::model::Token::Immediate(value) => Ok(value.clone()), - other => Err($crate::assembler::AssembleError::UnexpectedToken( - other.clone(), - $crate::assembler::model::TokenType::Immediate, - )), - } - }; - ($token:expr, StringLit) => { - match $token { - $crate::assembler::model::Token::StringLit(value) => Ok(value.clone()), - other => Err($crate::assembler::AssembleError::UnexpectedToken( - other.clone(), - $crate::assembler::model::TokenType::StringLit, - )), - } - }; - ($token:expr, Opcode) => { - match $token { - $crate::assembler::model::Token::Opcode(value) => Ok(value.clone()), - other => Err($crate::assembler::AssembleError::UnexpectedToken( - other.clone(), - $crate::assembler::model::TokenType::Opcode, - )), - } - }; -} - -/// Checks if a token matches any of the specified types -#[macro_export] -macro_rules! expect_type { - ($token:expr, $($variant:ident),+) => {{ - let token = $token; - match &token { - $( - $crate::assembler::model::Token::$variant(_) => Ok(token.clone()), - )+ - other => { - let expected_type = expect_type!(@get_first_type $($variant),+); - Err($crate::assembler::AssembleError::UnexpectedToken( - other.clone().clone(), - expected_type, - )) - } - } - }}; - - (@get_first_type Symbol $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Symbol }; - (@get_first_type Register $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Register }; - (@get_first_type Immediate $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Immediate }; - (@get_first_type StringLit $(, $rest:ident)*) => { $crate::assembler::model::TokenType::StringLit }; - (@get_first_type Opcode $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Opcode }; -} diff --git a/assembler/src/assembler/mod.rs b/assembler/src/assembler/mod.rs deleted file mode 100644 index 940b28a..0000000 --- a/assembler/src/assembler/mod.rs +++ /dev/null @@ -1,43 +0,0 @@ -//! DSA Assembler module - converts assembly source code into executable instructions. - -use common::prelude::Instruction; -use std::path::Path; - -// Module declarations -#[macro_use] -pub mod macros; - -pub mod codegen; -pub mod engine; -pub mod error; -pub mod expand; -pub mod lexer; -pub mod model; -pub mod parser; -pub mod program; -pub mod resolver; -pub mod task; -pub mod util; - -// Re-exports for backward compatibility and convenience -pub use self::{ - codegen::codegen, - engine::{ - CompilerEngine, OutputFormat, assemble, assemble_to_executable, - assemble_to_object, - }, - error::AssembleError, - expand::expand_pseudo_ops, - lexer::lexer, - model::{Module, Node, Opcode, Symbol, Token, TokenType}, - parser::Parser, - program::Program, - resolver::{create_sections, resolve_dependencies, resolve_symbols}, - task::Task, - util::{log, quick_hash}, -}; - -/// The old assemble function for compatibility reasons. -pub fn legacy_assemble(src: &Path) -> Result, AssembleError> { - engine::assemble(src) -} diff --git a/assembler/src/assembler/model.rs b/assembler/src/assembler/model.rs deleted file mode 100644 index 5173f49..0000000 --- a/assembler/src/assembler/model.rs +++ /dev/null @@ -1,525 +0,0 @@ -//! Data models for the DSA assembler. - -use crate::assembler::{AssembleError, Parser, Program, expand_pseudo_ops, lexer}; -use std::path::PathBuf; - -use std::{fmt, str::FromStr}; - -use common::prelude::Register; -use uuid::Uuid; - -#[derive(Debug, Clone, Hash)] -pub struct Node { - pub symbol: Option, - pub opcode: Opcode, - pub tokens: Vec, -} - -impl Node { - #[must_use] - pub const fn new(symbol: Option, opcode: Opcode, tokens: Vec) -> Self { - Self { - symbol, - opcode, - tokens, - } - } - - #[must_use] - pub fn label(&self) -> Option { - self.symbol.clone() - } - - #[must_use] - pub const fn opcode(&self) -> Opcode { - self.opcode - } - - #[must_use] - pub fn args(&self) -> Vec { - self.tokens.clone() - } - - pub fn arg(&self, index: usize) -> Result { - self.args() - .get(index) - .cloned() - // TODO: This is a bad place to throw an error unless we write code to attach - // context. - .ok_or(AssembleError::Generic("Invalid argument index".to_string())) - } -} - -impl fmt::Display for Node { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let symbol = self - .label() - .as_ref() - .map_or_else(String::new, |symbol| format!("{symbol}:\n")); - - write!( - f, - "\x1b[93m{} \t\x1b[94m{} \x1b[37m{:?} \x1b[0m", - symbol, - self.opcode(), - self.args() - ) - } -} - -impl fmt::Display for Symbol { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} ( module: {})", self.name, self.module) - } -} - -impl fmt::Display for Opcode { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::Nop => write!(f, "nop"), - Self::Mov => write!(f, "mov"), - Self::Movs => write!(f, "movs"), - Self::Ldb => write!(f, "ldb"), - Self::Ldbs => write!(f, "ldbs"), - Self::Ldh => write!(f, "ldh"), - Self::Ldhs => write!(f, "ldhs"), - Self::Ldw => write!(f, "ldw"), - Self::Stb => write!(f, "stb"), - Self::Sth => write!(f, "sth"), - Self::Stw => write!(f, "stw"), - Self::Lli => write!(f, "lli"), - Self::Lui => write!(f, "lui"), - Self::Jmp => write!(f, "jmp"), - Self::Jeq => write!(f, "jeq"), - Self::Jne => write!(f, "jne"), - Self::Jgt => write!(f, "jgt"), - Self::Jge => write!(f, "jge"), - Self::Jlt => write!(f, "jlt"), - Self::Jle => write!(f, "jle"), - Self::Cmp => write!(f, "cmp"), - Self::Inc => write!(f, "inc"), - Self::Dec => write!(f, "dec"), - Self::Shl => write!(f, "shl"), - Self::Shr => write!(f, "shr"), - Self::Add => write!(f, "add"), - Self::Sub => write!(f, "sub"), - Self::And => write!(f, "and"), - Self::Or => write!(f, "or"), - Self::Not => write!(f, "not"), - Self::Xor => write!(f, "xor"), - Self::Nand => write!(f, "nand"), - Self::Nor => write!(f, "nor"), - Self::Xnor => write!(f, "xnor"), - Self::Int => write!(f, "int"), - Self::Irt => write!(f, "irt"), - Self::Hlt => write!(f, "hlt"), - Self::AddI => write!(f, "addi"), - Self::SubI => write!(f, "subi"), - Self::Db => write!(f, "db"), - Self::Dh => write!(f, "dh"), - Self::Dw => write!(f, "dw"), - Self::Resb => write!(f, "resb"), - Self::Resh => write!(f, "resh"), - Self::Resw => write!(f, "resw"), - Self::Push => write!(f, "push"), - Self::Pop => write!(f, "pop"), - Self::Lwi => write!(f, "lwi"), - Self::Call => write!(f, "call"), - Self::Return => write!(f, "return"), - Self::Pusha => write!(f, "pusha"), - Self::Popa => write!(f, "popa"), - - // meta instructions - Self::Include => write!(f, "include"), - Self::Data => write!(f, "data"), - Self::Segment => write!(f, "[SEGMENT]"), - } - } -} - -#[derive(Debug, Clone, Eq)] -pub struct Symbol { - pub name: String, - pub module: Module, -} - -impl std::hash::Hash for Symbol { - fn hash(&self, state: &mut H) { - self.name.hash(state); - self.module.hash(state); - } -} - -impl PartialEq for Symbol { - fn eq(&self, other: &Self) -> bool { - self.name == other.name && self.module == other.module - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum TokenType { - Symbol(Symbol), - Register(Register), - Immediate(u32), - StringLit(String), - Opcode(Opcode), -} - -pub struct Token { - token_type: TokenType, - source_info: SourceInfo, -} - -/// Information on where the token is within the source. -pub struct SourceInfo { - /// The line number within the source file underpinned by `module_id`. - pub line_no: usize, - /// The ID of the module containing this token. - pub module_id: Uuid, - /// The indexes where this token may be found (line-local). - pub span: std::ops::Range, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum Opcode { - // Real instructions (0x00-0x26) - Nop, - Mov, - Movs, - Ldb, - Ldbs, - Ldh, - Ldhs, - Ldw, - Stb, - Sth, - Stw, - Lli, - Lui, - Jmp, - Jeq, - Jne, - Jgt, - Jge, - Jlt, - Jle, - Cmp, - Inc, - Dec, - Shl, - Shr, - Add, - Sub, - And, - Or, - Not, - Xor, - Nand, - Nor, - Xnor, - Int, - Irt, - Hlt, - AddI, - SubI, - - // Pseudo-instructions - Db, - Dh, - Dw, - Resb, - Resh, - Resw, - Push, - Pop, - Pusha, - Popa, - Lwi, - Call, - Return, - - // meta instructions (these aren't present in the binary as instructions) - Include, - Data, - Segment, -} - -#[derive(Debug)] -pub enum OpcodeFromStrError { - InvalidRegister(&'static str), - InvalidOpcode(String), -} - -impl std::fmt::Display for OpcodeFromStrError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::InvalidRegister(reg) => write!(f, "register does not exist: {reg}"), - Self::InvalidOpcode(op) => write!(f, "instruction does not exist: {op}"), - } - } -} - -impl std::error::Error for OpcodeFromStrError {} - -impl FromStr for Opcode { - type Err = OpcodeFromStrError; - - fn from_str(s: &str) -> Result { - match s.to_lowercase().as_str() { - "nop" => Ok(Self::Nop), - "mov" => Ok(Self::Mov), - "movs" => Ok(Self::Movs), - "ldb" => Ok(Self::Ldb), - "ldbs" => Ok(Self::Ldbs), - "ldh" => Ok(Self::Ldh), - "ldhs" => Ok(Self::Ldhs), - "ldw" => Ok(Self::Ldw), - "stb" => Ok(Self::Stb), - "sth" => Ok(Self::Sth), - "stw" => Ok(Self::Stw), - "lli" => Ok(Self::Lli), - "lui" => Ok(Self::Lui), - "jmp" => Ok(Self::Jmp), - "jeq" => Ok(Self::Jeq), - "jne" => Ok(Self::Jne), - "jgt" => Ok(Self::Jgt), - "jge" => Ok(Self::Jge), - "jlt" => Ok(Self::Jlt), - "jle" => Ok(Self::Jle), - "cmp" => Ok(Self::Cmp), - "inc" => Ok(Self::Inc), - "dec" => Ok(Self::Dec), - "shl" => Ok(Self::Shl), - "shr" => Ok(Self::Shr), - "add" => Ok(Self::Add), - "sub" => Ok(Self::Sub), - "and" => Ok(Self::And), - "or" => Ok(Self::Or), - "not" => Ok(Self::Not), - "xor" => Ok(Self::Xor), - "nand" => Ok(Self::Nand), - "nor" => Ok(Self::Nor), - "xnor" => Ok(Self::Xnor), - "int" => Ok(Self::Int), - "irt" => Ok(Self::Irt), - "hlt" => Ok(Self::Hlt), - "addi" => Ok(Self::AddI), - "subi" => Ok(Self::SubI), - "db" => Ok(Self::Db), - "dh" => Ok(Self::Dh), - "dw" => Ok(Self::Dw), - "resb" => Ok(Self::Resb), - "resh" => Ok(Self::Resh), - "resw" => Ok(Self::Resw), - "push" => Ok(Self::Push), - "pop" => Ok(Self::Pop), - "lwi" => Ok(Self::Lwi), - "include" => Ok(Self::Include), - "call" => Ok(Self::Call), - "return" => Ok(Self::Return), - "pusha" => Ok(Self::Pusha), - "popa" => Ok(Self::Popa), - _ => Err(OpcodeFromStrError::InvalidOpcode(s.to_string())), - } - } -} - -impl Opcode { - pub const OPCODES: &[&str] = &[ - // Real instructions (0x00-0x26) - "nop", "mov", "movs", "ldb", "ldbs", "ldh", "ldhs", "ldw", "stb", "sth", "stw", - "lli", "lui", "jmp", "jeq", "jne", "jgt", "jge", "jlt", "jle", "cmp", "inc", - "dec", "shl", "shr", "add", "sub", "and", "or", "not", "xor", "nand", "nor", - "xnor", "int", "irt", "hlt", "addi", "subi", // Pseudo-instructions - "db", "dh", "dw", "resb", "resh", "resw", "push", "pop", "lwi", "call", "return", - "pusha", "popa", // meta instructions - "include", - ]; - - #[must_use] - pub const fn to_opcode_value(&self) -> Option { - match self { - Self::Nop => Some(0x00), - Self::Mov => Some(0x01), - Self::Movs => Some(0x02), - Self::Ldb => Some(0x03), - Self::Ldbs => Some(0x04), - Self::Ldh => Some(0x05), - Self::Ldhs => Some(0x06), - Self::Ldw => Some(0x07), - Self::Stb => Some(0x08), - Self::Sth => Some(0x09), - Self::Stw => Some(0x0A), - Self::Lli => Some(0x0B), - Self::Lui => Some(0x0C), - Self::Jmp => Some(0x0D), - Self::Jeq => Some(0x0E), - Self::Jne => Some(0x0F), - Self::Jgt => Some(0x10), - Self::Jge => Some(0x11), - Self::Jlt => Some(0x12), - Self::Jle => Some(0x13), - Self::Cmp => Some(0x14), - Self::Inc => Some(0x15), - Self::Dec => Some(0x16), - Self::Shl => Some(0x17), - Self::Shr => Some(0x18), - Self::Add => Some(0x19), - Self::Sub => Some(0x1A), - Self::And => Some(0x1B), - Self::Or => Some(0x1C), - Self::Not => Some(0x1D), - Self::Xor => Some(0x1E), - Self::Nand => Some(0x1F), - Self::Nor => Some(0x20), - Self::Xnor => Some(0x21), - Self::Int => Some(0x22), - Self::Irt => Some(0x23), - Self::Hlt => Some(0x24), - Self::AddI => Some(0x25), - Self::SubI => Some(0x26), - Self::Segment => Some(0x27), - - // Pseudo-instructions don't have opcode values - _ => None, - } - } - - #[must_use] - pub const fn is_pseudo_instruction(&self) -> bool { - matches!( - self, - Self::Db - | Self::Dh - | Self::Dw - | Self::Resb - | Self::Resh - | Self::Resw - | Self::Push - | Self::Pop - | Self::Lwi - ) - } -} - -/// Represents a single source module and its compilation state. -#[derive(Debug, Clone, Hash)] -pub struct Module { - pub id: Uuid, - pub path: PathBuf, - pub hash: u64, - pub nodes: Vec, - program: Program, -} - -impl PartialEq for Module { - fn eq(&self, other: &Self) -> bool { - self.id == other.id - } -} - -impl std::fmt::Display for Module { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "Module {{ id: {}, path: {}, nodes: {} }}", - self.id, - self.path.display(), - self.nodes.len() - ) - } -} - -impl Eq for Module {} - -impl Module { - #[must_use] - pub fn new(path: PathBuf, hash: u64, nodes: Vec, program: Program) -> Self { - Self { - id: Uuid::new_v4(), - path, - hash, - nodes, - program, - } - } - - /// Executes the full compilation pipeline for this module. - pub fn compile(&mut self) -> Result<(), AssembleError> { - self.lex()?; - self.parse()?; - self.expand()?; - self.prepare_dependencies()?; - Ok(()) - } - - /// Lexical analysis stage. - pub fn lex(&mut self) -> Result, AssembleError> { - // Log the build - if let Ok(path) = self.path.canonicalize() { - let _ = self.program.log(&format!( - "{:20} {:20} [{}]", - "Building", - self.get_filename(), - path.display() - )); - } - - // Read and lex the file - let source = std::fs::read_to_string(&self.path)?; - lexer(source, self.hash) - } - - /// Parsing stage. - pub fn parse(&mut self) -> Result<(), AssembleError> { - let source = std::fs::read_to_string(&self.path)?; - let tokens = lexer(source, self.hash)?; - let nodes = Parser::parse_nodes(tokens)?; - self.nodes = nodes; - Ok(()) - } - - /// Pseudo-instruction expansion stage. - pub fn expand(&mut self) -> Result<(), AssembleError> { - self.nodes = expand_pseudo_ops(self.nodes.clone(), self.hash)?; - Ok(()) - } - - /// Dependency resolution stage. - pub fn prepare_dependencies(&self) -> Result<(), AssembleError> { - // let base_dir = self.path.parent(); - - let dependencies = Parser::get_dependencies(&self.nodes, &self.path)?; - - for dep in dependencies { - if self.program.is_registered(&dep)? { - // we have already built this module! - continue; - } - self.program.register(&dep)?; - - // create new module task - match Task::new(dep, self.program.clone()) { - Ok(task) => { - if let Err(e) = self.program.add_task(task) { - eprintln!("Error adding task: {e}"); - } - } - Err(why) => { - eprintln!("Error building program: {why}"); - } - } - } - Ok(()) - } - - /// Gets the filename from a [`PathBuf`]. - fn get_filename(&self) -> &str { - self.path - .file_name() - .and_then(|f| f.to_str()) - .unwrap_or_default() - } -} - -use crate::assembler::Task; diff --git a/assembler/src/assembler/parser.rs b/assembler/src/assembler/parser.rs deleted file mode 100644 index 7ac26b2..0000000 --- a/assembler/src/assembler/parser.rs +++ /dev/null @@ -1,368 +0,0 @@ -use std::path::{Path, PathBuf}; - -use crate::{assembler::AssembleError, expect_token, expect_type, node}; - -use crate::assembler::model::{Node, Opcode, Token}; -use common::prelude::*; - -pub struct Parser { - tokens: Vec, - nodes: Vec, -} - -#[derive(Debug)] -pub struct Program { - pub nodes: Vec, -} - -impl Program { - #[must_use] - pub const fn new() -> Self { - Self { nodes: vec![] } - } - - pub fn add_module(&mut self, module: Vec) { - self.nodes.extend(module); - } - - pub fn parser(&mut self) -> Parser { - Parser { - tokens: vec![], - nodes: self.nodes.clone(), - } - } -} - -impl Default for Program { - fn default() -> Self { - Self::new() - } -} - -impl Parser { - pub fn parse_nodes(tokens: Vec) -> Result, AssembleError> { - let mut self_ = Self { - tokens: tokens.into_iter().rev().collect(), - nodes: vec![], - }; - - while !self_.tokens.is_empty() { - let ins = self_.parse_instruction()?; - self_.nodes.push(ins); - } - - Ok(self_.nodes.clone()) - } - - pub fn get_dependencies( - nodes: &Vec, - source_path: &Path, - ) -> Result, AssembleError> { - let mut dependencies = Vec::new(); - // Get the parent directory of the source file to use as the base directory - let base_dir = source_path - .parent() - .ok_or_else(|| AssembleError::InvalidFile(source_path.to_path_buf()))?; - - for node in nodes { - if node.opcode() == Opcode::Include { - let path_str = expect_token!( - node.args().get(1).ok_or(AssembleError::Generic)?, - StringLit - )?; - let path = PathBuf::from(path_str); - - // If the path is not absolute, make it relative to the base directory - let full_path = if path.is_absolute() { - path - } else { - base_dir.join(path) - }; - - dependencies.push(full_path); - } - } - Ok(dependencies) - } - - #[expect(clippy::too_many_lines, clippy::cognitive_complexity)] - fn parse_instruction(&mut self) -> Result { - if self.tokens.is_empty() { - unreachable!(); - } - - // check if the Node starts with a label - let label = expect_token!(self.peek_next()?, Symbol).ok(); - if label.is_some() { - self.tokens.pop(); - } - - let opcode = expect_token!(self.next()?, Opcode)?; - let args: Vec; - - match opcode { - // R-type instructions - Opcode::Mov | Opcode::Movs => { - let reg1 = expect_type!(self.next()?, Register, Symbol)?; - let reg2 = expect_type!(self.next()?, Register, Symbol)?; - args = vec![reg1, reg2]; - } - - Opcode::Ldb | Opcode::Ldbs | Opcode::Ldh | Opcode::Ldhs | Opcode::Ldw => { - let base = expect_type!(self.next()?, Register, Symbol)?; - let dest = expect_type!(self.next()?, Register)?; - - let mut offset = Token::Immediate(0); - if let Ok(next) = self.peek_next() - && expect_type!(next, Immediate).is_ok() { - offset = self.next()?; - } - - args = vec![base, dest, offset]; - } - Opcode::Stb | Opcode::Sth | Opcode::Stw => { - let base = expect_type!(self.next()?, Register)?; - let dest = expect_type!(self.next()?, Register, Symbol)?; - let mut offset = Token::Immediate(0); - if let Ok(next) = self.peek_next() - && expect_type!(next, Immediate).is_ok() { - offset = self.next()?; - } - args = vec![base, dest, offset]; - } - - Opcode::Add - | Opcode::Sub - | Opcode::And - | Opcode::Or - | Opcode::Xor - | Opcode::Nand - | Opcode::Nor - | Opcode::Xnor => { - let src1 = expect_type!(self.next()?, Register, Symbol)?; - let src2 = expect_type!(self.next()?, Register, Symbol)?; - let dest = expect_type!(self.next()?, Register, Symbol)?; - args = vec![src1, src2, dest]; - } - - Opcode::Not | Opcode::Cmp => { - let reg1 = expect_type!(self.next()?, Register, Symbol)?; - let reg2 = expect_type!(self.next()?, Register, Symbol)?; - args = vec![reg1, reg2]; - } - - Opcode::Shl | Opcode::Shr => { - let reg = expect_type!(self.next()?, Register, Symbol)?; - let num = expect_type!(self.next()?, Immediate)?; - args = vec![reg, num]; - } - - Opcode::Inc | Opcode::Dec => { - let reg = expect_type!(self.next()?, Register, Symbol)?; - args = vec![reg]; - } - - Opcode::Include => { - let mod_name = expect_type!(self.next()?, Symbol)?; - let path = expect_type!(self.next()?, StringLit)?; - args = vec![mod_name, path]; - } - - // J-type instructions - Opcode::Jmp - | Opcode::Jeq - | Opcode::Jne - | Opcode::Jgt - | Opcode::Jge - | Opcode::Jlt - | Opcode::Jle => { - let imm = expect_type!(self.next()?, Immediate, Symbol)?; - let offset = match self.peek_next() { - Ok(token) => { - if expect_type!(token, Register).is_ok() { - self.next()? - } else { - Token::Register(Register::Zero) - } - } - Err(_) => Token::Register(Register::Zero), - }; - args = vec![imm, offset]; - } - - Opcode::Call => { - let addr = expect_type!(self.next()?, Symbol)?; - args = vec![addr]; - } - - // I-type instructions - Opcode::Lui | Opcode::Lli | Opcode::Lwi => { - let imm = expect_type!(self.next()?, Immediate, Symbol)?; - let reg = expect_type!(self.next()?, Register)?; - args = vec![imm, reg]; - } - - // Immediate Arithmetic - Opcode::AddI | Opcode::SubI => { - let reg = expect_type!(self.next()?, Register)?; - let imm = expect_type!(self.next()?, Immediate)?; - let reg2 = if expect_type!(self.peek_next()?, Register).is_ok() { - self.next()? - } else { - reg.clone() - }; - args = vec![reg, imm, reg2]; - } - - // D-type pseudoinstructions (data definition) - Opcode::Resb | Opcode::Resh | Opcode::Resw => { - let name = expect_type!(self.next()?, Symbol)?; - let num = expect_type!(self.next()?, Immediate)?; - args = vec![name, num]; - } - - Opcode::Db | Opcode::Dh | Opcode::Dw => { - args = self.parse_data_definition(opcode)?; - } - - // E-type pseudoinstructions (stack operations) - Opcode::Push | Opcode::Pop => { - let reg = expect_type!(self.next()?, Register, Symbol)?; - args = vec![reg]; - } - - Opcode::Pusha | Opcode::Popa => { - let count = - expect_type!(self.next()?, Immediate).unwrap_or(Token::Immediate(8)); - args = vec![count]; - } - - // Special instructions - Opcode::Int => { - let val = expect_type!(self.next()?, Immediate)?; - args = vec![val]; - } - - // Instructions with no arguments - Opcode::Hlt | Opcode::Nop | Opcode::Irt | Opcode::Return => { - args = vec![]; - } - - Opcode::Data | Opcode::Segment => { - return Err(AssembleError::Generic); - } - } - - Ok(node!(label, opcode, args: args)) - } - - fn parse_data_definition( - &mut self, - opcode: Opcode, - ) -> Result, AssembleError> { - let mut values = Vec::new(); - - let name = expect_type!(self.next()?, Symbol)?; - values.push(name); - - match opcode { - Opcode::Db => { - // db can take string literals or u8 immediates - while !self.tokens.is_empty() { - let token = self - .tokens - .last() - .expect("Expected a token for data definition, but found none"); - - match token { - Token::StringLit(_) => { - values.push(self.tokens.pop().expect( - "Expected a token for data definition, but found none", - )); - } - Token::Immediate(val) if u8::try_from(*val).is_ok() => { - values.push(self.tokens.pop().expect( - "Expected a token for data definition, but found none", - )); - } - _ => break, - } - } - } - - Opcode::Dh => { - // dh can take u16 immediates - while !self.tokens.is_empty() { - let token = self - .tokens - .last() - .expect("Expected a token for data definition, but found none"); - - match token { - Token::StringLit(_) => { - values.push(self.tokens.pop().expect( - "Expected a token for data definition, but found none", - )); - } - Token::Immediate(val) if u16::try_from(*val).is_ok() => { - values.push(self.tokens.pop().expect( - "Expected a token for data definition, but found none", - )); - } - _ => break, - } - } - } - - Opcode::Dw => { - // dw can take u32 immediates - while !self.tokens.is_empty() { - match self - .tokens - .last() - .expect("Expected a token for data definition, but found none") - { - Token::StringLit(_) => { - values.push(self.tokens.pop().expect( - "Expected a token for data definition, but found none", - )); - } - Token::Immediate(val) => { - values.push(self.tokens.pop().expect( - "Expected a token for data definition, but found none", - )); - } - _ => break, - } - } - } - - _ => unreachable!(), - } - - Ok(values) - } - - fn next(&mut self) -> Result { - if self.tokens.is_empty() { - Err(AssembleError::UnexpectedEof) - } else { - Ok(self - .tokens - .pop() - .expect("tokens vector was unexpectedly empty in next()")) - } - } - - fn peek_next(&self) -> Result { - if self.tokens.is_empty() { - Err(AssembleError::UnexpectedEof) - } else { - Ok(self - .tokens - .last() - .expect("peek_next called on empty tokens vector") - .clone()) - } - } -} diff --git a/assembler/src/assembler/program.rs b/assembler/src/assembler/program.rs deleted file mode 100644 index 05b8132..0000000 --- a/assembler/src/assembler/program.rs +++ /dev/null @@ -1,122 +0,0 @@ -//! Program state management for multi-module compilation. - -use std::{ - collections::HashSet, - path::PathBuf, - sync::{Arc, Mutex}, -}; - -use uuid::Uuid; - -use crate::assembler::{AssembleError, Module, Task, quick_hash}; -use crate::util::logging::Logger; - -/// Main program state containing all modules and compilation metadata. -#[derive(Debug)] -pub struct Program { - /// A field to be passed into a hasher. - hash_me: Uuid, - inner: Arc>, -} - -impl std::hash::Hash for Program { - fn hash(&self, state: &mut H) { - self.hash_me.hash(state); - } -} - -impl PartialEq for Program { - fn eq(&self, other: &Self) -> bool { - *self.inner.lock().unwrap() == *other.inner.lock().unwrap() - } -} - -#[derive(Debug, PartialEq)] -struct ProgramInner { - pub main_path: PathBuf, - pub registry: HashSet, - pub modules: Vec, - pub tasks: Vec, - pub logger: Logger, -} - -impl Program { - #[must_use] - pub fn new() -> Self { - Self { - hash_me: Uuid::new_v4(), - inner: Arc::new(Mutex::new(ProgramInner { - registry: HashSet::new(), - modules: Vec::new(), - tasks: Vec::new(), - main_path: PathBuf::new(), - logger: Logger::new(), - })), - } - } - - /// Registers a module path to prevent duplicate compilation. - pub fn register(&self, path: &std::path::Path) -> Result<(), AssembleError> { - self.inner.lock()?.registry.insert(quick_hash(path)); - Ok(()) - } - - /// Checks if a module path is already registered. - pub fn is_registered(&self, path: &std::path::Path) -> Result { - Ok(self.inner.lock()?.registry.contains(&quick_hash(path))) - } - - /// Gets all compilation tasks. - pub fn get_tasks(&self) -> Result, AssembleError> { - Ok(self.inner.lock()?.tasks.clone()) - } - - /// Adds a new compilation task. - pub fn add_task(&self, task: Task) -> Result<(), AssembleError> { - self.inner.lock()?.tasks.push(task); - Ok(()) - } - - /// Adds a compiled module to the program. - pub fn add_module(&self, module: Module) -> Result<(), AssembleError> { - self.inner.lock()?.modules.push(module); - Ok(()) - } - - /// Gets all compiled modules. - pub fn get_modules(&self) -> Result, AssembleError> { - Ok(self.inner.lock()?.modules.clone()) - } - - /// Logs a message using the program's logger. - pub fn log(&self, message: &str) -> Result<(), AssembleError> { - self.inner.lock()?.logger.log(message); - Ok(()) - } - - /// Sets the main path for the program. - pub fn set_main_path(&self, path: PathBuf) -> Result<(), AssembleError> { - self.inner.lock()?.main_path = path; - Ok(()) - } - - /// Gets the main path for the program. - pub fn get_main_path(&self) -> Result { - Ok(self.inner.lock()?.main_path.clone()) - } -} - -impl Clone for Program { - fn clone(&self) -> Self { - Self { - hash_me: self.hash_me.clone(), - inner: Arc::clone(&self.inner), - } - } -} - -impl Default for Program { - fn default() -> Self { - Self::new() - } -} diff --git a/assembler/src/assembler/resolver.rs b/assembler/src/assembler/resolver.rs deleted file mode 100644 index 626a045..0000000 --- a/assembler/src/assembler/resolver.rs +++ /dev/null @@ -1,156 +0,0 @@ -use std::{ - collections::HashMap, - fs::canonicalize, - path::{Path, PathBuf}, -}; - -use common::prelude::Register; - -use crate::assembler::quick_hash; -use crate::assembler::{ - log, - model::{Module, Node, Opcode, Symbol, Token}, -}; -use crate::{assembler::AssembleError, node}; - -pub fn resolve_symbols(nodes: &mut [Node]) -> Result<(), AssembleError> { - let symbol_table = generate_symbol_table(nodes); - - for node in nodes.iter_mut() { - match node.opcode() { - Opcode::Jmp - | Opcode::Jeq - | Opcode::Jne - | Opcode::Jgt - | Opcode::Jge - | Opcode::Jlt - | Opcode::Jle - | Opcode::Lli - | Opcode::Lui => { - if let Token::Symbol(symbol) = node - .arg(0) - .expect("Expected argument 0 for jump-like opcode") - { - if let Some(address) = symbol_table.get(&symbol) { - node.tokens[0] = Token::Immediate(*address); - } else { - return Err(AssembleError::UndefinedSymbol(symbol)); - } - } - } - _ => (), - } - } - - Ok(()) -} - -fn generate_symbol_table(nodes: &[Node]) -> HashMap { - let mut table = HashMap::new(); - - for (i, node) in nodes.iter().enumerate() { - if let Some(symbol) = node.label() { - table.insert(symbol, 4 * i as u32); - } - } - - table -} - -pub fn resolve_dependencies( - mut nodes: Vec, - base_dir: &Path, -) -> Result, AssembleError> { - // First we get a list of imports. - let mut dependencies = Vec::new(); - for node in &nodes { - if node.opcode() == Opcode::Include { - // we want the path, and the name - let name = if let Token::Symbol(name) = node - .arg(0) - .expect("Expected argument #0 for Include directive.") - { - name.name.clone() - } else { - unreachable!() - }; //node.2.get(0).unwrap() - - let Ok(Token::StringLit(path)) = node.arg(1) else { - unreachable!() - }; - - let full_path = base_dir.join(path); - let canonical_path = full_path - .canonicalize() - .map_err(|_| AssembleError::InvalidFile(full_path.clone()))?; - - let hash = quick_hash(&canonical_path); - - dependencies.push((name, hash)); - } - } - - let mut changes = Vec::<(u32, u32, Symbol)>::new(); - // now we resolve the symbols on all the nodes - // we need to check all operands for unresolved signals - for (i, node) in nodes.clone().iter().enumerate() { - let Node { - tokens: operands, .. - } = node; - for (j, token) in operands.iter().enumerate() { - if let Token::Symbol(symbol) = token { - for d in &dependencies { - if let Module::Unresolved(name) = symbol.module.clone() { - if name != d.0 { - continue; - } - - let symbol = Symbol { - name: symbol.name.clone(), - module: Module::Resolved(d.1), - }; - changes.push((i as u32, j as u32, symbol)); - } - } - } - } - } - - for (i, j, symbol) in changes { - nodes[i as usize].tokens[j as usize] = Token::Symbol(symbol); - } - - Ok(nodes) -} - -pub fn create_sections(nodes: &mut Vec) -> Result<(), AssembleError> { - let mut res = Vec::::with_capacity(nodes.len()); - - res.push(node!(None, Opcode::Segment, Token::Immediate(0))); - - for n in nodes.iter() { - if n.opcode() == Opcode::Data { - res.push(n.clone()); - } - } - - let start = res.len() + 1; - res.insert( - 0, - node!( - None, - Opcode::Jmp, - Token::Immediate(start as u32 * 4), - Token::Register(Register::Zero) - ), - ); - for n in nodes.iter() { - if !matches!(n.opcode(), Opcode::Data | Opcode::Include) { - res.push(n.clone()); - } - } - - *nodes = res; - - Ok(()) -} diff --git a/assembler/src/assembler/task.rs b/assembler/src/assembler/task.rs deleted file mode 100644 index 0ed501f..0000000 --- a/assembler/src/assembler/task.rs +++ /dev/null @@ -1,96 +0,0 @@ -//! Threading utilities for parallel module compilation. - -use std::{ - path::PathBuf, - sync::Arc, - thread::{self, JoinHandle}, -}; - -use uuid::Uuid; - -use crate::assembler::{AssembleError, Module, Program, quick_hash}; - -/// Represents a threaded compilation task for a single module. -#[derive(Debug)] -pub struct Task { - id: Uuid, - module_handle: Arc>>, -} - -impl PartialEq for Task { - fn eq(&self, other: &Self) -> bool { - self.id == other.id - } -} - -impl Task { - /// Creates a new compilation task for the given module path. - pub fn new(path: PathBuf, program: Program) -> Result { - let handle = thread::spawn(move || { - let mut module = - Module::new(path.clone(), quick_hash(&path), Vec::new(), program.clone()); - - // Execute the compilation pipeline - match module.compile() { - Ok(()) => Ok(module), - Err(e) => { - eprintln!( - "Error building program at path `{}`: {}", - path.display(), - e - ); - Err(e) - } - } - }); - - Ok(Self { - module_handle: Arc::new(handle), - id: Uuid::new_v4(), - }) - } - - /// Creates a task from an existing join handle (for compatibility). - pub fn from_handle(handle: JoinHandle>) -> Self { - Self { - module_handle: Arc::new(handle), - id: Uuid::new_v4(), - } - } - - /// Waits for the compilation task to complete and returns the compiled module. - pub fn join(self) -> Result { - let Some(join_handle) = Arc::try_unwrap(self.module_handle).ok() else { - let err_msg = String::from( - "Cannot take ownership of reference counted task join_handle, multiple references exist.", - ); - eprintln!("{err_msg}"); - return Err(AssembleError::Threading(err_msg)); - }; - - match join_handle.join() { - Ok(result) => result, - Err(panic_payload) => { - let err_msg = format!( - "Task thread panicked: {:?}", - panic_payload - .downcast_ref::() - .map(|s| s.as_str()) - .or_else(|| panic_payload.downcast_ref::<&str>().copied()) - .unwrap_or("Unknown panic") - ); - eprintln!("{err_msg}"); - Err(AssembleError::Threading(err_msg)) - } - } - } -} - -impl Clone for Task { - fn clone(&self) -> Self { - Self { - id: self.id.clone(), - module_handle: Arc::clone(&self.module_handle), - } - } -} diff --git a/assembler/src/assembler/util.rs b/assembler/src/assembler/util.rs deleted file mode 100644 index 9171dda..0000000 --- a/assembler/src/assembler/util.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! Utility functions for the assembler. - -use std::{ - hash::{DefaultHasher, Hash, Hasher}, - path::Path, -}; - -/// Quick hash function for file paths. -pub fn quick_hash(value: &Path) -> u64 { - let mut hasher = DefaultHasher::new(); - value - .canonicalize() - .expect("Failed to canonicalize path for quick_hash") - .to_str() - .hash(&mut hasher); - - hasher.finish() -} - -/// TODO: Use an actual logging or tracing library for pretty (scoped) output. -pub fn log(message: &str) { - println!("\x1b[32mINFO:\x1b[0m {message}"); -} diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index ec850b2..a9ed557 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -13,16 +13,14 @@ )] pub mod args; -pub mod assembler; pub mod image_builder; -pub mod tooling; +// pub mod tooling; mod util; pub mod prelude { - pub use crate::assembler::CompilerEngine; pub use crate::image_builder; - pub use crate::tooling::brainf; - pub use crate::tooling::project; + // pub use crate::tooling::brainf; + // pub use crate::tooling::project; } use num_cpus as _; diff --git a/assembler/src/main.rs b/assembler/src/main.rs index de7afde..e172aa3 100644 --- a/assembler/src/main.rs +++ b/assembler/src/main.rs @@ -2,67 +2,62 @@ use common as _; use num_cpus as _; use threadpool as _; -use assembler::{ - prelude::*, - tooling::{brainf, project}, -}; - -use clap::Parser; -use std::{fs, io::Write, path::PathBuf}; +// use clap::Parser; +// use std::{fs, io::Write, path::PathBuf}; fn main() { - // Parse command line arguments - let args: Vec = std::env::args().collect(); + // // Parse command line arguments + // let args: Vec = std::env::args().collect(); - let _clap_args = assembler::args::Args::parse(); + // let _clap_args = assembler::args::Args::parse(); - if args.len() == 2 && args[1] == "init" { - project::tool_libcreate(); - std::process::exit(0); - } + // if args.len() == 2 && args[1] == "init" { + // // project::tool_libcreate(); + // std::process::exit(0); + // } - if args.len() == 2 && args[1] == "brainf" { - let src = PathBuf::from("brainf.bf"); - let result = brainf::build(&src); + // if args.len() == 2 && args[1] == "brainf" { + // let src = PathBuf::from("brainf.bf"); + // // let result = brainf::build(&src); - let mut file = match fs::File::create("brainf.dsb") { - Err(e) => { - eprintln!("Failed to create output file: {e}"); - std::process::exit(1); - } - Ok(file) => file, - }; + // let mut file = match fs::File::create("brainf.dsb") { + // Err(e) => { + // eprintln!("Failed to create output file: {e}"); + // std::process::exit(1); + // } + // Ok(file) => file, + // }; - for instruction in result { - if let Err(e) = file.write(&instruction.encode().to_be_bytes()) { - eprintln!("Failed to write to output file: {e}"); - std::process::exit(1); - } - } + // // for instruction in result { + // // if let Err(e) = file.write(&instruction.encode().to_be_bytes()) { + // // eprintln!("Failed to write to output file: {e}"); + // // std::process::exit(1); + // // } + // // } - std::process::exit(0); - } + // std::process::exit(0); + // } - if args.len() != 5 || args[1] != "-i" || args[3] != "-o" { - eprintln!("Usage: {} -i input_path -o output_path", args[0]); - std::process::exit(1); - } + // if args.len() != 5 || args[1] != "-i" || args[3] != "-o" { + // eprintln!("Usage: {} -i input_path -o output_path", args[0]); + // std::process::exit(1); + // } - let input_path = &args[2]; - let output_path = &args[4]; - let src = PathBuf::from(input_path); + // let input_path = &args[2]; + // let output_path = &args[4]; + // let src = PathBuf::from(input_path); - // Initialize the compiler engine - let mut compiler = CompilerEngine::new(); - compiler.start_compilation(&src); + // // Initialize the compiler engine + // let mut compiler = CompilerEngine::new(); + // compiler.start_compilation(&src); - // Or block until done - let result = compiler.wait_for_result().unwrap(); + // // Or block until done + // let result = compiler.wait_for_result().unwrap(); - for instruction in result { - if let Err(e) = fs::write(output_path, instruction.encode().to_be_bytes()) { - eprintln!("Failed to write to output file: {e}"); - std::process::exit(1); - } - } + // for instruction in result { + // if let Err(e) = fs::write(output_path, instruction.encode().to_be_bytes()) { + // eprintln!("Failed to write to output file: {e}"); + // std::process::exit(1); + // } + // } } diff --git a/assembler/src/source_info.rs b/assembler/src/source_info.rs new file mode 100644 index 0000000..4773308 --- /dev/null +++ b/assembler/src/source_info.rs @@ -0,0 +1,13 @@ +//! This file contains information on where a [`Token`] or [`Node`] is within the source +//! code for more informative errors. This will likely be attached to a [`Token`] which +//! will in turn be attached to an AST [`Node`]. + +/// Information on where the token is within the source. +pub struct SourceInfo { + /// The line number within the source file underpinned by `module_id`. + pub line_no: usize, + /// The ID of the module containing this token. + pub module_id: Uuid, + /// The indexes where this token may be found (line-local). + pub span: std::ops::Range, +} diff --git a/assembler/src/token.rs b/assembler/src/token.rs new file mode 100644 index 0000000..094b078 --- /dev/null +++ b/assembler/src/token.rs @@ -0,0 +1,17 @@ +//! Contains [`TokenType`] and [`Token`]'s. Adapted from Harry's old lexer since it was +//! easier to build from scratch and edit his code than it would be to try and wrangle it +//! into shape. + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum TokenType { + Symbol(Symbol), + Register(Register), + Immediate(u32), + StringLit(String), + Opcode(Opcode), +} + +pub struct Token { + token_type: TokenType, + source_info: SourceInfo, +} diff --git a/assembler/src/util/logging.rs b/assembler/src/util/logging.rs index 4b72123..f9a4de1 100644 --- a/assembler/src/util/logging.rs +++ b/assembler/src/util/logging.rs @@ -2,7 +2,7 @@ #![allow(unused)] use std::{fmt, sync::mpsc::Sender}; -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Eq)] pub struct Logger {} impl Logger {