From 11ba09ab4312339e1019f1b24d5af94e857152c0 Mon Sep 17 00:00:00 2001 From: "J. Hinchliffe" Date: Tue, 24 Jun 2025 23:19:20 +0100 Subject: [PATCH] assembler: broke everything, currently modularising --- Cargo.lock | 1 + assembler/Cargo.toml | 1 + assembler/src/assembler/assembler.rs | 95 ++-------- assembler/src/assembler/engine.rs | 161 ++++++++++++++++ assembler/src/assembler/error.rs | 114 ++++++++++++ assembler/src/assembler/mod.rs | 263 ++------------------------- assembler/src/assembler/model.rs | 153 ++++++++++++++-- assembler/src/assembler/program.rs | 122 +++++++++++++ assembler/src/assembler/task.rs | 96 ++++++++++ assembler/src/assembler/util.rs | 23 +++ assembler/src/util/logging.rs | 1 + common/src/instructions.rs | 2 +- 12 files changed, 690 insertions(+), 342 deletions(-) create mode 100644 assembler/src/assembler/engine.rs create mode 100644 assembler/src/assembler/error.rs create mode 100644 assembler/src/assembler/program.rs create mode 100644 assembler/src/assembler/task.rs create mode 100644 assembler/src/assembler/util.rs diff --git a/Cargo.lock b/Cargo.lock index 5e46b62..b3d79d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -270,6 +270,7 @@ dependencies = [ "common", "num_cpus", "threadpool", + "uuid", ] [[package]] diff --git a/assembler/Cargo.toml b/assembler/Cargo.toml index d7a6efb..a39add2 100644 --- a/assembler/Cargo.toml +++ b/assembler/Cargo.toml @@ -17,3 +17,4 @@ clap = { version = "4.5.40", features = ["derive"] } common = { path = "../common" } num_cpus = "1.17.0" threadpool = "1.8.1" +uuid = { version = "1.17.0", features = ["v4"] } diff --git a/assembler/src/assembler/assembler.rs b/assembler/src/assembler/assembler.rs index 5ba9e1c..f4f5ad5 100644 --- a/assembler/src/assembler/assembler.rs +++ b/assembler/src/assembler/assembler.rs @@ -6,8 +6,10 @@ use std::{ thread::{self, JoinHandle}, }; -use crate::assembler::{AssembleError, Token, expand_pseudo_ops, lexer, quick_hash}; -use crate::assembler::{Node, Parser, resolve_dependencies}; +use crate::assembler::{Node, Parser, ProgramRef, Task, resolve_dependencies}; +use crate::assembler::{ + Token, error::AssembleError, expand_pseudo_ops, lexer, quick_hash, +}; use crate::util::logging::Logger; // pub fn new_assemble(path: &Path) { @@ -55,71 +57,6 @@ impl Default for Program { } } -pub struct ProgramRef { - program: Arc>, -} - -impl ProgramRef { - #[must_use] - pub fn new(program: Program) -> Self { - Self { - program: Arc::new(Mutex::new(program)), - } - } - - pub fn register(&self, path: &Path) { - self.program - .lock() - .expect("Failed to acquire program lock") - .registry - .insert(quick_hash(path)); - } - - #[must_use] - pub fn is_registered(&self, path: &Path) -> bool { - self.program - .lock() - .expect("Failed to acquire program lock") - .registry - .contains(&quick_hash(path)) - } - - // pub fn get_tasks(&self) -> Vec<&Task> { - // self.program.lock().unwrap().tasks.iter().collect() - // } - - pub fn add_task(&self, task: Task) { - self.program - .lock() - .expect("Failed to acquire program lock") - .add_task(task); - } - - pub fn add_module(&self, module: Module) { - self.program - .lock() - .expect("Failed to acquire program lock") - .modules - .push(module); - } - - pub fn log(&self, message: &str) { - self.program - .lock() - .expect("Failed to acquire program lock") - .logger - .log(message); - } -} - -impl Clone for ProgramRef { - fn clone(&self) -> Self { - Self { - program: self.program.clone(), - } - } -} - pub struct Module { pub path: PathBuf, pub hash: u64, @@ -144,7 +81,8 @@ impl Module { } pub fn build(path: PathBuf, program: ProgramRef) -> Result { - // Spawn a thread that creates the main function and executes the lexer and parser. + // Spawn a thread that creates the main function and executes the lexer and + // parser. let handle = thread::spawn(move || { let mut module = Self::new(path.clone(), quick_hash(&path), Vec::new(), program.clone()); @@ -154,7 +92,8 @@ impl Module { module.parse(tokens); module.expand(); module.prepare_dependencies(); - module + + Ok(module) } Err(why) => { eprintln!( @@ -162,13 +101,12 @@ impl Module { path.display() ); - // TODO: Find a way to make this work without panicking. - unreachable!() + Err(why) } } }); - Ok(Task { module: handle }) + Ok(Task::new(path, program)?) } fn lex(&self) -> Result, AssembleError> { @@ -181,8 +119,13 @@ impl Module { )); } - let src = fs::read_to_string(&self.path) - .map_err(|_| AssembleError::InvalidFile(self.path.clone()))?; + let src = fs::read_to_string(&self.path).map_err(|e| { + AssembleError::Io(format!( + "Failed to read file '{}': {}", + self.path.display(), + e + )) + })?; let file_hash = quick_hash(&self.path); @@ -258,7 +201,3 @@ impl Module { .unwrap_or_default() } } - -pub struct Task { - module: JoinHandle, -} diff --git a/assembler/src/assembler/engine.rs b/assembler/src/assembler/engine.rs new file mode 100644 index 0000000..72628a7 --- /dev/null +++ b/assembler/src/assembler/engine.rs @@ -0,0 +1,161 @@ +//! Compiler engine for orchestrating the assembly process. + +use crate::assembler::{AssembleError, Program, Task}; +use common::prelude::Instruction; +use std::path::{Path, PathBuf}; + +/// Supported output formats for the assembler. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OutputFormat { + /// Flat binary executable + Binary, + /// ELF relocatable object file + ElfObject, + /// ELF executable + ElfExecutable, +} + +/// Main compilation orchestrator that manages the assembly process. +pub struct CompilerEngine { + /// Configuration options for compilation + pub output_format: OutputFormat, + pub include_debug_info: bool, + pub optimization_level: u8, +} + +impl CompilerEngine { + /// Creates a new compiler engine with default settings. + #[must_use] + pub fn new() -> Self { + Self { + output_format: OutputFormat::Binary, + include_debug_info: false, + optimization_level: 0, + } + } + + /// Creates a new compiler engine with specified output format. + #[must_use] + pub fn with_output_format(output_format: OutputFormat) -> Self { + Self { + output_format, + include_debug_info: false, + optimization_level: 0, + } + } + + /// Sets the output format for compilation. + pub fn set_output_format(&mut self, format: OutputFormat) { + self.output_format = format; + } + + /// Enables or disables debug information generation. + pub fn set_debug_info(&mut self, enabled: bool) { + self.include_debug_info = enabled; + } + + /// Sets the optimization level (0-3). + pub fn set_optimization_level(&mut self, level: u8) { + self.optimization_level = level.min(3); + } + + /// Main assembly function that orchestrates the entire compilation process. + pub fn assemble( + &self, + main_path: &Path, + output_path: Option<&Path>, + ) -> Result, AssembleError> { + let program = Program::new(); + + // Set the main path in the program + program.set_main_path(main_path.to_path_buf())?; + + // Create and execute the main compilation task + let main_task = Task::new(main_path.to_path_buf(), program.clone())?; + let module = main_task.join()?; + + program.add_module(module)?; + + // Wait for all dependency compilation tasks to complete + self.wait_for_completion(&program)?; + + // Generate final instructions + let instructions = self.generate_instructions(&program)?; + + Ok(instructions) + } + + /// Waits for all compilation tasks to complete. + fn wait_for_completion(&self, program: &Program) -> Result<(), AssembleError> { + let tasks = program.get_tasks()?; + + for task in tasks { + let module = task.join()?; + program.add_module(module)?; + } + + Ok(()) + } + + /// Generates the final instruction stream from all compiled modules. + fn generate_instructions( + &self, + program: &Program, + ) -> Result, AssembleError> { + let mut all_nodes = Vec::new(); + + // Collect all nodes from all modules + for module in program.get_modules()? { + all_nodes.extend(module.nodes.clone()); + } + + // Apply resolution and code generation + crate::assembler::create_sections(&mut all_nodes)?; + crate::assembler::resolve_symbols(&mut all_nodes)?; + crate::assembler::codegen(all_nodes) + } + + /// Determines the default output path based on input path and output format. + fn default_output_path(&self, input_path: &Path) -> PathBuf { + let stem = input_path.file_stem().unwrap_or_default(); + let parent = input_path.parent().unwrap_or(Path::new(".")); + + let extension = match self.output_format { + OutputFormat::Binary => "bin", + OutputFormat::ElfObject => "o", + OutputFormat::ElfExecutable => "elf", + }; + + parent.join(format!("{}.{}", stem.to_string_lossy(), extension)) + } +} + +impl Default for CompilerEngine { + fn default() -> Self { + Self::new() + } +} + +/// Convenience function for simple assembly with default settings. +pub fn assemble(input_path: &Path) -> Result, AssembleError> { + let engine = CompilerEngine::new(); + engine.assemble(input_path, None) +} + +/// Convenience function for assembling to ELF object format. +pub fn assemble_to_object( + input_path: &Path, + output_path: Option<&Path>, +) -> Result, AssembleError> { + let engine = CompilerEngine::with_output_format(OutputFormat::ElfObject); + engine.assemble(input_path, output_path) +} + +/// Convenience function for assembling to ELF executable format. +pub fn assemble_to_executable( + input_path: &Path, + output_path: Option<&Path>, +) -> Result, AssembleError> { + let engine = CompilerEngine::with_output_format(OutputFormat::ElfExecutable); + engine.assemble(input_path, output_path) +} diff --git a/assembler/src/assembler/error.rs b/assembler/src/assembler/error.rs new file mode 100644 index 0000000..22a97c8 --- /dev/null +++ b/assembler/src/assembler/error.rs @@ -0,0 +1,114 @@ +//! Error types for the DSA assembler. + +use std::fmt; + +/// Comprehensive error type for assembly operations. +#[derive(Debug)] +pub enum AssembleError { + /// IO-related errors (file not found, permission denied, etc.). + Io(std::io::Error), + + /// Lexical analysis errors + Lexer { + message: String, + line: usize, + column: usize, + }, + + /// Parsing errors + Parser { + message: String, + line: usize, + token: String, + }, + + /// Symbol resolution errors + Symbol { + message: String, + symbol_name: String, + }, + + /// Code generation errors + Codegen { + message: String, + instruction: String, + }, + + /// Dependency resolution errors + Dependency { + message: String, + module_path: String, + }, + + /// Threading and synchronization errors + Threading(String), + + /// Output generation errors + Output { message: String, format: String }, + + /// Generic assembly error + Generic(String), +} + +impl fmt::Display for AssembleError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Io(msg) => write!(f, "IO Error: {}", msg), + Self::Lexer { + message, + line, + column, + } => { + write!(f, "Lexer Error at {}:{}: {}", line, column, message) + } + Self::Parser { + message, + line, + token, + } => { + write!( + f, + "Parser Error at line {}, token '{}': {}", + line, token, message + ) + } + Self::Symbol { + message, + symbol_name, + } => { + write!(f, "Symbol Error '{}': {}", symbol_name, message) + } + Self::Codegen { + message, + instruction, + } => { + write!(f, "Codegen Error in '{}': {}", instruction, message) + } + Self::Dependency { + message, + module_path, + } => { + write!(f, "Dependency Error in '{}': {}", module_path, message) + } + Self::Threading(msg) => write!(f, "Threading Error: {}", msg), + Self::Output { message, format } => { + write!(f, "Output Error ({}): {}", format, message) + } + Self::Generic(msg) => write!(f, "Assembly Error: {}", msg), + } + } +} + +impl std::error::Error for AssembleError {} + +impl From for AssembleError { + fn from(error: std::io::Error) -> Self { + Self::Io(error) + } +} + +impl From> for AssembleError { + fn from(error: std::sync::PoisonError) -> Self { + Self::Threading(format!("Mutex poisoned: {}", error)) + } +} diff --git a/assembler/src/assembler/mod.rs b/assembler/src/assembler/mod.rs index 26fd801..940b28a 100644 --- a/assembler/src/assembler/mod.rs +++ b/assembler/src/assembler/mod.rs @@ -1,266 +1,43 @@ -#![allow(dead_code, unused)] - -use std::{ - collections::HashSet, - fmt, fs, - hash::{DefaultHasher, Hash, Hasher}, - path::{Path, PathBuf}, - sync::{Arc, Mutex, mpsc}, - thread, -}; +//! DSA Assembler module - converts assembly source code into executable instructions. use common::prelude::Instruction; - -// TODO: Use an actual logging or tracing library for pretty (scoped) output. -fn log(message: &str) { - println!("\x1b[32mINFO:\x1b[0m {message}"); -} +use std::path::Path; // Module declarations #[macro_use] pub mod macros; -#[allow(clippy::module_inception)] -pub mod assembler; pub mod codegen; +pub mod engine; +pub mod error; pub mod expand; pub mod lexer; pub mod model; pub mod parser; +pub mod program; pub mod resolver; +pub mod task; +pub mod util; -// Re-exports +// Re-exports for backward compatibility and convenience pub use self::{ codegen::codegen, + engine::{ + CompilerEngine, OutputFormat, assemble, assemble_to_executable, + assemble_to_object, + }, + error::AssembleError, expand::expand_pseudo_ops, lexer::lexer, model::{Module, Node, Opcode, Symbol, Token, TokenType}, - parser::{Parser, Program}, + parser::Parser, + program::Program, resolver::{create_sections, resolve_dependencies, resolve_symbols}, + task::Task, + util::{log, quick_hash}, }; -use crate::util::logging::{Entry, Logger}; - -pub struct CompilerEngine { - result_tx: mpsc::Sender, AssembleError>>, - result_rx: Option, AssembleError>>>, - is_running: bool, -} - -impl CompilerEngine { - #[must_use] - pub fn new() -> Self { - let (tx, rx) = mpsc::channel(); - Self { - result_tx: tx, - result_rx: Some(rx), - is_running: false, - } - } - - /// Start the compilation process in a separate thread - pub fn start_compilation(&mut self, src: &Path) { - if self.is_running { - return; - } - - let src = src.to_path_buf(); - let tx = self.result_tx.clone(); - - thread::spawn(move || { - let result = assemble(&src); - tx.send(result) - .expect("Failed to send compilation result from worker thread"); - }); - - self.is_running = true; - } - - /// Check if compilation is complete and get the result - pub fn try_get_result(&mut self) -> Option, AssembleError>> { - if !self.is_running { - return None; - } - - match self - .result_rx - .as_ref() - .expect("result_rx should be Some while compilation is running") - .try_recv() - { - Ok(result) => { - self.is_running = false; - Some(result) - } - Err(mpsc::TryRecvError::Empty) => None, - Err(mpsc::TryRecvError::Disconnected) => { - self.is_running = false; - Some(Err(AssembleError::Generic)) - } - } - } - - /// Block until compilation is complete and return the result - pub fn wait_for_result(&mut self) -> Result, AssembleError> { - if !self.is_running { - return Err(AssembleError::Generic); - } - - if let Ok(result) = self - .result_rx - .take() - .expect("result_rx should be Some while waiting for compilation result") - .recv() - { - self.is_running = false; - result - } else { - self.is_running = false; - Err(AssembleError::Generic) - } - } -} - -fn assemble(src: &Path) -> Result, AssembleError> { - let mut modules = HashSet::new(); - let mut program = Program::new(); - - let hash = quick_hash(src); - - if modules.contains(&hash) { - return Ok(vec![]); - } - - prepare_dependency(src, &mut modules, &mut program)?; - - let mut nodes = program.nodes.clone(); - - create_sections(&mut nodes)?; - resolve_symbols(&mut nodes)?; - - let instructions = codegen(nodes)?; - Ok(instructions) -} - -impl Default for CompilerEngine { - fn default() -> Self { - Self::new() - } -} - -fn prepare_dependency( - path: &Path, - modules: &mut HashSet, - program: &mut Program, -) -> Result<(), AssembleError> { - let filename = path - .file_name() - .and_then(|n| n.to_str()) - .expect("Failed to get file name from path"); - - if let Ok(path) = path.canonicalize() { - log(&format!( - "{:20} {:20} [{}]", - "Building", - filename, - path.display() - )); - } - - let src = fs::read_to_string(path) - .map_err(|_| AssembleError::InvalidFile(path.to_path_buf()))?; - let file_hash = quick_hash(path); - - log(&format!("{:20} {:20}", "Tokenising", filename)); - let tokens = lexer::lexer(src, file_hash)?; - - log(&format!("{:20} {:20}", "Parsing", filename)); - let parsed = Parser::parse_nodes(tokens)?; - - log(&format!("{:20} {:20}", "Resolving Deps", filename)); - // Get the parent directory of the source file to use as the base directory - let base_dir = path - .parent() - .ok_or_else(|| AssembleError::InvalidFile(path.to_path_buf()))?; - let mut nodes = expand_pseudo_ops(parsed, file_hash)?; - nodes = resolve_dependencies(nodes, base_dir)?; - - let deps = Parser::get_dependencies(&nodes, path)?; - - log(&format!( - "{:20} {:20}", - "Expanding PseudoInstructions", filename - )); - - // add a section instruction - nodes.insert( - 0, - node!(None, Opcode::Segment, Token::Immediate(file_hash as u32)), - ); - - for n in &nodes { - println!("{n}"); - } - - program.add_module(nodes); - - for dep in deps { - log(&format!( - "{:20} {:20}", - "Including", - dep.file_name() - .and_then(|f| f.to_str()) - .expect("Dependency path has no file name or is not valid UTF-8") - )); - - let dep_hash = quick_hash(&dep); - if modules.insert(dep_hash) { - prepare_dependency(dep.as_path(), modules, program)?; - } - } - - Ok(()) -} - -#[derive(Debug, Clone)] -pub enum AssembleError { - Generic, - UnexpectedEof, - InvalidFile(PathBuf), - UnexpectedToken(Token, TokenType), - InvalidArg, - UndefinedSymbol(Symbol), - /// Contains the nth element missing from the instruction. - MissingArgument(u8), -} - -impl fmt::Display for AssembleError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Generic => write!(f, "Generic error"), - Self::UnexpectedToken(tok, expected) => { - write!(f, "Unexpected token {tok:?}, expected {expected:?}") - } - Self::UnexpectedEof => write!(f, "Unexpected end of file"), - Self::InvalidFile(path) => write!(f, "Invalid file `{}`", path.display()), - Self::InvalidArg => write!(f, "Invalid argument"), - Self::UndefinedSymbol(symbol) => { - write!(f, "Undefined symbol {symbol}") - } - Self::MissingArgument(n) => { - write!(f, "Missing argument #{n} from instruction arguments.") - } - } - } -} - -fn quick_hash(value: &Path) -> u64 { - let mut hasher = DefaultHasher::new(); - value - .canonicalize() - .expect("Failed to canonicalize path for quick_hash") - .to_str() - .hash(&mut hasher); - - hasher.finish() +/// The old assemble function for compatibility reasons. +pub fn legacy_assemble(src: &Path) -> Result, AssembleError> { + engine::assemble(src) } diff --git a/assembler/src/assembler/model.rs b/assembler/src/assembler/model.rs index 97ad939..615b7da 100644 --- a/assembler/src/assembler/model.rs +++ b/assembler/src/assembler/model.rs @@ -1,10 +1,14 @@ +//! Data models for the DSA assembler. + +use crate::assembler::{AssembleError, Parser, Program, expand_pseudo_ops, lexer}; +use std::path::PathBuf; + use std::{fmt, str::FromStr}; use common::prelude::Register; +use uuid::Uuid; -use crate::assembler::AssembleError; - -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Hash)] pub struct Node { pub symbol: Option, pub opcode: Opcode, @@ -40,7 +44,9 @@ impl Node { self.args() .get(index) .cloned() - .ok_or(AssembleError::InvalidArg) + // TODO: This is a bad place to throw an error unless we write code to attach + // context. + .ok_or(AssembleError::Generic("Invalid argument index".to_string())) } } @@ -67,15 +73,6 @@ impl fmt::Display for Symbol { } } -impl fmt::Display for Module { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::Unresolved(name) => write!(f, "{name}"), - Self::Resolved(name) => write!(f, "{name}"), - } - } -} - impl fmt::Display for Opcode { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -160,12 +157,6 @@ impl PartialEq for Symbol { } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum Module { - Resolved(u64), - Unresolved(String), -} - -#[derive(Debug, Clone)] pub enum Token { Symbol(Symbol), Register(Register), @@ -196,7 +187,7 @@ impl TokenType { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Opcode { // Real instructions (0x00-0x26) Nop, @@ -417,3 +408,125 @@ impl Opcode { ) } } + +/// Represents a single source module and its compilation state. +#[derive(Debug, Clone, Hash)] +pub struct Module { + pub id: Uuid, + pub path: PathBuf, + pub hash: u64, + pub nodes: Vec, + program: Program, +} + +impl PartialEq for Module { + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } +} + +impl std::fmt::Display for Module { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "Module {{ id: {}, path: {}, nodes: {} }}", + self.id, + self.path.display(), + self.nodes.len() + ) + } +} + +impl Eq for Module {} + +impl Module { + #[must_use] + pub fn new(path: PathBuf, hash: u64, nodes: Vec, program: Program) -> Self { + Self { + id: Uuid::new_v4(), + path, + hash, + nodes, + program, + } + } + + /// Executes the full compilation pipeline for this module. + pub fn compile(&mut self) -> Result<(), AssembleError> { + self.lex()?; + self.parse()?; + self.expand()?; + self.prepare_dependencies()?; + Ok(()) + } + + /// Lexical analysis stage. + pub fn lex(&mut self) -> Result, AssembleError> { + // Log the build + if let Ok(path) = self.path.canonicalize() { + let _ = self.program.log(&format!( + "{:20} {:20} [{}]", + "Building", + self.get_filename(), + path.display() + )); + } + + // Read and lex the file + let source = std::fs::read_to_string(&self.path)?; + lexer(source, self.hash) + } + + /// Parsing stage. + pub fn parse(&mut self) -> Result<(), AssembleError> { + let source = std::fs::read_to_string(&self.path)?; + let tokens = lexer(source, self.hash)?; + let nodes = Parser::parse_nodes(tokens)?; + self.nodes = nodes; + Ok(()) + } + + /// Pseudo-instruction expansion stage. + pub fn expand(&mut self) -> Result<(), AssembleError> { + self.nodes = expand_pseudo_ops(self.nodes.clone(), self.hash)?; + Ok(()) + } + + /// Dependency resolution stage. + pub fn prepare_dependencies(&self) -> Result<(), AssembleError> { + // let base_dir = self.path.parent(); + + let dependencies = Parser::get_dependencies(&self.nodes, &self.path)?; + + for dep in dependencies { + if self.program.is_registered(&dep)? { + // we have already built this module! + continue; + } + self.program.register(&dep)?; + + // create new module task + match Task::new(dep, self.program.clone()) { + Ok(task) => { + if let Err(e) = self.program.add_task(task) { + eprintln!("Error adding task: {e}"); + } + } + Err(why) => { + eprintln!("Error building program: {why}"); + } + } + } + Ok(()) + } + + /// Gets the filename from a [`PathBuf`]. + fn get_filename(&self) -> &str { + self.path + .file_name() + .and_then(|f| f.to_str()) + .unwrap_or_default() + } +} + +use crate::assembler::Task; diff --git a/assembler/src/assembler/program.rs b/assembler/src/assembler/program.rs new file mode 100644 index 0000000..05b8132 --- /dev/null +++ b/assembler/src/assembler/program.rs @@ -0,0 +1,122 @@ +//! Program state management for multi-module compilation. + +use std::{ + collections::HashSet, + path::PathBuf, + sync::{Arc, Mutex}, +}; + +use uuid::Uuid; + +use crate::assembler::{AssembleError, Module, Task, quick_hash}; +use crate::util::logging::Logger; + +/// Main program state containing all modules and compilation metadata. +#[derive(Debug)] +pub struct Program { + /// A field to be passed into a hasher. + hash_me: Uuid, + inner: Arc>, +} + +impl std::hash::Hash for Program { + fn hash(&self, state: &mut H) { + self.hash_me.hash(state); + } +} + +impl PartialEq for Program { + fn eq(&self, other: &Self) -> bool { + *self.inner.lock().unwrap() == *other.inner.lock().unwrap() + } +} + +#[derive(Debug, PartialEq)] +struct ProgramInner { + pub main_path: PathBuf, + pub registry: HashSet, + pub modules: Vec, + pub tasks: Vec, + pub logger: Logger, +} + +impl Program { + #[must_use] + pub fn new() -> Self { + Self { + hash_me: Uuid::new_v4(), + inner: Arc::new(Mutex::new(ProgramInner { + registry: HashSet::new(), + modules: Vec::new(), + tasks: Vec::new(), + main_path: PathBuf::new(), + logger: Logger::new(), + })), + } + } + + /// Registers a module path to prevent duplicate compilation. + pub fn register(&self, path: &std::path::Path) -> Result<(), AssembleError> { + self.inner.lock()?.registry.insert(quick_hash(path)); + Ok(()) + } + + /// Checks if a module path is already registered. + pub fn is_registered(&self, path: &std::path::Path) -> Result { + Ok(self.inner.lock()?.registry.contains(&quick_hash(path))) + } + + /// Gets all compilation tasks. + pub fn get_tasks(&self) -> Result, AssembleError> { + Ok(self.inner.lock()?.tasks.clone()) + } + + /// Adds a new compilation task. + pub fn add_task(&self, task: Task) -> Result<(), AssembleError> { + self.inner.lock()?.tasks.push(task); + Ok(()) + } + + /// Adds a compiled module to the program. + pub fn add_module(&self, module: Module) -> Result<(), AssembleError> { + self.inner.lock()?.modules.push(module); + Ok(()) + } + + /// Gets all compiled modules. + pub fn get_modules(&self) -> Result, AssembleError> { + Ok(self.inner.lock()?.modules.clone()) + } + + /// Logs a message using the program's logger. + pub fn log(&self, message: &str) -> Result<(), AssembleError> { + self.inner.lock()?.logger.log(message); + Ok(()) + } + + /// Sets the main path for the program. + pub fn set_main_path(&self, path: PathBuf) -> Result<(), AssembleError> { + self.inner.lock()?.main_path = path; + Ok(()) + } + + /// Gets the main path for the program. + pub fn get_main_path(&self) -> Result { + Ok(self.inner.lock()?.main_path.clone()) + } +} + +impl Clone for Program { + fn clone(&self) -> Self { + Self { + hash_me: self.hash_me.clone(), + inner: Arc::clone(&self.inner), + } + } +} + +impl Default for Program { + fn default() -> Self { + Self::new() + } +} diff --git a/assembler/src/assembler/task.rs b/assembler/src/assembler/task.rs new file mode 100644 index 0000000..0ed501f --- /dev/null +++ b/assembler/src/assembler/task.rs @@ -0,0 +1,96 @@ +//! Threading utilities for parallel module compilation. + +use std::{ + path::PathBuf, + sync::Arc, + thread::{self, JoinHandle}, +}; + +use uuid::Uuid; + +use crate::assembler::{AssembleError, Module, Program, quick_hash}; + +/// Represents a threaded compilation task for a single module. +#[derive(Debug)] +pub struct Task { + id: Uuid, + module_handle: Arc>>, +} + +impl PartialEq for Task { + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } +} + +impl Task { + /// Creates a new compilation task for the given module path. + pub fn new(path: PathBuf, program: Program) -> Result { + let handle = thread::spawn(move || { + let mut module = + Module::new(path.clone(), quick_hash(&path), Vec::new(), program.clone()); + + // Execute the compilation pipeline + match module.compile() { + Ok(()) => Ok(module), + Err(e) => { + eprintln!( + "Error building program at path `{}`: {}", + path.display(), + e + ); + Err(e) + } + } + }); + + Ok(Self { + module_handle: Arc::new(handle), + id: Uuid::new_v4(), + }) + } + + /// Creates a task from an existing join handle (for compatibility). + pub fn from_handle(handle: JoinHandle>) -> Self { + Self { + module_handle: Arc::new(handle), + id: Uuid::new_v4(), + } + } + + /// Waits for the compilation task to complete and returns the compiled module. + pub fn join(self) -> Result { + let Some(join_handle) = Arc::try_unwrap(self.module_handle).ok() else { + let err_msg = String::from( + "Cannot take ownership of reference counted task join_handle, multiple references exist.", + ); + eprintln!("{err_msg}"); + return Err(AssembleError::Threading(err_msg)); + }; + + match join_handle.join() { + Ok(result) => result, + Err(panic_payload) => { + let err_msg = format!( + "Task thread panicked: {:?}", + panic_payload + .downcast_ref::() + .map(|s| s.as_str()) + .or_else(|| panic_payload.downcast_ref::<&str>().copied()) + .unwrap_or("Unknown panic") + ); + eprintln!("{err_msg}"); + Err(AssembleError::Threading(err_msg)) + } + } + } +} + +impl Clone for Task { + fn clone(&self) -> Self { + Self { + id: self.id.clone(), + module_handle: Arc::clone(&self.module_handle), + } + } +} diff --git a/assembler/src/assembler/util.rs b/assembler/src/assembler/util.rs new file mode 100644 index 0000000..9171dda --- /dev/null +++ b/assembler/src/assembler/util.rs @@ -0,0 +1,23 @@ +//! Utility functions for the assembler. + +use std::{ + hash::{DefaultHasher, Hash, Hasher}, + path::Path, +}; + +/// Quick hash function for file paths. +pub fn quick_hash(value: &Path) -> u64 { + let mut hasher = DefaultHasher::new(); + value + .canonicalize() + .expect("Failed to canonicalize path for quick_hash") + .to_str() + .hash(&mut hasher); + + hasher.finish() +} + +/// TODO: Use an actual logging or tracing library for pretty (scoped) output. +pub fn log(message: &str) { + println!("\x1b[32mINFO:\x1b[0m {message}"); +} diff --git a/assembler/src/util/logging.rs b/assembler/src/util/logging.rs index 93c37e1..4b72123 100644 --- a/assembler/src/util/logging.rs +++ b/assembler/src/util/logging.rs @@ -2,6 +2,7 @@ #![allow(unused)] use std::{fmt, sync::mpsc::Sender}; +#[derive(Debug, PartialEq)] pub struct Logger {} impl Logger { diff --git a/common/src/instructions.rs b/common/src/instructions.rs index 381e739..deeaddb 100644 --- a/common/src/instructions.rs +++ b/common/src/instructions.rs @@ -38,7 +38,7 @@ pub enum InstructionType { Immediate, } -#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] #[non_exhaustive] pub enum Register { // general purpose registers