From 7565374d5b82fd968660c913c3b743ad5f91eb97 Mon Sep 17 00:00:00 2001
From: "J. Hinchliffe"
Date: Wed, 25 Jun 2025 17:55:34 +0100
Subject: [PATCH] assembler: Tokeniser updates, Compiler Engine is back finally

---
Review amendments (blob hashes on the `index` lines predate them):
 - compiler_engine.rs: each run gets its own channel and the worker owns the
   only sender, so a dead worker is reported as a disconnect; the receiver
   is no longer consumed for good by `wait_for_result`.
 - tokeniser.rs: the comment pattern is anchored at the start of the input.

 assembler/src/compiler_engine.rs   | 386 +++++++++++++++++++++++++++++
 assembler/src/lib.rs               |   1 +
 assembler/src/source/token.rs      |   2 +
 assembler/src/source/tokeniser.rs  |  15 ++
 assembler/src/util/mod.rs          |   2 +-
 emulator/src/emulator/ui/editor.rs |   1 +
 6 files changed, 406 insertions(+), 1 deletion(-)
 create mode 100644 assembler/src/compiler_engine.rs

diff --git a/assembler/src/compiler_engine.rs b/assembler/src/compiler_engine.rs
new file mode 100644
index 0000000..4728136
--- /dev/null
+++ b/assembler/src/compiler_engine.rs
@@ -0,0 +1,386 @@
+//! Simple compiler engine that orchestrates the entire compilation process.
+//!
+//! Compilation runs on a background thread; [`CompilerEngine`] hands the
+//! result back over a channel so callers can either poll or block for it.
+
+use std::collections::{HashMap, HashSet};
+use std::fmt;
+use std::path::Path;
+use std::sync::mpsc;
+use std::thread;
+
+use crate::{
+    context::AssemblerContext,
+    error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind},
+    model::module::ModuleId,
+    source::{token::Token, tokeniser::Tokeniser},
+};
+
+use common::instructions::Instruction;
+
+/// Error type for the `CompilerEngine`
+#[derive(Debug)]
+pub enum EngineError {
+    /// Assembly error during compilation
+    Assembly(AssembleError),
+    /// Channel communication error
+    Channel(String),
+    /// Other generic error
+    Other(String),
+}
+
+impl fmt::Display for EngineError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Assembly(e) => write!(f, "Assembly error: {e}"),
+            Self::Channel(msg) => write!(f, "Channel error: {msg}"),
+            Self::Other(msg) => write!(f, "Engine error: {msg}"),
+        }
+    }
+}
+
+impl std::error::Error for EngineError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            Self::Assembly(e) => Some(e),
+            Self::Channel(_) | Self::Other(_) => None,
+        }
+    }
+}
+
+// Convert from AssembleError
+impl From<AssembleError> for EngineError {
+    fn from(error: AssembleError) -> Self {
+        Self::Assembly(error)
+    }
+}
+
+// Convert from mpsc::SendError
+impl<T> From<mpsc::SendError<T>> for EngineError {
+    fn from(error: mpsc::SendError<T>) -> Self {
+        Self::Channel(format!("Send error: {error}"))
+    }
+}
+
+// Convert from mpsc::RecvError
+impl From<mpsc::RecvError> for EngineError {
+    fn from(error: mpsc::RecvError) -> Self {
+        Self::Channel(format!("Receive error: {error}"))
+    }
+}
+
+// Convert from mpsc::TryRecvError
+impl From<mpsc::TryRecvError> for EngineError {
+    fn from(error: mpsc::TryRecvError) -> Self {
+        Self::Channel(format!("Try receive error: {error}"))
+    }
+}
+
+// Convert from String for generic errors
+impl From<String> for EngineError {
+    fn from(error: String) -> Self {
+        Self::Other(error)
+    }
+}
+
+// Convert from &str for convenience
+impl From<&str> for EngineError {
+    fn from(error: &str) -> Self {
+        Self::Other(error.to_string())
+    }
+}
+
+/// Simple compiler engine that orchestrates the entire compilation process.
+///
+/// At most one compilation is in flight at a time. Every run gets its own
+/// channel whose only sender lives on the worker thread, so a worker that
+/// exits without reporting is seen as a disconnect rather than a hang.
+pub struct CompilerEngine {
+    /// Receiving end for the in-flight compilation; `None` when idle.
+    result_rx: Option<mpsc::Receiver<Result<Vec<Instruction>, EngineError>>>,
+}
+
+impl CompilerEngine {
+    /// Create a new compiler engine
+    #[must_use]
+    pub fn new() -> Self {
+        Self { result_rx: None }
+    }
+
+    /// Start the compilation process in a separate thread
+    ///
+    /// Does nothing if a compilation is already running or its result has
+    /// not been collected yet.
+    pub fn start_compilation<P: AsRef<Path>>(&mut self, src: P) {
+        if self.result_rx.is_some() {
+            return;
+        }
+
+        let src = src.as_ref().to_path_buf();
+        let (tx, rx) = mpsc::channel();
+
+        thread::spawn(move || {
+            let result = assemble(&src).map_err(EngineError::from);
+            let _ = tx.send(result); // Ignore send errors if receiver is dropped
+        });
+
+        self.result_rx = Some(rx);
+    }
+
+    /// Check if compilation is complete and get the result
+    ///
+    /// Returns `None` when no compilation is running or the running one has
+    /// not finished yet. After `Some(..)` is returned the engine is idle.
+    pub fn try_get_result(&mut self) -> Option<Result<Vec<Instruction>, EngineError>> {
+        let outcome = match self.result_rx.as_ref()?.try_recv() {
+            Ok(result) => result,
+            Err(mpsc::TryRecvError::Empty) => return None,
+            Err(mpsc::TryRecvError::Disconnected) => Err(EngineError::Channel(
+                "Compilation thread disconnected".to_string(),
+            )),
+        };
+
+        self.result_rx = None;
+        Some(outcome)
+    }
+
+    /// Block until compilation is complete and return the result
+    ///
+    /// # Errors
+    ///
+    /// Returns [`EngineError::Other`] when no compilation is in progress,
+    /// [`EngineError::Channel`] when the worker exits without reporting, and
+    /// otherwise whatever error the compilation itself produced.
+    pub fn wait_for_result(&mut self) -> Result<Vec<Instruction>, EngineError> {
+        // Taking the receiver marks the engine idle whatever `recv` returns.
+        let rx = self
+            .result_rx
+            .take()
+            .ok_or_else(|| EngineError::Other("No compilation in progress".to_string()))?;
+
+        rx.recv()?
+    }
+
+    /// Add a source file to be compiled (for compatibility with old interface)
+    ///
+    /// # Errors
+    ///
+    /// Returns [`EngineError::Assembly`] wrapping a not-found I/O error when
+    /// `path` does not exist.
+    pub fn add_source_file<P: AsRef<Path>>(&mut self, path: P) -> Result<(), EngineError> {
+        let path = path.as_ref();
+
+        // Verify file exists
+        if !path.exists() {
+            return Err(source_not_found(path).into());
+        }
+
+        // For now, just validate the file exists
+        // TODO: Could store multiple files for batch compilation
+        Ok(())
+    }
+
+    /// Compile all added source files (synchronous version)
+    ///
+    /// # Errors
+    ///
+    /// Currently never fails; the `Result` matches the old interface.
+    pub fn compile(&mut self) -> Result<CompileResult, EngineError> {
+        // This is a placeholder that matches the old interface
+        // For now, return empty result since we don't have a specific file to compile
+        Ok(CompileResult {
+            modules: Vec::new(),
+            tokens: HashMap::new(),
+        })
+    }
+
+    /// Get access to the assembler context (placeholder)
+    ///
+    /// # Errors
+    ///
+    /// Always returns [`EngineError::Other`] for now.
+    pub fn context(&self) -> Result<&AssemblerContext, EngineError> {
+        // For now, return an error since we're using the threaded approach
+        // TODO: Integrate context properly when we have more compilation phases
+        Err(EngineError::Other(
+            "Context not available in threaded mode".to_string(),
+        ))
+    }
+}
+
+impl Default for CompilerEngine {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Build an [`AssembleError`] carrying an I/O error of `kind` with `message`.
+fn io_error(kind: IoErrorKind, message: String) -> AssembleError {
+    AssembleError::new_other_error(AssembleErrorKind::Io(IoError::new(kind, Some(message))))
+}
+
+/// Build the error reported when the source file at `path` does not exist.
+fn source_not_found(path: &Path) -> AssembleError {
+    io_error(
+        IoErrorKind::NotFound,
+        format!("Source file not found: {}", path.display()),
+    )
+}
+
+/// Main assembly function that orchestrates the compilation process
+///
+/// Only tokenisation (phase 1) is implemented; the later phases are
+/// placeholders, so a successful run currently yields no instructions.
+fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
+    // Verify the file exists
+    if !src.exists() {
+        return Err(source_not_found(src));
+    }
+
+    let mut modules = HashSet::new();
+    let mut all_tokens = HashMap::new();
+    let mut module_ids = Vec::new();
+
+    // Create a new assembler context for this compilation
+    let context = AssemblerContext::new();
+
+    // Process the main file and its dependencies
+    prepare_dependency(
+        src,
+        &mut modules,
+        &mut all_tokens,
+        &mut module_ids,
+        &context,
+    )?;
+
+    // Phase 2: Parse tokens into AST (placeholder for now)
+    // TODO: Add parser here when implemented
+    println!("Phase 2: Parsing {} modules...", module_ids.len());
+
+    // Phase 3: Symbol resolution (placeholder for now)
+    // TODO: Add symbol resolution here when implemented
+    println!("Phase 3: Resolving symbols...");
+
+    // Phase 4: Code generation (placeholder for now)
+    // TODO: Add code generation here when implemented
+    println!("Phase 4: Generating code...");
+
+    // For now, return empty instructions since we don't have the full pipeline yet
+    Ok(Vec::new())
+}
+
+/// Prepare a dependency (file) for compilation
+///
+/// Tokenises `path` and records its tokens and module ID, unless a file
+/// with the same path hash has already been recorded in `modules`.
+fn prepare_dependency(
+    path: &Path,
+    modules: &mut HashSet<u64>,
+    all_tokens: &mut HashMap<ModuleId, Vec<Token>>,
+    module_ids: &mut Vec<ModuleId>,
+    context: &AssemblerContext,
+) -> Result<(), AssembleError> {
+    let filename = path.file_name().and_then(|n| n.to_str()).ok_or_else(|| {
+        io_error(
+            IoErrorKind::InvalidData,
+            "Failed to get file name from path".to_string(),
+        )
+    })?;
+
+    // Skip if we've already processed this module: `insert` returns `false`
+    // when the hash (similar to quick_hash) is already in the set.
+    if !modules.insert(calculate_file_hash(path)) {
+        return Ok(());
+    }
+
+    if let Ok(canonical_path) = path.canonicalize() {
+        println!("Building {} [{}]", filename, canonical_path.display());
+    }
+
+    // Phase 1: Tokenize the file
+    println!("Tokenising {filename}");
+    let tokeniser = Tokeniser::new(path)?;
+    let tokens = tokeniser.tokenise(context)?;
+
+    // Get the module ID that was registered during tokenization
+    let module_id = get_module_id_for_file(path, context)?;
+
+    all_tokens.insert(module_id, tokens);
+    module_ids.push(module_id);
+
+    // TODO: Parse tokens to find dependencies (.include directives, etc.)
+    // For now, we'll just process the single file
+    println!("Resolving dependencies for {filename}");
+
+    Ok(())
+}
+
+/// Calculate a simple hash for a file path (similar to the old `quick_hash`)
+///
+/// Hashes the canonical path when it can be resolved, else `path` as given.
+fn calculate_file_hash(path: &Path) -> u64 {
+    use std::collections::hash_map::DefaultHasher;
+    use std::hash::{Hash, Hasher};
+
+    let mut hasher = DefaultHasher::new();
+    if let Ok(canonical) = path.canonicalize() {
+        canonical.hash(&mut hasher);
+    } else {
+        path.hash(&mut hasher);
+    }
+    hasher.finish()
+}
+
+/// Get the module ID for a given source file
+///
+/// # Errors
+///
+/// Returns a not-found I/O error when no registered module has `file_path`
+/// as its path.
+fn get_module_id_for_file(
+    file_path: &Path,
+    context: &AssemblerContext,
+) -> Result<ModuleId, AssembleError> {
+    {
+        let registry = context.module_registry.read()?;
+
+        // Find module by path
+        for module in registry.modules() {
+            if module.path == file_path {
+                return Ok(module.id);
+            }
+        }
+    }
+
+    Err(io_error(
+        IoErrorKind::NotFound,
+        format!("Module not found for file: {}", file_path.display()),
+    ))
+}
+
+/// Result of compilation. This is useless at present but compiles.
+#[derive(Debug)]
+pub struct CompileResult {
+    pub modules: Vec<ModuleId>,
+    pub tokens: HashMap<ModuleId, Vec<Token>>,
+}
+
+impl CompileResult {
+    /// Get tokens for a specific module
+    #[must_use]
+    pub fn get_tokens(&self, module_id: &ModuleId) -> Option<&Vec<Token>> {
+        self.tokens.get(module_id)
+    }
+
+    /// Get all module IDs
+    #[must_use]
+    pub fn module_ids(&self) -> &[ModuleId] {
+        &self.modules
+    }
+
+    /// Get total number of tokens across all modules
+    #[must_use]
+    pub fn total_tokens(&self) -> usize {
+        self.tokens.values().map(std::vec::Vec::len).sum()
+    }
+}
diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs
index 22be5b9..87ed68f 100644
--- a/assembler/src/lib.rs
+++ b/assembler/src/lib.rs
@@ -15,6 +15,7 @@
 pub mod args;
 pub mod image_builder;
 // pub mod tooling;
+pub mod compiler_engine;
 pub mod context;
 pub mod error;
 pub mod model;
diff --git a/assembler/src/source/token.rs b/assembler/src/source/token.rs
index 4314128..f327295 100644
--- a/assembler/src/source/token.rs
+++ b/assembler/src/source/token.rs
@@ -31,6 +31,8 @@ pub enum TokenType {
     Newline,
     /// End of file.
     Eof,
+    /// A line comment. This is to be filtered out of the token stream.
+    Comment,
 }
 
 #[derive(Debug)]
diff --git a/assembler/src/source/tokeniser.rs b/assembler/src/source/tokeniser.rs
index b68c0d8..a7e34d8 100644
--- a/assembler/src/source/tokeniser.rs
+++ b/assembler/src/source/tokeniser.rs
@@ -40,6 +40,7 @@ pub struct Tokeniser {
     instruction_regex: Regex,
     symbol_regex: Regex,
     string_regex: Regex,
+    comment_regex: Regex,
 }
 
 impl Tokeniser {
@@ -65,6 +66,9 @@ impl Tokeniser {
                 .expect("Failed to compile symbol regex pattern"),
             string_regex: Regex::new(r#"^"([^"]*)"#)
                 .expect("Failed to compile string regex pattern"),
+            // Anchored: only the match length is returned, so it must start at offset 0.
+            comment_regex: Regex::new("^//.*")
+                .expect("Failed to compile comment regex pattern"),
         }
     }
 
@@ -155,6 +159,13 @@ impl Tokeniser {
         Ok(tokens)
     }
 
+    fn try_match_comment(&self, input: &str) -> Option<(TokenType, usize)> {
+        let caps = self.comment_regex.captures(input)?;
+        let len = caps.get(0)?.len();
+
+        Some((TokenType::Comment, len))
+    }
+
     fn try_match_label(&self, input: &str) -> Option<(TokenType, usize)> {
         let caps = self.label_regex.captures(input)?;
         let name = caps.get(1)?.as_str().to_string();
@@ -222,6 +233,10 @@ impl Tokeniser {
     }
 
     fn match_token(&self, input: &str) -> Result<(TokenType, usize), AssembleError> {
+        if let Some(m) = self.try_match_comment(input) {
+            return Ok(m);
+        }
+
         if let Some(m) = self.try_match_label(input) {
             return Ok(m);
         }
diff --git a/assembler/src/util/mod.rs b/assembler/src/util/mod.rs
index c8746e4..e3323ec 100644
--- a/assembler/src/util/mod.rs
+++ b/assembler/src/util/mod.rs
@@ -2,7 +2,7 @@ pub mod logging;
 
 use std::io::Write;
 
-pub fn input(prompt: &str) -> String {
+pub fn _input(prompt: &str) -> String {
     print!("{prompt}\n > ");
     std::io::stdout().flush().expect("Failed to flush stdout");
     let mut input = String::new();
diff --git a/emulator/src/emulator/ui/editor.rs b/emulator/src/emulator/ui/editor.rs
index 7e70d88..53659bb 100644
--- a/emulator/src/emulator/ui/editor.rs
+++ b/emulator/src/emulator/ui/editor.rs
@@ -5,6 +5,7 @@ use std::{
     path::{Path, PathBuf},
 };
 
+use assembler::compiler_engine::CompilerEngine;
 use common::prelude::Instruction;
 use egui::{Align, Context, Key, Layout, Ui};
 