assembler: Tokeniser updates, Compiler Engine is back finally

This commit is contained in:
2025-06-25 17:55:34 +01:00
parent 9b9e153500
commit 7565374d5b
6 changed files with 394 additions and 1 deletions
+375
View File
@@ -0,0 +1,375 @@
//! Simple compiler engine that orchestrates the entire compilation process.
use std::collections::{HashMap, HashSet};
use std::fmt;
use std::path::Path;
use std::sync::mpsc;
use std::thread;
use crate::{
context::AssemblerContext,
error::AssembleError,
model::module::ModuleId,
source::{token::Token, tokeniser::Tokeniser},
};
use common::instructions::Instruction;
/// Error type for the `CompilerEngine`.
///
/// Wraps the three kinds of failure this module reports: errors coming out of
/// the assembler itself, problems with the `mpsc` result channel, and free-form
/// messages. Conversions from the underlying error types are provided via
/// `From`, so `?` can be used directly.
#[derive(Debug)]
pub enum EngineError {
/// Assembly error during compilation; carries the originating `AssembleError`.
Assembly(AssembleError),
/// Channel communication error, stored as an already formatted message.
Channel(String),
/// Other generic error, stored as a plain message.
Other(String),
}
/// Human-readable rendering: a category prefix followed by the wrapped
/// error or message.
impl fmt::Display for EngineError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Assembly(e) => f.write_fmt(format_args!("Assembly error: {e}")),
            Self::Channel(msg) => f.write_fmt(format_args!("Channel error: {msg}")),
            Self::Other(msg) => f.write_fmt(format_args!("Engine error: {msg}")),
        }
    }
}
impl std::error::Error for EngineError {
    /// Only the `Assembly` variant wraps another error; the message-only
    /// variants have no underlying source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        if let Self::Assembly(inner) = self {
            Some(inner)
        } else {
            None
        }
    }
}
// Convert from AssembleError
impl From<AssembleError> for EngineError {
fn from(error: AssembleError) -> Self {
Self::Assembly(error)
}
}
// Convert from mpsc::SendError
impl<T> From<mpsc::SendError<T>> for EngineError {
fn from(error: mpsc::SendError<T>) -> Self {
Self::Channel(format!("Send error: {error}"))
}
}
// Convert from mpsc::RecvError
impl From<mpsc::RecvError> for EngineError {
fn from(error: mpsc::RecvError) -> Self {
Self::Channel(format!("Receive error: {error}"))
}
}
// Convert from mpsc::TryRecvError
impl From<mpsc::TryRecvError> for EngineError {
fn from(error: mpsc::TryRecvError) -> Self {
Self::Channel(format!("Try receive error: {error}"))
}
}
// Convert from String for generic errors
impl From<String> for EngineError {
fn from(error: String) -> Self {
Self::Other(error)
}
}
// Convert from &str for convenience
impl From<&str> for EngineError {
fn from(error: &str) -> Self {
Self::Other(error.to_string())
}
}
/// Simple compiler engine that orchestrates the entire compilation process.
///
/// Compilation runs on a background thread; its outcome is handed back over
/// an `mpsc` channel whose two halves are stored in this struct.
pub struct CompilerEngine {
// Sending half of the result channel; cloned into the worker thread.
result_tx: mpsc::Sender<Result<Vec<Instruction>, EngineError>>,
// Receiving half of the result channel. Held in an `Option`; the result
// accessors `expect` it to be `Some`.
result_rx: Option<mpsc::Receiver<Result<Vec<Instruction>, EngineError>>>,
// True between a successful `start_compilation` and delivery of its result.
is_running: bool,
}
impl CompilerEngine {
    /// Create a new, idle compiler engine.
    ///
    /// The result channel is created once here and reused for every
    /// compilation started on this engine.
    #[must_use]
    pub fn new() -> Self {
        let (tx, rx) = mpsc::channel();
        Self {
            result_tx: tx,
            result_rx: Some(rx),
            is_running: false,
        }
    }

    /// Start the compilation process for `src` in a separate thread.
    ///
    /// The call is silently ignored when a compilation is already in
    /// progress. The outcome is delivered through the result channel and can
    /// be collected with [`Self::try_get_result`] or
    /// [`Self::wait_for_result`].
    pub fn start_compilation<P: AsRef<Path>>(&mut self, src: P) {
        if self.is_running {
            return;
        }
        let src = src.as_ref().to_path_buf();
        let tx = self.result_tx.clone();
        thread::spawn(move || {
            let result = assemble(&src).map_err(EngineError::from);
            let _ = tx.send(result); // Ignore send errors if receiver is dropped
        });
        self.is_running = true;
    }

    /// Check if compilation is complete and get the result.
    ///
    /// Returns `None` when no compilation is running or when the result is
    /// not available yet; returns `Some(..)` exactly once per compilation.
    ///
    /// # Panics
    ///
    /// Panics if the receiver is missing, which cannot happen for an engine
    /// built through [`Self::new`].
    pub fn try_get_result(&mut self) -> Option<Result<Vec<Instruction>, EngineError>> {
        if !self.is_running {
            return None;
        }
        match self
            .result_rx
            .as_ref()
            .expect("result_rx should be Some while compilation is running")
            .try_recv()
        {
            Ok(result) => {
                self.is_running = false;
                Some(result)
            }
            Err(mpsc::TryRecvError::Empty) => None,
            Err(mpsc::TryRecvError::Disconnected) => {
                self.is_running = false;
                Some(Err(EngineError::Channel(
                    "Compilation thread disconnected".to_string(),
                )))
            }
        }
    }

    /// Block until compilation is complete and return the result.
    ///
    /// # Errors
    ///
    /// Returns `EngineError::Other` when no compilation is in progress,
    /// `EngineError::Channel` when receiving fails, or whatever error the
    /// compilation itself produced.
    ///
    /// # Panics
    ///
    /// Panics if the receiver is missing, which cannot happen for an engine
    /// built through [`Self::new`].
    pub fn wait_for_result(&mut self) -> Result<Vec<Instruction>, EngineError> {
        if !self.is_running {
            return Err(EngineError::Other("No compilation in progress".to_string()));
        }
        // Borrow the receiver instead of taking it out of the `Option`:
        // removing it would make every later `try_get_result` /
        // `wait_for_result` call on this engine panic.
        // NOTE(review): the engine keeps its own `Sender`, so the channel does
        // not disconnect if the worker dies without sending; `recv` would then
        // block indefinitely.
        let received = self
            .result_rx
            .as_ref()
            .expect("result_rx should be Some while waiting for compilation result")
            .recv();
        // The wait is over whether or not the receive succeeded.
        self.is_running = false;
        received.map_err(EngineError::from)?
    }

    /// Add a source file to be compiled (for compatibility with old interface).
    ///
    /// Currently this only checks that `path` exists; nothing is stored.
    ///
    /// # Errors
    ///
    /// Returns an `EngineError::Assembly` wrapping a `NotFound` I/O error
    /// when the path does not exist.
    pub fn add_source_file<P: AsRef<Path>>(
        &mut self,
        path: P,
    ) -> Result<(), EngineError> {
        let path = path.as_ref();
        // Verify file exists
        if !path.exists() {
            return Err(EngineError::Assembly(AssembleError::new_other_error(
                crate::error::AssembleErrorKind::Io(crate::error::IoError::new(
                    crate::error::IoErrorKind::NotFound,
                    Some(format!("Source file not found: {}", path.display())),
                )),
            )));
        }
        // For now, just validate the file exists
        // TODO: Could store multiple files for batch compilation
        Ok(())
    }

    /// Compile all added source files (synchronous version).
    ///
    /// Placeholder matching the old interface: always returns an empty
    /// [`CompileResult`].
    ///
    /// # Errors
    ///
    /// Never fails at present.
    pub fn compile(&mut self) -> Result<CompileResult, EngineError> {
        // This is a placeholder that matches the old interface
        // For now, return empty result since we don't have a specific file to compile
        Ok(CompileResult {
            modules: Vec::new(),
            tokens: HashMap::new(),
        })
    }

    /// Get access to the assembler context (placeholder).
    ///
    /// # Errors
    ///
    /// Always returns `EngineError::Other`, because the context lives inside
    /// the compilation thread and is not exposed.
    pub fn context(&self) -> Result<&AssemblerContext, EngineError> {
        // For now, return an error since we're using the threaded approach
        // TODO: Integrate context properly when we have more compilation phases
        Err(EngineError::Other(
            "Context not available in threaded mode".to_string(),
        ))
    }
}
impl Default for CompilerEngine {
fn default() -> Self {
Self::new()
}
}
/// Main assembly function that orchestrates the compilation process
fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
// Verify the file exists
if !src.exists() {
return Err(AssembleError::new_other_error(
crate::error::AssembleErrorKind::Io(crate::error::IoError::new(
crate::error::IoErrorKind::NotFound,
Some(format!("Source file not found: {}", src.display())),
)),
));
}
let mut modules = HashSet::new();
let mut all_tokens = HashMap::new();
let mut module_ids = Vec::new();
// Create a new assembler context for this compilation
let context = AssemblerContext::new();
// Process the main file and its dependencies
prepare_dependency(
src,
&mut modules,
&mut all_tokens,
&mut module_ids,
&context,
)?;
// Phase 2: Parse tokens into AST (placeholder for now)
// TODO: Add parser here when implemented
println!("Phase 2: Parsing {} modules...", module_ids.len());
// Phase 3: Symbol resolution (placeholder for now)
// TODO: Add symbol resolution here when implemented
println!("Phase 3: Resolving symbols...");
// Phase 4: Code generation (placeholder for now)
// TODO: Add code generation here when implemented
println!("Phase 4: Generating code...");
// For now, return empty instructions since we don't have the full pipeline yet
Ok(Vec::new())
}
/// Prepare a dependency (file) for compilation
fn prepare_dependency(
path: &Path,
modules: &mut HashSet<u64>,
all_tokens: &mut HashMap<ModuleId, Vec<Token>>,
module_ids: &mut Vec<ModuleId>,
context: &AssemblerContext,
) -> Result<(), AssembleError> {
let filename = path.file_name().and_then(|n| n.to_str()).ok_or_else(|| {
AssembleError::new_other_error(crate::error::AssembleErrorKind::Io(
crate::error::IoError::new(
crate::error::IoErrorKind::InvalidData,
Some("Failed to get file name from path".to_string()),
),
))
})?;
// Calculate a simple hash for the file (similar to quick_hash)
let file_hash = calculate_file_hash(path);
// Skip if we've already processed this module
if modules.contains(&file_hash) {
return Ok(());
}
modules.insert(file_hash);
if let Ok(canonical_path) = path.canonicalize() {
println!("Building {} [{}]", filename, canonical_path.display());
}
// Phase 1: Tokenize the file
println!("Tokenising {filename}");
let tokeniser = Tokeniser::new(path)?;
let tokens = tokeniser.tokenise(context)?;
// Get the module ID that was registered during tokenization
let module_id = get_module_id_for_file(path, context)?;
all_tokens.insert(module_id, tokens);
module_ids.push(module_id);
// TODO: Parse tokens to find dependencies (.include directives, etc.)
// For now, we'll just process the single file
println!("Resolving dependencies for {filename}");
Ok(())
}
/// Hash a file path into a `u64` (similar to the old `quick_hash`).
///
/// The canonicalised path is hashed when canonicalisation succeeds; otherwise
/// the path is hashed exactly as given. File contents are never read.
fn calculate_file_hash(path: &Path) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    match path.canonicalize() {
        Ok(resolved) => resolved.hash(&mut state),
        Err(_) => path.hash(&mut state),
    }
    state.finish()
}
/// Get the module ID for a given source file
fn get_module_id_for_file(
file_path: &Path,
context: &AssemblerContext,
) -> Result<ModuleId, AssembleError> {
{
let registry = context.module_registry.read()?;
// Find module by path
for module in registry.modules() {
if module.path == file_path {
return Ok(module.id);
}
}
}
Err(AssembleError::new_other_error(
crate::error::AssembleErrorKind::Io(crate::error::IoError::new(
crate::error::IoErrorKind::NotFound,
Some(format!(
"Module not found for file: {}",
file_path.display()
)),
)),
))
}
/// Result of compilation: the modules that were processed and the tokens
/// produced for each of them.
///
/// In this file it is only ever built empty, by the placeholder
/// `CompilerEngine::compile`.
#[derive(Debug)]
pub struct CompileResult {
/// IDs of the modules covered by this result.
pub modules: Vec<ModuleId>,
/// Token list per module, keyed by module ID.
pub tokens: HashMap<ModuleId, Vec<Token>>,
}
impl CompileResult {
    /// Tokens recorded for `module_id`, or `None` when that module is unknown.
    #[must_use]
    pub fn get_tokens(&self, module_id: &ModuleId) -> Option<&Vec<Token>> {
        let per_module = &self.tokens;
        per_module.get(module_id)
    }

    /// All module IDs held by this result, as a slice.
    #[must_use]
    pub fn module_ids(&self) -> &[ModuleId] {
        self.modules.as_slice()
    }

    /// Number of tokens summed over every module.
    #[must_use]
    pub fn total_tokens(&self) -> usize {
        self.tokens
            .values()
            .fold(0, |running, module_tokens| running + module_tokens.len())
    }
}
+1
View File
@@ -15,6 +15,7 @@
pub mod args;
pub mod image_builder;
// pub mod tooling;
pub mod compiler_engine;
pub mod context;
pub mod error;
pub mod model;
+2
View File
@@ -31,6 +31,8 @@ pub enum TokenType {
Newline,
/// End of file.
Eof,
/// A line comment. This is to be filtered out of the token stream.
Comment,
}
#[derive(Debug)]
+14
View File
@@ -40,6 +40,7 @@ pub struct Tokeniser {
instruction_regex: Regex,
symbol_regex: Regex,
string_regex: Regex,
comment_regex: Regex,
}
impl Tokeniser {
@@ -65,6 +66,8 @@ impl Tokeniser {
.expect("Failed to compile symbol regex pattern"),
string_regex: Regex::new(r#"^"([^"]*)"#)
.expect("Failed to compile string regex pattern"),
comment_regex: Regex::new("//.*")
.expect("Failed to compile comment regex pattern"),
}
}
@@ -155,6 +158,13 @@ impl Tokeniser {
Ok(tokens)
}
/// Try to match a `//` line comment at the very start of `input`.
///
/// Returns `TokenType::Comment` together with the number of bytes the
/// comment occupies, or `None` when `input` does not begin with a comment.
fn try_match_comment(&self, input: &str) -> Option<(TokenType, usize)> {
    let found = self.comment_regex.find(input)?;
    // The comment pattern is not anchored, so it can also match a comment
    // further along the line. The returned length is consumed from the start
    // of `input`, so accepting such a match would swallow the real tokens
    // that precede the comment. Only a match at offset 0 counts.
    if found.start() != 0 {
        return None;
    }
    // With the match starting at 0, its end offset is its length.
    Some((TokenType::Comment, found.end()))
}
fn try_match_label(&self, input: &str) -> Option<(TokenType, usize)> {
let caps = self.label_regex.captures(input)?;
let name = caps.get(1)?.as_str().to_string();
@@ -222,6 +232,10 @@ impl Tokeniser {
}
fn match_token(&self, input: &str) -> Result<(TokenType, usize), AssembleError> {
if let Some(m) = self.try_match_comment(input) {
return Ok(m);
}
if let Some(m) = self.try_match_label(input) {
return Ok(m);
}
+1 -1
View File
@@ -2,7 +2,7 @@ pub mod logging;
use std::io::Write;
pub fn input(prompt: &str) -> String {
pub fn _input(prompt: &str) -> String {
print!("{prompt}\n > ");
std::io::stdout().flush().expect("Failed to flush stdout");
let mut input = String::new();
+1
View File
@@ -5,6 +5,7 @@ use std::{
path::{Path, PathBuf},
};
use assembler::compiler_engine::CompilerEngine;
use common::prelude::Instruction;
use egui::{Align, Context, Key, Layout, Ui};