From 27267e3daa09cacaf017bfa34ca7e0b1e6989b0b Mon Sep 17 00:00:00 2001 From: "J. Hinchliffe" Date: Wed, 25 Jun 2025 17:03:48 +0100 Subject: [PATCH] assembler: use smart pointer for modules since sourceinfo gets copy --- assembler/src/model/module_registry.rs | 10 ++-- assembler/src/source/tokeniser.rs | 68 ++++++++++++++++++-------- 2 files changed, 52 insertions(+), 26 deletions(-) diff --git a/assembler/src/model/module_registry.rs b/assembler/src/model/module_registry.rs index 72ed50a..21e4094 100644 --- a/assembler/src/model/module_registry.rs +++ b/assembler/src/model/module_registry.rs @@ -1,13 +1,13 @@ //! This module contains the code for the module registry. This is a singleton storing all //! the modules being assembled. -use std::collections::HashMap; +use std::{collections::HashMap, sync::Arc}; use super::module::{Module, ModuleId}; /// Stores all the [`Module`]'s to be assembled. pub struct ModuleRegistry { - modules: HashMap, + modules: HashMap>, } impl Default for ModuleRegistry { @@ -26,19 +26,19 @@ impl ModuleRegistry { /// Gets a [`Module`] by ID. #[must_use] - pub fn get(&self, module_id: &ModuleId) -> Option<&Module> { + pub fn get(&self, module_id: &ModuleId) -> Option<&Arc> { self.modules.get(module_id) } /// Adds a [`Module`] and returns its [`ModuleId`]. - pub fn add(&mut self, module: Module) -> ModuleId { + pub fn add(&mut self, module: Arc) -> ModuleId { let id = module.id; self.modules.insert(id, module); id } /// Returns an iterator of modules. - pub fn modules(&self) -> impl Iterator { + pub fn modules(&self) -> impl Iterator> { self.modules.values() } } diff --git a/assembler/src/source/tokeniser.rs b/assembler/src/source/tokeniser.rs index 1f3c588..b996c88 100644 --- a/assembler/src/source/tokeniser.rs +++ b/assembler/src/source/tokeniser.rs @@ -1,7 +1,10 @@ //! This file contains the [`Tokeniser`], which consumes a [`Vec`] of input bytes and //! outputs a [`Vec`]. -use std::path::{Path, PathBuf}; +use std::{ + path::{Path, PathBuf}, + sync::Arc, +}; use regex::Regex; @@ -10,7 +13,10 @@ use crate::{ error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind}, model::module::Module, source::{ - lines::lines_with_spans, load_source_bytes, token::Token, + lines::lines_with_spans, + load_source_bytes, + source_info::SourceInfo, + token::{Token, TokenType}, }, }; @@ -69,37 +75,57 @@ impl Tokeniser { // Note that modules are tokenised in their own threads, possibly in parallel. pub fn tokenise(self, ctx: &AssemblerContext) -> Result, AssembleError> { - let lines = lines_with_spans(&self.data); - - let Some(module_name) = self.path.file_name().and_then(|f| f.to_str()) else { - return Err(AssembleError::new_other_error(AssembleErrorKind::Io( - IoError::new( - IoErrorKind::InvalidData, - Some( - "filename couldn't be extracted, is it valid UTF-8?".to_string(), - ), - ), - ))); - }; + let module_name = self.extract_module_name()?; + let _file_path = self.path.to_string_lossy().to_string(); // Create a module for the source file being processed. - let module = Module::new(module_name.to_string(), &self.path); + let module = Arc::new(Module::new(module_name, &self.path)); { let mut module_registry = ctx.module_registry.write()?; - module_registry.add(module); + module_registry.add(module.clone()); } + let mut token_stream = Vec::new(); + let lines = lines_with_spans(&self.data); + // Technically ignores newlines since line will be trimmed. We just append a // Newline token for each line. for line_result in lines { - let line = line_result?; + let line_span = line_result?; - eprintln!("{}", line.line_number); + // Skip empty lines and comments + let trimmed = line_span.content.trim(); + + // Add newline token on blank lines. + if trimmed.is_empty() { + token_stream.push(Token::new( + TokenType::Newline, + SourceInfo::new(line_span.line_number, module.clone(), 0..1), + )); + continue; + } + + eprintln!("{}", line_span.line_number); } - Err(AssembleError::new_other_error( - AssembleErrorKind::Unimplemented("tokeniser not written yet!"), - )) + Ok(token_stream) + } + + fn extract_module_name(&self) -> Result { + let module_name = self + .path + .file_name() + .and_then(|f| Some(f.to_string_lossy().to_string())) + .ok_or_else(|| { + AssembleError::new_other_error(AssembleErrorKind::Io(IoError::new( + IoErrorKind::InvalidData, + Some( + "filename couldn't be extracted, is it valid UTF-8?".to_string(), + ), + ))) + })?; + + Ok(module_name) } }