assembler: use a smart pointer for modules since SourceInfo gets copied

This commit is contained in:
2025-06-25 17:03:48 +01:00
parent fb84a6d3c3
commit 27267e3daa
2 changed files with 52 additions and 26 deletions
+5 -5
View File
@@ -1,13 +1,13 @@
//! This module contains the code for the module registry. This is a singleton storing all
//! the modules being assembled.
use std::collections::HashMap;
use std::{collections::HashMap, sync::Arc};
use super::module::{Module, ModuleId};
/// Stores all the [`Module`]s to be assembled.
pub struct ModuleRegistry {
modules: HashMap<ModuleId, Module>,
modules: HashMap<ModuleId, Arc<Module>>,
}
impl Default for ModuleRegistry {
@@ -26,19 +26,19 @@ impl ModuleRegistry {
/// Gets a [`Module`] by ID.
#[must_use]
pub fn get(&self, module_id: &ModuleId) -> Option<&Module> {
pub fn get(&self, module_id: &ModuleId) -> Option<&Arc<Module>> {
self.modules.get(module_id)
}
/// Adds a [`Module`] and returns its [`ModuleId`].
pub fn add(&mut self, module: Module) -> ModuleId {
pub fn add(&mut self, module: Arc<Module>) -> ModuleId {
let id = module.id;
self.modules.insert(id, module);
id
}
/// Returns an iterator of modules.
pub fn modules(&self) -> impl Iterator<Item = &Module> {
pub fn modules(&self) -> impl Iterator<Item = &Arc<Module>> {
self.modules.values()
}
}
+47 -21
View File
@@ -1,7 +1,10 @@
//! This file contains the [`Tokeniser`], which consumes a [`Vec`] of input bytes and
//! outputs a [`Vec<Token>`].
use std::path::{Path, PathBuf};
use std::{
path::{Path, PathBuf},
sync::Arc,
};
use regex::Regex;
@@ -10,7 +13,10 @@ use crate::{
error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind},
model::module::Module,
source::{
lines::lines_with_spans, load_source_bytes, token::Token,
lines::lines_with_spans,
load_source_bytes,
source_info::SourceInfo,
token::{Token, TokenType},
},
};
@@ -69,37 +75,57 @@ impl Tokeniser {
// Note that modules are tokenised in their own threads, possibly in parallel.
pub fn tokenise(self, ctx: &AssemblerContext) -> Result<Vec<Token>, AssembleError> {
let lines = lines_with_spans(&self.data);
let Some(module_name) = self.path.file_name().and_then(|f| f.to_str()) else {
return Err(AssembleError::new_other_error(AssembleErrorKind::Io(
IoError::new(
IoErrorKind::InvalidData,
Some(
"filename couldn't be extracted, is it valid UTF-8?".to_string(),
),
),
)));
};
let module_name = self.extract_module_name()?;
let _file_path = self.path.to_string_lossy().to_string();
// Create a module for the source file being processed.
let module = Module::new(module_name.to_string(), &self.path);
let module = Arc::new(Module::new(module_name, &self.path));
{
let mut module_registry = ctx.module_registry.write()?;
module_registry.add(module);
module_registry.add(module.clone());
}
let mut token_stream = Vec::new();
let lines = lines_with_spans(&self.data);
// Technically ignores newlines since line will be trimmed. We just append a
// Newline token for each line.
for line_result in lines {
let line = line_result?;
let line_span = line_result?;
eprintln!("{}", line.line_number);
// Skip empty lines and comments
let trimmed = line_span.content.trim();
// Add newline token on blank lines.
if trimmed.is_empty() {
token_stream.push(Token::new(
TokenType::Newline,
SourceInfo::new(line_span.line_number, module.clone(), 0..1),
));
continue;
}
eprintln!("{}", line_span.line_number);
}
Err(AssembleError::new_other_error(
AssembleErrorKind::Unimplemented("tokeniser not written yet!"),
))
Ok(token_stream)
}
/// Derives the module name from the final component of `self.path`.
///
/// The component is converted with `to_string_lossy`, so a file name that is not
/// valid UTF-8 is accepted (invalid sequences become U+FFFD) rather than rejected.
///
/// # Errors
///
/// Returns an [`AssembleError`] wrapping an [`IoErrorKind::InvalidData`] I/O error
/// when `file_name()` yields `None`, i.e. the path has no final file-name component
/// (for example because it terminates in `..`).
fn extract_module_name(&self) -> Result<String, AssembleError> {
    self.path
        .file_name()
        // `map` rather than `and_then(.. Some(..))`; `into_owned` turns the `Cow`
        // into a `String` without a second formatting pass.
        .map(|name| name.to_string_lossy().into_owned())
        .ok_or_else(|| {
            // Lossy conversion cannot fail on encoding, so the only way to get here
            // is a path without a file-name component; the message says so.
            AssembleError::new_other_error(AssembleErrorKind::Io(IoError::new(
                IoErrorKind::InvalidData,
                Some(
                    "filename couldn't be extracted, does the path end in a file name?"
                        .to_string(),
                ),
            )))
        })
}
}