assembler: clippy lints, better error formatting

Adds regex dependency and enhances error handling system

Introduces comprehensive error type hierarchy with specific variants for parser, symbol, codegen, threading, and IO errors to improve error reporting and debugging capabilities.

Adds regex crate for pattern matching in tokenizer implementation with pre-compiled patterns for labels, registers, immediates, directives, instructions, and symbols.

Enhances source info functionality with context printing and error underlining similar to compiler diagnostics.

Implements better error conversions and threading error handling for lock failures and panics.
This commit is contained in:
2025-06-25 16:50:17 +01:00
parent 4e5db58a84
commit fb84a6d3c3
12 changed files with 519 additions and 49 deletions
Generated
+39
View File
@@ -129,6 +129,15 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "android-activity"
version = "0.6.0"
@@ -269,6 +278,7 @@ dependencies = [
"clap",
"common",
"num_cpus",
"regex",
"threadpool",
"uuid",
]
@@ -2691,6 +2701,35 @@ dependencies = [
"thiserror 2.0.12",
]
[[package]]
name = "regex"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "renderdoc-sys"
version = "1.1.0"
+1
View File
@@ -16,5 +16,6 @@ path = "src/lib.rs"
clap = { version = "4.5.40", features = ["derive"] }
common = { path = "../common" }
num_cpus = "1.17.0"
regex = "1.11.1"
threadpool = "1.8.1"
uuid = { version = "1.17.0", features = ["v4"] }
+155 -14
View File
@@ -51,26 +51,173 @@ impl Display for AssembleError {
/// Marker trait.
impl std::error::Error for AssembleError {}
/// Different types of errors that may occur when assembling a set of input source files.
#[derive(Debug, Clone)]
#[non_exhaustive]
#[derive(Debug)]
pub enum AssembleErrorKind {
/// Usually unexpected I/O errors. Not normally recoverable.
IO(std::io::Error),
Io(IoError),
/// Errors emitted from the [`Tokeniser`].
Tokenise(TokeniserError),
Tokeniser(TokeniserError),
Parser(ParserError),
Symbol(SymbolError),
Codegen(CodegenError),
Threading(ThreadingError),
/// Returned for code where the functionality has not yet been implemented but we
/// don't want the program to panic.
Unimplemented(String),
Unimplemented(&'static str),
}
#[derive(Debug, Clone)]
pub struct ParserError {
error_type: ParserErrorType,
source_info: SourceInfo,
}
#[derive(Debug, Clone)]
pub enum ParserErrorType {
UnexpectedToken,
MissingOperand,
InvalidInstruction,
MissingLabel,
DuplicateLabel,
}
impl Display for ParserErrorType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::UnexpectedToken => write!(f, "unexpected token"),
Self::MissingOperand => write!(f, "missing operand"),
Self::InvalidInstruction => write!(f, "invalid instruction"),
Self::MissingLabel => write!(f, "missing label"),
Self::DuplicateLabel => write!(f, "duplicate label"),
}
}
}
impl Display for ParserError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// TODO: Print the path/to/filename.dsa:line_no, column col_no.
write!(
f,
"Parser error, {} at {}",
self.error_type, self.source_info
)?;
// Prints out the context for our error.
self.source_info
.print_context_with_underline()
.map_err(|e| {
_ = writeln!(f, "Print context error: {e}");
std::fmt::Error {}
})?;
Ok(())
}
}
#[derive(Debug, Clone)]
pub enum SymbolError {
Undefined,
Duplicate,
CircularDependency,
InvalidReference,
}
impl Display for SymbolError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Undefined => write!(f, "undefined symbol"),
Self::Duplicate => write!(f, "duplicate symbol"),
Self::CircularDependency => write!(f, "circular dependency"),
Self::InvalidReference => write!(f, "invalid reference"),
}
}
}
#[derive(Debug, Clone)]
pub enum CodegenError {
InvalidOperand,
OutOfRange,
UnsupportedInstruction,
}
impl Display for CodegenError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::InvalidOperand => write!(f, "invalid operand"),
Self::OutOfRange => write!(f, "out of range"),
Self::UnsupportedInstruction => write!(f, "unsupported instruction"),
}
}
}
#[derive(Debug, Clone)]
pub enum ThreadingError {
LockFailed,
ThreadPanic,
}
impl Display for ThreadingError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::LockFailed => write!(f, "lock failed"),
Self::ThreadPanic => write!(f, "thread panic"),
}
}
}
#[derive(Debug, Clone)]
pub struct IoError {
msg: Option<String>,
kind: IoErrorKind,
}
impl IoError {
#[must_use]
pub const fn new(kind: IoErrorKind, msg: Option<String>) -> Self {
Self { msg, kind }
}
}
#[derive(Debug, Clone)]
pub enum IoErrorKind {
NotFound,
PermissionDenied,
InvalidData,
Other,
}
impl std::fmt::Display for IoErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::NotFound => write!(f, "file not found"),
Self::PermissionDenied => write!(f, "permission denied"),
Self::InvalidData => write!(f, "invalid data"),
Self::Other => write!(f, "other I/O error"),
}
}
}
impl std::fmt::Display for IoError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.kind)?;
if let Some(msg) = &self.msg {
write!(f, ", \"{msg}\"")?;
}
Ok(())
}
}
impl Display for AssembleErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Tokenise(why) => write!(f, "tokeniser error: {why}"),
Self::Tokeniser(why) => write!(f, "tokeniser error: {why}"),
Self::Unimplemented(why) => write!(f, "used unimplemented feature: {why}"),
Self::IO(why) => write!(f, "problem occurred with I/O: {why}"),
#[expect(unreachable_patterns)]
Self::Io(why) => write!(f, "problem occurred with I/O: {why}"),
#[allow(unreachable_patterns)]
_ => write!(
f,
"unhandled error type in Display implementation! See error.rs!"
@@ -79,10 +226,4 @@ impl Display for AssembleErrorKind {
}
}
impl From<std::io::Error> for AssembleErrorKind {
fn from(err: std::io::Error) -> Self {
Self::IO(err)
}
}
pub mod conversions;
+62 -2
View File
@@ -1,7 +1,67 @@
use crate::error::AssembleError;
use std::{
io::ErrorKind,
sync::{PoisonError, RwLockReadGuard, RwLockWriteGuard},
};
use crate::error::{AssembleError, IoError, IoErrorKind};
use super::{AssembleErrorKind, ThreadingError};
impl From<std::io::Error> for IoError {
fn from(err: std::io::Error) -> Self {
let kind = match err.kind() {
ErrorKind::NotFound => IoErrorKind::NotFound,
ErrorKind::PermissionDenied => IoErrorKind::PermissionDenied,
ErrorKind::InvalidData => IoErrorKind::InvalidData,
_ => IoErrorKind::Other,
};
let msg = err.to_string();
Self::new(kind, Some(msg))
}
}
impl From<std::io::Error> for AssembleError {
fn from(err: std::io::Error) -> Self {
Self::new_other_error(err.into())
Self::new_other_error(AssembleErrorKind::Io(err.into()))
}
}
// TODO: Maybe attempt recovery? To be honest we don't want any threads to panic at all,
// or we want them all to panic spectacularly.
impl<T> From<PoisonError<RwLockReadGuard<'_, T>>> for AssembleError {
fn from(err: PoisonError<RwLockReadGuard<'_, T>>) -> Self {
Self::new_other_error(AssembleErrorKind::Threading(err.into()))
}
}
impl<T> From<PoisonError<RwLockReadGuard<'_, T>>> for ThreadingError {
fn from(_err: PoisonError<RwLockReadGuard<'_, T>>) -> Self {
Self::LockFailed
}
}
impl<T> From<PoisonError<RwLockWriteGuard<'_, T>>> for AssembleError {
fn from(err: PoisonError<RwLockWriteGuard<'_, T>>) -> Self {
Self::new_other_error(AssembleErrorKind::Threading(err.into()))
}
}
impl<T> From<PoisonError<RwLockWriteGuard<'_, T>>> for ThreadingError {
fn from(_err: PoisonError<RwLockWriteGuard<'_, T>>) -> Self {
Self::LockFailed
}
}
impl From<std::fmt::Error> for AssembleError {
fn from(err: std::fmt::Error) -> Self {
IoError::new(IoErrorKind::Other, Some(err.to_string())).into()
}
}
impl From<IoError> for AssembleError {
fn from(err: IoError) -> Self {
Self::new_other_error(AssembleErrorKind::Io(err))
}
}
+1 -1
View File
@@ -40,7 +40,7 @@ impl std::fmt::Display for ModuleId {
}
/// A single source file or compilation unit. Stores its own symbol table.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct Module {
/// The name of the module. This is typically the name of the file, less the `.dsa`
/// extension.
+4 -2
View File
@@ -17,14 +17,16 @@ impl Default for ModuleRegistry {
}
impl ModuleRegistry {
#[must_use] pub fn new() -> Self {
#[must_use]
pub fn new() -> Self {
Self {
modules: HashMap::new(),
}
}
/// Gets a [`Module`] by ID.
#[must_use] pub fn get(&self, module_id: &ModuleId) -> Option<&Module> {
#[must_use]
pub fn get(&self, module_id: &ModuleId) -> Option<&Module> {
self.modules.get(module_id)
}
+10 -1
View File
@@ -1,10 +1,14 @@
//! This module contains anything within the first stage of assembly, i.e. the
//! tokenisation stage, or utility functions for reading input files.
use std::path::Path;
use std::{
io::{BufRead, Lines},
path::Path,
};
use crate::error::AssembleError;
pub mod lines;
pub mod source_info;
pub mod token;
pub mod token_info;
@@ -17,3 +21,8 @@ pub fn load_source_bytes<P: AsRef<Path>>(p: P) -> Result<Vec<u8>, AssembleError>
Ok(std::fs::read(path)?)
}
/// Get the lines from a [`BufReader`].
pub fn reader_lines<R: BufRead>(rdr: R) -> Lines<R> {
rdr.lines()
}
+76
View File
@@ -0,0 +1,76 @@
//! Enhanced lines iterator that tracks line numbers and character positions.
use std::io::{BufRead, BufReader, Cursor};
use crate::error::AssembleError;
/// Iterator that yields lines with their line numbers and character spans.
pub struct LinesWithSpans<R: BufRead> {
reader: R,
line_number: usize,
total_chars: usize,
buffer: String,
}
#[derive(Debug, Clone)]
pub struct LineSpan {
/// The line number.
pub line_number: usize,
/// The contents of the line.
pub content: String,
/// Character offset from start of file.
pub start_char: usize,
/// End character offset (exclusive).
pub end_char: usize,
}
impl<R: BufRead> LinesWithSpans<R> {
pub const fn new(reader: R) -> Self {
Self {
reader,
line_number: 0,
total_chars: 0,
buffer: String::new(),
}
}
}
impl<R: BufRead> Iterator for LinesWithSpans<R> {
type Item = Result<LineSpan, AssembleError>;
fn next(&mut self) -> Option<Self::Item> {
self.buffer.clear();
match self.reader.read_line(&mut self.buffer) {
Ok(0) => None, // EOF
Ok(bytes_read) => {
self.line_number += 1;
let start_char = self.total_chars;
self.total_chars += bytes_read;
// Remove trailing newline for cleaner processing
let content = if self.buffer.ends_with('\n') {
self.buffer[..self.buffer.len() - 1].to_string()
} else {
self.buffer.clone()
};
Some(Ok(LineSpan {
line_number: self.line_number,
content,
start_char,
end_char: self.total_chars,
}))
}
Err(e) => Some(Err(e.into())),
}
}
}
/// Helper function to create lines iterator from data.
#[must_use]
pub fn lines_with_spans(data: &[u8]) -> LinesWithSpans<BufReader<Cursor<&[u8]>>> {
let cursor = Cursor::new(data);
let reader = BufReader::new(cursor);
LinesWithSpans::new(reader)
}
+81 -5
View File
@@ -4,22 +4,98 @@
//! This will likely be attached to a [`Token`] which will in turn be attached to an AST
//! [`Node`].
use std::fmt::Display;
use std::{
fmt::{Display, Write},
fs::File,
io::BufReader,
sync::Arc,
};
use crate::model::module::Module;
use crate::{
error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind},
model::module::Module,
source::lines::LinesWithSpans,
};
/// Information on where the token is within the source.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct SourceInfo {
/// The line number within the source file underpinned by `module_id`.
pub line_no: usize,
pub module: Module,
pub module: Arc<Module>,
/// The indexes where this token may be found (line-local).
pub span: std::ops::Range<usize>,
}
impl Display for SourceInfo {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.module.name)
write!(
f,
"{}:{}, column {}",
self.module.path.display(),
self.line_no,
self.span.start
)
}
}
impl SourceInfo {
#[must_use]
pub const fn new(
line_no: usize,
module: Arc<Module>,
span: std::ops::Range<usize>,
) -> Self {
Self {
line_no,
module,
span,
}
}
/// Prints out where in the source code the error originated with an underline similar
/// to what rustc does.
pub fn print_context_with_underline(&self) -> Result<(), AssembleError> {
let f = File::open(&self.module.path)?;
let rdr = BufReader::new(f);
let mut lines = LinesWithSpans::new(rdr);
let Some(line_result) = lines.nth(self.line_no - 1) else {
// Handle a line not existing.
return Err(AssembleError::new_source_error(
self.clone(),
AssembleErrorKind::Io(IoError::new(
IoErrorKind::Other,
Some(format!(
"the line {} does not exist in input file `{}` but source info suggested otherwise!.",
self.line_no,
self.module.path.display()
)),
)),
));
};
let line_span = line_result?;
// Print the line number and line content.
println!("{:>4} | {}", self.line_no, line_span.content);
let mut underline = String::new();
write!(underline, "{:>4} | ", "")?;
for _ in 0..self.span.start {
underline.push(' ');
}
for _ in self.span.start..self.span.end.min(line_span.content.len()) {
underline.push('^');
}
// Print the underline in red and bold.
// TODO: Use a crate to make this extra portable.
println!("\x1b[1;31m{underline}\x1b[0m");
Ok(())
}
}
+72 -7
View File
@@ -3,9 +3,15 @@
use std::path::{Path, PathBuf};
use regex::Regex;
use crate::{
error::{AssembleError, AssembleErrorKind},
source::{load_source_bytes, token::Token},
context::AssemblerContext,
error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind},
model::module::Module,
source::{
lines::lines_with_spans, load_source_bytes, token::Token,
},
};
pub mod error;
@@ -16,12 +22,41 @@ pub struct Tokeniser {
pub data: Vec<u8>,
/// The path to the file.
pub path: PathBuf,
// Pre-compiled regex patterns
label_regex: Regex,
register_regex: Regex,
immediate_regex: Regex,
directive_regex: Regex,
instruction_regex: Regex,
symbol_regex: Regex,
string_regex: Regex,
}
impl Tokeniser {
#[must_use]
pub const fn from_data(data: Vec<u8>, path: PathBuf) -> Self {
Self { data, path }
pub fn from_data(data: Vec<u8>, path: PathBuf) -> Self {
Self {
data,
path,
label_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*):")
.expect("Failed to compile label regex pattern"),
register_regex: Regex::new(r"^(r[0-9]+|sp|fp|pc)")
.expect("Failed to compile register regex pattern"),
immediate_regex: Regex::new(r"^(0x[0-9a-fA-F]+|[0-9]+)")
.expect("Failed to compile immediate regex pattern"),
directive_regex: Regex::new(r"^\.([a-zA-Z]+)")
.expect("Failed to compile directive regex pattern"),
instruction_regex: Regex::new(
r"^(add|sub|mul|div|jmp|call|ret|lli|nop|halt)",
)
.expect("Failed to compile instruction regex pattern"),
symbol_regex: Regex::new(r"^([a-zA-Z_][a-zA-Z0-9_]*)")
.expect("Failed to compile symbol regex pattern"),
string_regex: Regex::new(r#"^"([^"]*)"#)
.expect("Failed to compile string regex pattern"),
}
}
/// Creates a [`Tokeniser`] from a file path.
@@ -29,12 +64,42 @@ impl Tokeniser {
let path = path.as_ref().to_path_buf();
let data = load_source_bytes(&path)?;
Ok(Self { data, path })
Ok(Self::from_data(data, path))
}
pub fn tokenise(self) -> Result<Vec<Token>, AssembleError> {
// Note that modules are tokenised in their own threads, possibly in parallel.
pub fn tokenise(self, ctx: &AssemblerContext) -> Result<Vec<Token>, AssembleError> {
let lines = lines_with_spans(&self.data);
let Some(module_name) = self.path.file_name().and_then(|f| f.to_str()) else {
return Err(AssembleError::new_other_error(AssembleErrorKind::Io(
IoError::new(
IoErrorKind::InvalidData,
Some(
"filename couldn't be extracted, is it valid UTF-8?".to_string(),
),
),
)));
};
// Create a module for the source file being processed.
let module = Module::new(module_name.to_string(), &self.path);
{
let mut module_registry = ctx.module_registry.write()?;
module_registry.add(module);
}
// Technically ignores newlines since line will be trimmed. We just append a
// Newline token for each line.
for line_result in lines {
let line = line_result?;
eprintln!("{}", line.line_number);
}
Err(AssembleError::new_other_error(
AssembleErrorKind::Unimplemented("tokeniser not written yet!".to_string()),
AssembleErrorKind::Unimplemented("tokeniser not written yet!"),
))
}
}
+1 -1
View File
@@ -1,6 +1,6 @@
//! This module contains the error types for the tokeniser.
#[derive(Debug)]
#[derive(Debug, Clone, Copy)]
pub enum TokeniserError {}
impl std::fmt::Display for TokeniserError {
+17 -16
View File
@@ -46,12 +46,11 @@ impl SymbolTable {
&& let Some(existing) = self.symbols.get(&existing_id)
&& existing.module_id == module_id
{
return Err(AssembleError::new_other_error(
crate::error::AssembleErrorKind::IO(std::io::Error::new(
std::io::ErrorKind::AlreadyExists,
format!("Symbol '{name}' already defined in module"),
)),
));
return Err(std::io::Error::new(
std::io::ErrorKind::AlreadyExists,
format!("Symbol '{name}' already defined in module"),
)
.into());
}
// Add to all mappings
@@ -63,19 +62,22 @@ impl SymbolTable {
}
/// Gets the [`Symbol`] by its [`SymbolId`].
#[must_use] pub fn get(&self, id: &SymbolId) -> Option<&Symbol> {
#[must_use]
pub fn get(&self, id: &SymbolId) -> Option<&Symbol> {
self.symbols.get(id)
}
/// Gets the [`Symbol`] by its name.
#[must_use] pub fn get_by_name(&self, name: &str) -> Option<&Symbol> {
#[must_use]
pub fn get_by_name(&self, name: &str) -> Option<&Symbol> {
self.name_to_id
.get(name)
.and_then(|id| self.symbols.get(id))
}
/// Gets all [`Symbol`]s in a module.
#[must_use] pub fn get_module_symbols(&self, module_id: &ModuleId) -> Vec<&Symbol> {
#[must_use]
pub fn get_module_symbols(&self, module_id: &ModuleId) -> Vec<&Symbol> {
self.module_symbols
.get(module_id)
.map(|ids| ids.iter().filter_map(|id| self.symbols.get(id)).collect())
@@ -83,7 +85,8 @@ impl SymbolTable {
}
/// Gets all the public symbols.
#[must_use] pub fn get_public_symbols(&self) -> Vec<&Symbol> {
#[must_use]
pub fn get_public_symbols(&self) -> Vec<&Symbol> {
self.symbols
.values()
.filter(|sym| matches!(sym.visibility, Visibility::Public))
@@ -104,12 +107,10 @@ impl SymbolTable {
}
Ok(())
} else {
Err(AssembleError::new_other_error(
crate::error::AssembleErrorKind::IO(std::io::Error::new(
std::io::ErrorKind::NotFound,
"Symbol not found",
)),
))
Err(
std::io::Error::new(std::io::ErrorKind::NotFound, "Symbol not found")
.into(),
)
}
}
}