From 9232f2ccabc5136c96220807f9c6ab3c17008069 Mon Sep 17 00:00:00 2001 From: "J. Hinchliffe" Date: Wed, 25 Jun 2025 03:26:50 +0100 Subject: [PATCH] assembler: great leap forwards (more like the Cultural Revolution) --- assembler/src/error.rs | 48 ++++++++++++++++ assembler/src/error/conversions.rs | 7 +++ assembler/src/lib.rs | 5 ++ assembler/src/model.rs | 3 + assembler/src/model/symbol.rs | 69 +++++++++++++++++++++++ assembler/src/source.rs | 20 +++++++ assembler/src/{ => source}/source_info.rs | 6 +- assembler/src/{ => source}/token.rs | 0 assembler/src/source/tokeniser.rs | 5 ++ assembler/src/symtab.rs | 10 ++++ 10 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 assembler/src/error.rs create mode 100644 assembler/src/error/conversions.rs create mode 100644 assembler/src/model.rs create mode 100644 assembler/src/model/symbol.rs create mode 100644 assembler/src/source.rs rename assembler/src/{ => source}/source_info.rs (77%) rename assembler/src/{ => source}/token.rs (100%) create mode 100644 assembler/src/source/tokeniser.rs create mode 100644 assembler/src/symtab.rs diff --git a/assembler/src/error.rs b/assembler/src/error.rs new file mode 100644 index 0000000..a856590 --- /dev/null +++ b/assembler/src/error.rs @@ -0,0 +1,48 @@ +//! This module contains code for various types of errors that may occur when assembling a +//! set of source DSA files. + +use std::fmt::{Debug, Display}; + +use crate::source::source_info::SourceInfo; + +/// An error that may occur during the assembly of a set of source files. +#[derive(Debug)] +pub struct AssembleError { + /// Source code information for the error, if any. The `Display` implementation passes + /// it (present or absent) to the formatter. + source_info: Option, + /// The type of assembly error that occurred. + kind: AssembleErrorKind, + /// The formatter to handle printing the error. 
+ formatter: Box, +} + +impl AssembleError { + pub fn new_source_error(source_info: SourceInfo, kind: AssembleErrorKind) -> Self { + Self { + source_info: Some(source_info), + kind, + formatter, + } + } + + pub fn new_other_error(kind: AssembleErrorKind) {} +} + +impl Display for AssembleError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.formatter + .write(f, self.source_info.as_ref(), &self.kind) + } +} + +/// Empty impl: no `std::error::Error` methods are overridden. +impl std::error::Error for AssembleError {} + +/// Different types of errors that may occur when assembling a set of input source files. +#[non_exhaustive] +#[derive(Debug)] +pub enum AssembleErrorKind {} + +pub mod conversions; +pub mod formatters; diff --git a/assembler/src/error/conversions.rs b/assembler/src/error/conversions.rs new file mode 100644 index 0000000..2a89c19 --- /dev/null +++ b/assembler/src/error/conversions.rs @@ -0,0 +1,7 @@ +use crate::error::AssembleError; + +impl From for AssembleError { + fn from(err: std::io::Error) -> Self { + + } +} diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index a9ed557..c9433f2 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -15,6 +15,11 @@ pub mod args; pub mod image_builder; // pub mod tooling; +pub mod error; +pub mod model; +pub mod source; +pub mod symtab; + mod util; pub mod prelude { diff --git a/assembler/src/model.rs b/assembler/src/model.rs new file mode 100644 index 0000000..8969da3 --- /dev/null +++ b/assembler/src/model.rs @@ -0,0 +1,3 @@ +//! This module contains the underlying data models and enums used by the Assembler. + +pub mod symbol; diff --git a/assembler/src/model/symbol.rs b/assembler/src/model/symbol.rs new file mode 100644 index 0000000..24fffc7 --- /dev/null +++ b/assembler/src/model/symbol.rs @@ -0,0 +1,69 @@ +//! This module contains the definitions for a Symbol. + +use uuid::Uuid; + +/// A symbol is a named reference that may be resolved later to an address by a linker. 
+pub struct Symbol { + /// Stored cheaply instead of the name. Shall be stored in the symbol table under + /// this key. + pub id: Uuid, + pub visibility: Visibility, + pub symbol_type: SymbolType, + + /// The id of the module the symbol is defined in. + module_id: Uuid, + + /// Whether or not the symbol requires relocating. + pub needs_relocation: bool, + + /// A list of the symbol's dependencies. + /// + /// e.g. + /// + /// ```dsa + /// main: + /// call another_func + /// + /// another_func: + /// // Code goes here + /// ret + /// ``` + /// + /// Where `main` depends on `another_func`. + pub dependencies: Vec, +} + +impl Symbol { + pub fn new( + id: Uuid, + visibility: Visibility, + symbol_type: SymbolType, + module_id: Uuid, + ) -> Self { + Self { + id, + visibility, + symbol_type, + module_id, + } + } +} + +#[derive(Debug, Copy, Clone)] +/// The visibility of the symbol in different object files. +pub enum Visibility { + /// STB_GLOBAL under the ELF spec. Visible in all other object files. Shall be used + /// for labels. Remember labels are namespaced in different files so they won't clash + /// with one another. + Public, + /// Only visible within this object file. STB_LOCAL under the ELF spec. Shall be used for + /// data definitions unless they are marked public. + Local, + /// STB_WEAK under the ELF spec. Potentially unused. + Weak, +} + +pub enum SymbolType { + Function, + Label, +} diff --git a/assembler/src/source.rs b/assembler/src/source.rs new file mode 100644 index 0000000..5190d73 --- /dev/null +++ b/assembler/src/source.rs @@ -0,0 +1,20 @@ +//! This module contains anything within the first stage of assembly, i.e. the +//! tokenisation stage, or utility functions for reading input files. + +use std::path::Path; + +use crate::error::AssembleError; + +pub mod source_info; +pub mod token; +pub mod tokeniser; + +/// Reads the source file at `p` and returns its bytes, or an [`AssembleError`] if reading +/// fails. NOTE(review): the body currently returns an empty `Vec`, discarding the bytes read. 
+pub fn load_source_bytes>(p: P) -> Result, AssembleError> { + let path = p.as_ref(); + + let bytes = std::fs::read(path)?; + + Ok(vec![]) +} diff --git a/assembler/src/source_info.rs b/assembler/src/source/source_info.rs similarity index 77% rename from assembler/src/source_info.rs rename to assembler/src/source/source_info.rs index 4773308..5347a29 100644 --- a/assembler/src/source_info.rs +++ b/assembler/src/source/source_info.rs @@ -2,11 +2,15 @@ //! code for more informative errors. This will likely be attached to a [`Token`] which //! will in turn be attached to an AST [`Node`]. +use uuid::Uuid; + /// Information on where the token is within the source. +#[derive(Debug)] pub struct SourceInfo { /// The line number within the source file underpinned by `module_id`. pub line_no: usize, - /// The ID of the module containing this token. + /// The ID of the module containing this token. This will be looked up in the global + /// hashmap of [`Module`]s. pub module_id: Uuid, /// The indexes where this token may be found (line-local). pub span: std::ops::Range, diff --git a/assembler/src/token.rs b/assembler/src/source/token.rs similarity index 100% rename from assembler/src/token.rs rename to assembler/src/source/token.rs diff --git a/assembler/src/source/tokeniser.rs b/assembler/src/source/tokeniser.rs new file mode 100644 index 0000000..8bbe489 --- /dev/null +++ b/assembler/src/source/tokeniser.rs @@ -0,0 +1,5 @@ +//! This file contains the [`Tokeniser`], which consumes a [`Vec`] of input bytes and +//! outputs a [`Vec`] of [`Token`]s. + +/// Consumes a [`Vec`] and outputs a [`Vec`] of [`Token`]s. +pub struct Tokeniser {} diff --git a/assembler/src/symtab.rs b/assembler/src/symtab.rs new file mode 100644 index 0000000..45d913f --- /dev/null +++ b/assembler/src/symtab.rs @@ -0,0 +1,10 @@ +//! This module contains the code for the Symbol Table, which can be written into object +//! files to support deferred relocations when using ELF files. +//! +//! 
It is also required for detection of duplicate symbols, and resolution in the flat +//! binary output type. + +/// Stored for each compilation unit (called a [`Module`]). +/// +/// One hashmap maps [`Symbol`] ID's to their corresponding structs, and +pub struct SymbolTable