From 4e5db58a84b3d4c0506487533f58b7f760919221 Mon Sep 17 00:00:00 2001
From: "J. Hinchliffe"
Date: Wed, 25 Jun 2025 14:48:45 +0100
Subject: [PATCH] assembler: start refactoring/rewriting tokeniser

---
 assembler/src/error.rs             |  6 +++++
 assembler/src/source.rs            |  1 +
 assembler/src/source/token.rs      | 36 +++++++----------------------
 assembler/src/source/token_info.rs | 24 +++++++++++++++++++
 assembler/src/source/tokeniser.rs  | 37 ++++++++++++++++++++++++++++--
 5 files changed, 74 insertions(+), 30 deletions(-)
 create mode 100644 assembler/src/source/token_info.rs

diff --git a/assembler/src/error.rs b/assembler/src/error.rs
index 483b68d..c26a30d 100644
--- a/assembler/src/error.rs
+++ b/assembler/src/error.rs
@@ -59,12 +59,18 @@ pub enum AssembleErrorKind {
     IO(std::io::Error),
     /// Errors emitted from the [`Tokeniser`].
     Tokenise(TokeniserError),
+    /// Returned for code where the functionality has not yet been implemented but we
+    /// don't want the program to panic.
+    Unimplemented(String),
 }
 
 impl Display for AssembleErrorKind {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Self::Tokenise(why) => write!(f, "tokeniser error: {why}"),
+            Self::Unimplemented(why) => write!(f, "used unimplemented feature: {why}"),
+            Self::IO(why) => write!(f, "problem occurred with I/O: {why}"),
+            #[expect(unreachable_patterns)]
             _ => write!(
                 f,
                 "unhandled error type in Display implementation! See error.rs!"
diff --git a/assembler/src/source.rs b/assembler/src/source.rs
index 1146508..93ec85d 100644
--- a/assembler/src/source.rs
+++ b/assembler/src/source.rs
@@ -7,6 +7,7 @@ use crate::error::AssembleError;
 
 pub mod source_info;
 pub mod token;
+pub mod token_info;
 pub mod tokeniser;
 
 /// Attempts to load and open a source file, returning a [`Vec`] or an
diff --git a/assembler/src/source/token.rs b/assembler/src/source/token.rs
index ba3c8d1..360ac64 100644
--- a/assembler/src/source/token.rs
+++ b/assembler/src/source/token.rs
@@ -2,7 +2,12 @@
 //! 
easier to build from scratch and edit his code than it would be to try and wrangle it
 //! into shape.
 
-use crate::source::source_info::SourceInfo;
+use crate::source::{
+    source_info::SourceInfo,
+    token_info::{
+        DirectiveToken, InstructionToken, LabelToken, RegisterToken, SymbolToken,
+    },
+};
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum TokenType {
@@ -29,34 +34,9 @@ pub enum TokenType {
 #[derive(Debug)]
 pub struct Token {
     /// The type of the token.
-    token_type: TokenType,
+    pub token_type: TokenType,
     /// Where in the source code is this [`Token`]?
-    source_info: SourceInfo,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct SymbolToken {
-    pub name: String,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct LabelToken {
-    pub name: String,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct DirectiveToken {
-    pub directive: String,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct RegisterToken {
-    pub name: String,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct InstructionToken {
-    pub mnemonic: String,
+    pub source_info: SourceInfo,
 }
 
 impl Token {
diff --git a/assembler/src/source/token_info.rs b/assembler/src/source/token_info.rs
new file mode 100644
index 0000000..af5fdcd
--- /dev/null
+++ b/assembler/src/source/token_info.rs
@@ -0,0 +1,24 @@
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct SymbolToken {
+    pub name: String,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct LabelToken {
+    pub name: String,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct DirectiveToken {
+    pub directive: String,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct RegisterToken {
+    pub name: String,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct InstructionToken {
+    pub mnemonic: String,
+}
diff --git a/assembler/src/source/tokeniser.rs b/assembler/src/source/tokeniser.rs
index 902fc68..d7c0c6b 100644
--- a/assembler/src/source/tokeniser.rs
+++ b/assembler/src/source/tokeniser.rs
@@ -1,7 +1,40 @@
 //! This file contains the [`Tokeniser`], which consumes a [`Vec`] of input bytes and
 //! outputs a [`Vec`].
 
-/// Consumes a [`Vec`] and outputs a [`Vec`] of [Token]'s.
-pub struct Tokeniser {}
+use std::path::{Path, PathBuf};
+
+use crate::{
+    error::{AssembleError, AssembleErrorKind},
+    source::{load_source_bytes, token::Token},
+};
 
 pub mod error;
+
+/// Consumes a [`Vec`] and outputs a [`Vec`] of [Token]'s.
+pub struct Tokeniser {
+    /// The data in the file.
+    pub data: Vec<u8>,
+    /// The path to the file.
+    pub path: PathBuf,
+}
+
+impl Tokeniser {
+    #[must_use]
+    pub const fn from_data(data: Vec<u8>, path: PathBuf) -> Self {
+        Self { data, path }
+    }
+
+    /// Creates a [`Tokeniser`] from a file path.
+    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, AssembleError> {
+        let path = path.as_ref().to_path_buf();
+        let data = load_source_bytes(&path)?;
+
+        Ok(Self { data, path })
+    }
+
+    pub fn tokenise(self) -> Result<Vec<Token>, AssembleError> {
+        Err(AssembleError::new_other_error(
+            AssembleErrorKind::Unimplemented("tokeniser not written yet!".to_string()),
+        ))
+    }
+}