assembler: start refactoring/rewriting tokeniser

This commit is contained in:
2025-06-25 14:48:45 +01:00
parent 11a57eab51
commit 4e5db58a84
5 changed files with 74 additions and 30 deletions
+6
View File
@@ -59,12 +59,18 @@ pub enum AssembleErrorKind {
IO(std::io::Error),
/// Errors emitted from the [`Tokeniser`].
Tokenise(TokeniserError),
/// Returned for code where the functionality has not yet been implemented but we
/// don't want the program to panic.
Unimplemented(String),
}
impl Display for AssembleErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Tokenise(why) => write!(f, "tokeniser error: {why}"),
Self::Unimplemented(why) => write!(f, "used unimplemented feature: {why}"),
Self::IO(why) => write!(f, "problem occurred with I/O: {why}"),
#[expect(unreachable_patterns)]
_ => write!(
f,
"unhandled error type in Display implementation! See error.rs!"
+1
View File
@@ -7,6 +7,7 @@ use crate::error::AssembleError;
pub mod source_info;
pub mod token;
pub mod token_info;
pub mod tokeniser;
/// Attempts to load and open a source file, returning a [`Vec<u8>`] or an
+8 -28
View File
@@ -2,7 +2,12 @@
//! easier to build from scratch and edit his code than it would be to try and wrangle it
//! into shape.
use crate::source::source_info::SourceInfo;
use crate::source::{
source_info::SourceInfo,
token_info::{
DirectiveToken, InstructionToken, LabelToken, RegisterToken, SymbolToken,
},
};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TokenType {
@@ -29,34 +34,9 @@ pub enum TokenType {
#[derive(Debug)]
pub struct Token {
/// The type of the token.
token_type: TokenType,
pub token_type: TokenType,
/// Where in the source code is this [`Token`]?
source_info: SourceInfo,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SymbolToken {
pub name: String,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct LabelToken {
pub name: String,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DirectiveToken {
pub directive: String,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RegisterToken {
pub name: String,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct InstructionToken {
pub mnemonic: String,
pub source_info: SourceInfo,
}
impl Token {
+24
View File
@@ -0,0 +1,24 @@
/// Data attached to a symbol token.
///
/// Holds only the symbol's textual name; it carries no source-location information.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SymbolToken {
/// The symbol's name as an owned string.
pub name: String,
}
/// Data attached to a label token.
///
/// Holds only the label's textual name; it carries no source-location information.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct LabelToken {
/// The label's name as an owned string.
// NOTE(review): whether any label delimiter is kept in `name` is not visible here — confirm.
pub name: String,
}
/// Data attached to a directive token.
///
/// Holds only the directive's text; it carries no source-location information.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DirectiveToken {
/// The directive text as an owned string.
// NOTE(review): whether any leading directive marker is kept here is not visible — confirm.
pub directive: String,
}
/// Data attached to a register token.
///
/// Holds only the register's textual name; it carries no source-location information.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RegisterToken {
/// The register's name as an owned string.
pub name: String,
}
/// Data attached to an instruction token.
///
/// Holds only the instruction's mnemonic; it carries no source-location information.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct InstructionToken {
/// The instruction mnemonic as an owned string.
pub mnemonic: String,
}
+35 -2
View File
@@ -1,7 +1,40 @@
//! This file contains the [`Tokeniser`], which consumes a [`Vec`] of input bytes and
//! outputs a [`Vec<Token>`].
/// Consumes a [`Vec<u8>`] and outputs a [`Vec`] of [Token]'s.
pub struct Tokeniser {}
use std::path::{Path, PathBuf};
use crate::{
error::{AssembleError, AssembleErrorKind},
source::{load_source_bytes, token::Token},
};
pub mod error;
/// Consumes a [`Vec<u8>`] and outputs a [`Vec`] of [`Token`]s.
///
/// Pairs the raw bytes of a source file with the path of that file.
pub struct Tokeniser {
/// The data in the file.
pub data: Vec<u8>,
/// The path to the file.
pub path: PathBuf,
}
impl Tokeniser {
#[must_use]
pub const fn from_data(data: Vec<u8>, path: PathBuf) -> Self {
Self { data, path }
}
/// Creates a [`Tokeniser`] from a file path.
pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, AssembleError> {
let path = path.as_ref().to_path_buf();
let data = load_source_bytes(&path)?;
Ok(Self { data, path })
}
pub fn tokenise(self) -> Result<Vec<Token>, AssembleError> {
Err(AssembleError::new_other_error(
AssembleErrorKind::Unimplemented("tokeniser not written yet!".to_string()),
))
}
}