From 8bb252e941ac7a1777221ae6ddfc597fafd103dc Mon Sep 17 00:00:00 2001 From: "J. Hinchliffe" Date: Sat, 28 Jun 2025 23:35:55 +0100 Subject: [PATCH] tokeniser: return TokeniserErrors where relevant. The UnexpectedEndOfInput case is a little vague. --- assembler/src/source/tokeniser.rs | 50 +++++++++++++++++-------- assembler/src/source/tokeniser/error.rs | 18 ++++++--- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/assembler/src/source/tokeniser.rs b/assembler/src/source/tokeniser.rs index 49cb4b5..261562d 100644 --- a/assembler/src/source/tokeniser.rs +++ b/assembler/src/source/tokeniser.rs @@ -9,7 +9,7 @@ use common::prelude::*; use crate::{ context::AssemblerContext, - error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind}, + error::{AssembleError, AssembleErrorKind}, model::module::Module, source::{ lines::{LineSpan, lines_with_spans}, @@ -18,6 +18,7 @@ use crate::{ source_info::SourceInfo, token::{Token, TokenType}, token_info::{DirectiveToken, LabelToken, RegisterToken, SymbolToken}, + tokeniser::error::TokeniserError, }, }; @@ -161,11 +162,11 @@ impl Tokeniser { while !remaining.is_empty() { let start_column = column; - // Try to match a token + // Try to match a token. let (token_type, consumed) = self.match_token(remaining, line_span.line_number, column)?; - // Filter out string continuation tokens and comments + // Filter out string continuation tokens and comments. match token_type { TokenType::StringContinuation => { // Don't add to token stream, just consume input @@ -186,11 +187,11 @@ impl Tokeniser { } } - // Advance position + // Advance position. remaining = &remaining[consumed..]; column += consumed; - // Skip whitespace + // Skip whitespace. let before_trim = remaining.len(); remaining = remaining.trim_start(); column += before_trim - remaining.len(); @@ -389,26 +390,43 @@ impl Tokeniser { return Ok(m); } + let mut idx_iter = 0..; + // Handle miscellaneous characters. 
- match input.chars().enumerate().next() { - Some((_, ',')) => Ok((TokenType::Comma, 1)), - Some((idx, c)) => { + match input.chars().next() { + Some(',') => { + _ = idx_iter.next(); + + Ok((TokenType::Comma, 1)) + } + Some(c) => { + let Some(idx) = idx_iter.next() else { + unreachable!() + }; + let source = SourceInfo::new(line_number, self.module.clone(), idx..idx + 1); Err(AssembleError::new_source_error( source, - AssembleErrorKind::Tokeniser(error::TokeniserError::UnexpectedChar( - c, + AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedChar(c)), + )) + } + None => { + let Some(idx) = idx_iter.next() else { + unreachable!() + }; + + let source = + SourceInfo::new(line_number, self.module.clone(), idx..idx + 1); + + Err(AssembleError::new_source_error( + source, + AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedEndOfInput( + input.len(), )), )) } - None => Err(AssembleError::new_other_error(AssembleErrorKind::Io( - IoError::new( - IoErrorKind::InvalidData, - Some("Unexpected end of input".to_string()), - ), - ))), } } } diff --git a/assembler/src/source/tokeniser/error.rs b/assembler/src/source/tokeniser/error.rs index ae8eddf..ed3f683 100644 --- a/assembler/src/source/tokeniser/error.rs +++ b/assembler/src/source/tokeniser/error.rs @@ -13,19 +13,27 @@ pub enum TokeniserError { InvalidNumber(&'static str), /// An unrecognized token was encountered. UnrecognisedToken, + /// Returned when [`Tokeniser::match_token`] reaches the end of its input without matching a token. + /// This is a sign you will need to debug some [`Tokeniser`] code to ensure that + /// [`Tokeniser::match_token`] is working as intended. + /// + /// First field is the byte length of the input that was being matched. 
+ UnexpectedEndOfInput(usize), } impl TokeniserError {} impl std::fmt::Display for TokeniserError { + #[rustfmt::skip] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::UnexpectedChar(c) => write!(f, "unexpected char '{c}' found in input")?, - Self::InvalidNumber(lit) => { - write!(f, "invalid integer literal \"{lit}\" found in input")?; - } - &Self::UnrecognisedToken => write!(f, "unrecognised token found in input")?, - &Self::UnterminatedString => write!(f, "unterminated string literal")?, + Self::InvalidNumber(lit) => write!(f, "invalid integer literal \"{lit}\" found in input")?, + Self::UnrecognisedToken => write!(f, "unrecognised token found in input")?, + Self::UnterminatedString => write!(f, "unterminated string literal")?, + Self::UnexpectedEndOfInput(line_length) => write!( + f, "unexpected end of input, input length: {line_length}" + )?, } Ok(())