tokeniser: return TokeniserErrors where relevant.

The UnexpectedEndOfInput case is a little vague.
This commit is contained in:
2025-06-28 23:35:55 +01:00
parent 5317988fdd
commit 8bb252e941
2 changed files with 47 additions and 21 deletions
+34 -16
View File
@@ -9,7 +9,7 @@ use common::prelude::*;
use crate::{
context::AssemblerContext,
error::{AssembleError, AssembleErrorKind, IoError, IoErrorKind},
error::{AssembleError, AssembleErrorKind},
model::module::Module,
source::{
lines::{LineSpan, lines_with_spans},
@@ -18,6 +18,7 @@ use crate::{
source_info::SourceInfo,
token::{Token, TokenType},
token_info::{DirectiveToken, LabelToken, RegisterToken, SymbolToken},
tokeniser::error::TokeniserError,
},
};
@@ -161,11 +162,11 @@ impl Tokeniser {
while !remaining.is_empty() {
let start_column = column;
// Try to match a token
// Try to match a token.
let (token_type, consumed) =
self.match_token(remaining, line_span.line_number, column)?;
// Filter out string continuation tokens and comments
// Filter out string continuation tokens and comments.
match token_type {
TokenType::StringContinuation => {
// Don't add to token stream, just consume input
@@ -186,11 +187,11 @@ impl Tokeniser {
}
}
// Advance position
// Advance position.
remaining = &remaining[consumed..];
column += consumed;
// Skip whitespace
// Skip whitespace.
let before_trim = remaining.len();
remaining = remaining.trim_start();
column += before_trim - remaining.len();
@@ -389,26 +390,43 @@ impl Tokeniser {
return Ok(m);
}
let mut idx_iter = 0..;
// Handle miscellaneous characters.
match input.chars().enumerate().next() {
Some((_, ',')) => Ok((TokenType::Comma, 1)),
Some((idx, c)) => {
match input.chars().next() {
Some(',') => {
_ = idx_iter.next();
Ok((TokenType::Comma, 1))
}
Some(c) => {
let Some(idx) = idx_iter.next() else {
unreachable!()
};
let source =
SourceInfo::new(line_number, self.module.clone(), idx..idx + 1);
Err(AssembleError::new_source_error(
source,
AssembleErrorKind::Tokeniser(error::TokeniserError::UnexpectedChar(
c,
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedChar(c)),
))
}
None => {
let Some(idx) = idx_iter.next() else {
unreachable!()
};
let source =
SourceInfo::new(line_number, self.module.clone(), idx..idx + 1);
Err(AssembleError::new_source_error(
source,
AssembleErrorKind::Tokeniser(TokeniserError::UnexpectedEndOfInput(
input.len(),
)),
))
}
None => Err(AssembleError::new_other_error(AssembleErrorKind::Io(
IoError::new(
IoErrorKind::InvalidData,
Some("Unexpected end of input".to_string()),
),
))),
}
}
}
+13 -5
View File
@@ -13,19 +13,27 @@ pub enum TokeniserError {
InvalidNumber(&'static str),
/// An unrecognized token was encountered.
UnrecognisedToken,
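/// Returned when the input ends before the tokeniser has consumed everything it
/// expected to, i.e. the consumed count is lower than the length of the input.
/// This usually indicates a bug in the [`Tokeniser`] itself: debug
/// [`Tokeniser::match_token`] to check that it is working as intended.
///
/// The field is the length of the input that was being matched when the error
/// was raised.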
UnexpectedEndOfInput(usize),
}
// No inherent methods are defined here at present; the user-facing message for
// each variant comes from the `Display` implementation that follows.
impl TokeniserError {}
impl std::fmt::Display for TokeniserError {
#[rustfmt::skip]
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::UnexpectedChar(c) => write!(f, "unexpected char '{c}' found in input")?,
Self::InvalidNumber(lit) => {
write!(f, "invalid integer literal \"{lit}\" found in input")?;
}
&Self::UnrecognisedToken => write!(f, "unrecognised token found in input")?,
&Self::UnterminatedString => write!(f, "unterminated string literal")?,
Self::InvalidNumber(lit) => write!(f, "invalid integer literal \"{lit}\" found in input")?,
Self::UnrecognisedToken => write!(f, "unrecognised token found in input")?,
Self::UnterminatedString => write!(f, "unterminated string literal")?,
Self::UnexpectedEndOfInput(line_length) => write!(
f, "unexpected end of input, input length: {line_length}"
)?,
}
Ok(())