assembler: enhance error handling and tokenization logic

This commit is contained in:
2025-06-26 17:00:14 +01:00
parent 40f8b1d57b
commit ed4fcc8495
11 changed files with 514 additions and 98 deletions
+60 -10
View File
@@ -1,9 +1,13 @@
//! Unit tests for the tokenizer
use common::prelude::Register;
use crate::{
context::AssemblerContext,
source::{
opcode::Opcode,
token::{Token, TokenType},
token_info::RegisterToken,
tokeniser::Tokeniser,
},
};
@@ -65,7 +69,7 @@ fn test_single_instruction() {
.any(|t| matches!(t, TokenType::Instruction(_)))
);
if let TokenType::Instruction(instr) = &tokens[0].token_type {
assert_eq!(instr.mnemonic, "add");
assert_eq!(instr.to_string(), "add");
} else {
panic!("Expected instruction token");
}
@@ -73,15 +77,13 @@ fn test_single_instruction() {
#[test]
fn test_all_instructions() {
let instructions = [
"add", "sub", "mul", "div", "jmp", "call", "ret", "lli", "nop", "halt",
];
let instructions = ["add", "sub", "jmp", "call", "return", "lli", "nop", "hlt"];
for instr in &instructions {
let tokens = tokenize_source(instr).expect("Failed to tokenize instruction");
if let TokenType::Instruction(parsed_instr) = &tokens[0].token_type {
assert_eq!(parsed_instr.mnemonic, *instr);
assert_eq!(parsed_instr.to_string(), *instr);
} else {
panic!("Expected instruction token for {instr}");
}
@@ -90,7 +92,7 @@ fn test_all_instructions() {
#[test]
fn test_registers() {
let test_cases = [("rg0", "r0"), ("rgf", "rgf"), ("pcx", "pcx")];
let test_cases = [("rg0", "rg0"), ("rgf", "rgf"), ("pcx", "pcx")];
for (input, expected) in &test_cases {
let tokens = tokenize_source(input).expect("Failed to tokenize register");
@@ -149,10 +151,9 @@ fn test_labels() {
#[test]
fn test_directives() {
let test_cases = [
(".global", "global"),
(".section", "section"),
(".data", "data"),
(".text", "text"),
("global", "global"),
("section", "section"),
("local", "local"),
];
for (input, expected) in &test_cases {
@@ -185,3 +186,52 @@ fn test_symbols() {
}
}
}
/// Tokenises one three-operand instruction line and checks the kind of each
/// of its six leading tokens.
#[test]
fn test_complex_instruction_line() {
    let tokens = tokenize_source("addi rg1, rg2, 0xFF")
        .expect("Failed to tokenise complex instruction");

    // Expected order: instruction, register, comma, register, comma, immediate.
    // Anything the tokeniser emits after the immediate is not inspected here,
    // hence the lower bound rather than an exact length.
    assert!(tokens.len() >= 6);

    // Small accessor so each check below reads as "kind at position N".
    let kind_at = |position: usize| &tokens[position].token_type;

    assert!(matches!(kind_at(0), TokenType::Instruction(_)));
    assert!(matches!(kind_at(1), TokenType::Register(_)));
    assert!(matches!(kind_at(2), TokenType::Comma));
    assert!(matches!(kind_at(3), TokenType::Register(_)));
    assert!(matches!(kind_at(4), TokenType::Comma));
    assert!(matches!(kind_at(5), TokenType::Immediate(_)));
}
/// Tokenises two instruction lines, the first of which ends in a `//` comment,
/// and compares the resulting token kinds against an exact expected sequence.
#[test]
fn test_multiline_with_comments() {
    // The expected stream has no entry for the trailing comment: the first
    // line's tokens are followed directly by its `Newline`.
    const EXPECTED_TOKEN_TYPES: [TokenType; 11] = [
        TokenType::Instruction(Opcode::Add),
        TokenType::Register(RegisterToken::new(Register::Rg0)),
        TokenType::Comma,
        TokenType::Register(RegisterToken::new(Register::Rg1)),
        TokenType::Newline,
        TokenType::Instruction(Opcode::SubI),
        TokenType::Register(RegisterToken::new(Register::Rg2)),
        TokenType::Comma,
        TokenType::Immediate(10),
        TokenType::Newline,
        TokenType::Eof,
    ];
    // Written with an explicit `\n` so the second line's content does not
    // depend on how this source file happens to be indented.
    const SOURCE: &str = "add rg0, rg1 // Another comment\nsubi rg2, 10";

    let tokens =
        tokenize_source(SOURCE).expect("Failed to tokenise source with comments");
    let token_types = extract_token_types(&tokens);

    // Compare lengths first: `zip` below stops at the shorter sequence, so a
    // missing or extra token would otherwise go unnoticed.
    assert_eq!(
        token_types.len(),
        EXPECTED_TOKEN_TYPES.len(),
        "{token_types:#?}"
    );

    for (expected, got) in EXPECTED_TOKEN_TYPES.iter().zip(token_types.iter()) {
        assert!(expected == *got, "Expected {expected:?}, got {got:?}");
    }
}