assembler: enhance error handling and tokenization logic

2025-06-26 17:00:14 +01:00
parent 40f8b1d57b
commit ed4fcc8495
11 changed files with 514 additions and 98 deletions
@@ -1,9 +1,13 @@
 //! Unit tests for the tokenizer

+use common::prelude::Register;
+
 use crate::{
    context::AssemblerContext,
    source::{
+        opcode::Opcode,
        token::{Token, TokenType},
+        token_info::RegisterToken,
        tokeniser::Tokeniser,
    },
 };
@@ -65,7 +69,7 @@ fn test_single_instruction() {
            .any(|t| matches!(t, TokenType::Instruction(_)))
    );
    if let TokenType::Instruction(instr) = &tokens[0].token_type {
-        assert_eq!(instr.mnemonic, "add");
+        assert_eq!(instr.to_string(), "add");
    } else {
        panic!("Expected instruction token");
    }
@@ -73,15 +77,13 @@ fn test_single_instruction() {

 #[test]
 fn test_all_instructions() {
-    let instructions = [
-        "add", "sub", "mul", "div", "jmp", "call", "ret", "lli", "nop", "halt",
-    ];
+    let instructions = ["add", "sub", "jmp", "call", "return", "lli", "nop", "hlt"];

    for instr in &instructions {
        let tokens = tokenize_source(instr).expect("Failed to tokenize instruction");

        if let TokenType::Instruction(parsed_instr) = &tokens[0].token_type {
-            assert_eq!(parsed_instr.mnemonic, *instr);
+            assert_eq!(parsed_instr.to_string(), *instr);
        } else {
            panic!("Expected instruction token for {instr}");
        }
@@ -90,7 +92,7 @@ fn test_all_instructions() {

 #[test]
 fn test_registers() {
-    let test_cases = [("rg0", "r0"), ("rgf", "rgf"), ("pcx", "pcx")];
+    let test_cases = [("rg0", "rg0"), ("rgf", "rgf"), ("pcx", "pcx")];

    for (input, expected) in &test_cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize register");
@@ -149,10 +151,9 @@ fn test_labels() {
 #[test]
 fn test_directives() {
    let test_cases = [
-        (".global", "global"),
-        (".section", "section"),
-        (".data", "data"),
-        (".text", "text"),
+        ("global", "global"),
+        ("section", "section"),
+        ("local", "local"),
    ];

    for (input, expected) in &test_cases {
@@ -185,3 +186,52 @@ fn test_symbols() {
        }
    }
 }
+
+/// Checks that the line `addi rg1, rg2, 0xFF` begins with the token kinds
+/// instruction, register, comma, register, comma, immediate, in that order.
+#[test]
+fn test_complex_instruction_line() {
+    let tokens = tokenize_source("addi rg1, rg2, 0xFF")
+        .expect("Failed to tokenise complex instruction");
+    // Shorthand for the kind of the token at position `idx`.
+    let kind_at = |idx: usize| &tokens[idx].token_type;
+
+    // Only the six leading tokens are pinned down here; whatever follows the
+    // immediate (a newline and EOF are expected) is accepted but not checked.
+    assert!(tokens.len() >= 6);
+    assert!(matches!(kind_at(0), TokenType::Instruction(_)));
+    assert!(matches!(kind_at(1), TokenType::Register(_)));
+    assert!(matches!(kind_at(2), TokenType::Comma));
+    assert!(matches!(kind_at(3), TokenType::Register(_)));
+    assert!(matches!(kind_at(4), TokenType::Comma));
+    assert!(matches!(kind_at(5), TokenType::Immediate(_)));
+}
+
+/// Tokenises a two-line program whose first line ends in a `//` comment and
+/// checks the resulting token types, in order, against a fixed expectation.
+///
+/// The expectation contains no token for the comment text, and it lists a
+/// `Newline` before `Eof` even though `SOURCE` has no trailing line break.
+#[test]
+fn test_multiline_with_comments() {
+    const EXPECTED_TOKEN_TYPES: [TokenType; 11] = [
+        TokenType::Instruction(Opcode::Add),
+        TokenType::Register(RegisterToken::new(Register::Rg0)),
+        TokenType::Comma,
+        TokenType::Register(RegisterToken::new(Register::Rg1)),
+        TokenType::Newline,
+        TokenType::Instruction(Opcode::SubI),
+        TokenType::Register(RegisterToken::new(Register::Rg2)),
+        TokenType::Comma,
+        TokenType::Immediate(10),
+        TokenType::Newline,
+        TokenType::Eof,
+    ];
+
+    const SOURCE: &str = r"add rg0, rg1 // Another comment
+        subi rg2, 10";
+
+    let tokens =
+        tokenize_source(SOURCE).expect("Failed to tokenise source with comments");
+    let token_types = extract_token_types(&tokens);
+
+    // Compare lengths first: `zip` below stops at the shorter sequence, so a
+    // missing or surplus token would otherwise go unnoticed.
+    assert_eq!(
+        token_types.len(),
+        EXPECTED_TOKEN_TYPES.len(),
+        "{token_types:#?}"
+    );
+
+    // Element-wise comparison so a failure names the first mismatching pair.
+    for (expected, got) in EXPECTED_TOKEN_TYPES.iter().zip(token_types.iter()) {
+        assert_eq!(expected, *got, "Expected {expected:?}, got {got:?}");
+    }
+}