assembler: update tokeniser to allow extra prefixes and separators (0xDEAD_BEEF)

This commit is contained in:
2025-06-25 19:15:51 +01:00
parent 7cb7525484
commit d9807b5b36
3 changed files with 218 additions and 8 deletions
+193
View File
@@ -0,0 +1,193 @@
//! Unit tests for the tokenizer
use crate::{
context::AssemblerContext,
source::{
token::{Token, TokenType},
tokeniser::Tokeniser,
},
};
use std::path::PathBuf;
/// Helper function to create a tokenizer from source text
fn create_tokenizer_from_source(source: &str) -> Tokeniser {
let data = source.as_bytes().to_vec();
let path = PathBuf::from("test.dsa");
Tokeniser::from_data(data, path)
}
/// Helper function to tokenize source and return tokens
fn tokenize_source(source: &str) -> Result<Vec<Token>, crate::error::AssembleError> {
let tokenizer = create_tokenizer_from_source(source);
let context = AssemblerContext::new();
tokenizer.tokenise(&context)
}
/// Helper function to extract token types from a token vector
fn extract_token_types(tokens: &[Token]) -> Vec<&TokenType> {
tokens.iter().map(|t| &t.token_type).collect()
}
#[test]
fn test_empty_source() {
let tokens = tokenize_source("").expect("Failed to tokenize empty source");
// Should have at least EOF token
assert!(!tokens.is_empty());
assert!(matches!(
tokens
.last()
.expect("Expected at least one token")
.token_type,
TokenType::Eof
));
}
#[test]
fn test_whitespace_only() {
let tokens = tokenize_source(" \n \n ").expect("Failed to tokenize whitespace");
// Should have newlines and EOF
let token_types = extract_token_types(&tokens);
assert!(token_types.iter().any(|t| matches!(t, TokenType::Newline)));
assert!(token_types.iter().any(|t| matches!(t, TokenType::Eof)));
}
#[test]
fn test_single_instruction() {
let tokens = tokenize_source("add").expect("Failed to tokenize instruction");
let token_types = extract_token_types(&tokens);
// Should have instruction, newline, and EOF
assert!(
token_types
.iter()
.any(|t| matches!(t, TokenType::Instruction(_)))
);
if let TokenType::Instruction(instr) = &tokens[0].token_type {
assert_eq!(instr.mnemonic, "add");
} else {
panic!("Expected instruction token");
}
}
#[test]
fn test_all_instructions() {
let instructions = [
"add", "sub", "mul", "div", "jmp", "call", "ret", "lli", "nop", "halt",
];
for instr in &instructions {
let tokens = tokenize_source(instr).expect("Failed to tokenize instruction");
if let TokenType::Instruction(parsed_instr) = &tokens[0].token_type {
assert_eq!(parsed_instr.mnemonic, *instr);
} else {
panic!("Expected instruction token for {instr}");
}
}
}
#[test]
fn test_registers() {
let test_cases = [
("r0", "r0"),
("r15", "r15"),
("sp", "sp"),
("fp", "fp"),
("pc", "pc"),
];
for (input, expected) in &test_cases {
let tokens = tokenize_source(input).expect("Failed to tokenize register");
if let TokenType::Register(reg) = &tokens[0].token_type {
assert_eq!(reg.name, *expected);
} else {
panic!("Expected register token for {input}");
}
}
}
#[test]
fn test_immediates() {
let test_cases = [
("42", 42),
("0", 0),
("0xFF", 255),
("0x1234", 0x1234),
("0xDEADBEEF", 0xDEAD_BEEF),
("0o12", 0o12),
("0b101", 0b101),
];
for (input, expected) in &test_cases {
let tokens = tokenize_source(input).expect("Failed to tokenize immediate");
if let TokenType::Immediate(value) = &tokens[0].token_type {
assert_eq!(*value, *expected);
} else {
panic!("Expected immediate token for {input}");
}
}
}
#[test]
fn test_labels() {
let test_cases = [
("loop_start:", "loop_start"),
("main:", "main"),
("_private_label:", "_private_label"),
("Label123:", "Label123"),
];
for (input, expected) in &test_cases {
let tokens = tokenize_source(input).expect("Failed to tokenize label");
if let TokenType::Label(label) = &tokens[0].token_type {
assert_eq!(label.name, *expected);
} else {
panic!("Expected label token for {input}");
}
}
}
#[test]
fn test_directives() {
let test_cases = [
(".global", "global"),
(".section", "section"),
(".data", "data"),
(".text", "text"),
];
for (input, expected) in &test_cases {
let tokens = tokenize_source(input).expect("Failed to tokenize directive");
if let TokenType::Directive(directive) = &tokens[0].token_type {
assert_eq!(directive.directive, *expected);
} else {
panic!("Expected directive token for {input}");
}
}
}
#[test]
fn test_symbols() {
let test_cases = [
("my_symbol", "my_symbol"),
("_private", "_private"),
("Symbol123", "Symbol123"),
("camelCase", "camelCase"),
];
for (input, expected) in &test_cases {
let tokens = tokenize_source(input).expect("Failed to tokenize symbol");
if let TokenType::Symbol(symbol) = &tokens[0].token_type {
assert_eq!(symbol.name, *expected);
} else {
panic!("Expected symbol token for {input}");
}
}
}