assembler: update tokeniser to allow extra prefixes and separators (0xDEAD_BEEF)
This commit is contained in:
@@ -0,0 +1,193 @@
|
||||
//! Unit tests for the tokenizer
|
||||
|
||||
use crate::{
|
||||
context::AssemblerContext,
|
||||
source::{
|
||||
token::{Token, TokenType},
|
||||
tokeniser::Tokeniser,
|
||||
},
|
||||
};
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Helper function to create a tokenizer from source text
|
||||
fn create_tokenizer_from_source(source: &str) -> Tokeniser {
|
||||
let data = source.as_bytes().to_vec();
|
||||
let path = PathBuf::from("test.dsa");
|
||||
Tokeniser::from_data(data, path)
|
||||
}
|
||||
|
||||
/// Helper function to tokenize source and return tokens
|
||||
fn tokenize_source(source: &str) -> Result<Vec<Token>, crate::error::AssembleError> {
|
||||
let tokenizer = create_tokenizer_from_source(source);
|
||||
let context = AssemblerContext::new();
|
||||
tokenizer.tokenise(&context)
|
||||
}
|
||||
|
||||
/// Helper function to extract token types from a token vector
|
||||
fn extract_token_types(tokens: &[Token]) -> Vec<&TokenType> {
|
||||
tokens.iter().map(|t| &t.token_type).collect()
|
||||
}
|
||||
|
||||
/// An empty input must still tokenise successfully and end with an EOF token.
#[test]
fn test_empty_source() {
    let tokens = tokenize_source("").expect("Failed to tokenize empty source");

    // The token list may not be empty: at minimum it carries the EOF marker.
    assert!(!tokens.is_empty());

    let last = tokens.last().expect("Expected at least one token");
    assert!(matches!(last.token_type, TokenType::Eof));
}
|
||||
|
||||
/// Input made only of spaces and line breaks must yield newline and EOF tokens.
#[test]
fn test_whitespace_only() {
    let tokens = tokenize_source(" \n \n ").expect("Failed to tokenize whitespace");
    let kinds = extract_token_types(&tokens);

    // Both token kinds only need to appear somewhere in the stream.
    let saw_newline = kinds.iter().any(|kind| matches!(kind, TokenType::Newline));
    assert!(saw_newline);

    let saw_eof = kinds.iter().any(|kind| matches!(kind, TokenType::Eof));
    assert!(saw_eof);
}
|
||||
|
||||
/// A lone `add` must tokenise to an instruction token carrying that mnemonic.
#[test]
fn test_single_instruction() {
    let tokens = tokenize_source("add").expect("Failed to tokenize instruction");
    let kinds = extract_token_types(&tokens);

    // An instruction token has to be present somewhere in the stream...
    let has_instruction = kinds
        .iter()
        .any(|kind| matches!(kind, TokenType::Instruction(_)));
    assert!(has_instruction);

    // ...and the very first token must be that instruction.
    match &tokens[0].token_type {
        TokenType::Instruction(instr) => assert_eq!(instr.mnemonic, "add"),
        _ => panic!("Expected instruction token"),
    }
}
|
||||
|
||||
/// Every listed mnemonic, tokenised on its own, must produce an instruction
/// token as the first token, with a matching `mnemonic` field.
#[test]
fn test_all_instructions() {
    let instructions = [
        "add", "sub", "mul", "div", "jmp", "call", "ret", "lli", "nop", "halt",
    ];

    for &instr in instructions.iter() {
        let tokens = tokenize_source(instr).expect("Failed to tokenize instruction");

        match &tokens[0].token_type {
            TokenType::Instruction(parsed_instr) => assert_eq!(parsed_instr.mnemonic, instr),
            _ => panic!("Expected instruction token for {instr}"),
        }
    }
}
|
||||
|
||||
/// Each register spelling must tokenise to a register token whose `name`
/// equals the expected text.
#[test]
fn test_registers() {
    // (source text, expected register name)
    let test_cases = [
        ("r0", "r0"),
        ("r15", "r15"),
        ("sp", "sp"),
        ("fp", "fp"),
        ("pc", "pc"),
    ];

    for &(input, expected) in test_cases.iter() {
        let tokens = tokenize_source(input).expect("Failed to tokenize register");

        match &tokens[0].token_type {
            TokenType::Register(reg) => assert_eq!(reg.name, expected),
            _ => panic!("Expected register token for {input}"),
        }
    }
}
|
||||
|
||||
/// Numeric literals in decimal, `0x`, `0o` and `0b` notation must tokenise
/// to an immediate token holding the expected value.
#[test]
fn test_immediates() {
    // (source text, expected numeric value)
    let test_cases = [
        ("42", 42),
        ("0", 0),
        ("0xFF", 255),
        ("0x1234", 0x1234),
        ("0xDEADBEEF", 0xDEAD_BEEF),
        ("0o12", 0o12),
        ("0b101", 0b101),
    ];

    for &(input, expected) in test_cases.iter() {
        let tokens = tokenize_source(input).expect("Failed to tokenize immediate");

        match &tokens[0].token_type {
            TokenType::Immediate(value) => assert_eq!(*value, expected),
            _ => panic!("Expected immediate token for {input}"),
        }
    }
}
|
||||
|
||||
/// An identifier followed by `:` must tokenise to a label token whose `name`
/// is the identifier without the colon.
#[test]
fn test_labels() {
    // (source text, expected label name)
    let test_cases = [
        ("loop_start:", "loop_start"),
        ("main:", "main"),
        ("_private_label:", "_private_label"),
        ("Label123:", "Label123"),
    ];

    for &(input, expected) in test_cases.iter() {
        let tokens = tokenize_source(input).expect("Failed to tokenize label");

        match &tokens[0].token_type {
            TokenType::Label(label) => assert_eq!(label.name, expected),
            _ => panic!("Expected label token for {input}"),
        }
    }
}
|
||||
|
||||
/// A dot-prefixed word must tokenise to a directive token whose `directive`
/// field is the word without the leading dot.
#[test]
fn test_directives() {
    // (source text, expected directive name)
    let test_cases = [
        (".global", "global"),
        (".section", "section"),
        (".data", "data"),
        (".text", "text"),
    ];

    for &(input, expected) in test_cases.iter() {
        let tokens = tokenize_source(input).expect("Failed to tokenize directive");

        match &tokens[0].token_type {
            TokenType::Directive(directive) => assert_eq!(directive.directive, expected),
            _ => panic!("Expected directive token for {input}"),
        }
    }
}
|
||||
|
||||
/// The listed plain identifiers must each tokenise to a symbol token whose
/// `name` is the identifier itself.
#[test]
fn test_symbols() {
    // (source text, expected symbol name)
    let test_cases = [
        ("my_symbol", "my_symbol"),
        ("_private", "_private"),
        ("Symbol123", "Symbol123"),
        ("camelCase", "camelCase"),
    ];

    for &(input, expected) in test_cases.iter() {
        let tokens = tokenize_source(input).expect("Failed to tokenize symbol");

        match &tokens[0].token_type {
            TokenType::Symbol(symbol) => assert_eq!(symbol.name, expected),
            _ => panic!("Expected symbol token for {input}"),
        }
    }
}
|
||||
Reference in New Issue
Block a user