Files
damn_simple_architecture/assembler/src/source/tokeniser/tests.rs
T

419 lines
12 KiB
Rust

//! Unit tests for the tokenizer
use common::prelude::Register;
use crate::{
model::module::Module,
source::{
opcode::Opcode,
token::{Token, TokenType},
token_info::RegisterToken,
tokeniser::Tokeniser,
},
};
use std::{path::PathBuf, sync::Arc};
/// Builds a [`Tokeniser`] over the bytes of `source`, attached to a module created
/// for the placeholder path `test.dsa`.
///
/// # Panics
///
/// Panics with "Cannot create module!" when `Module::new` does not succeed.
fn create_tokenizer_from_source(source: &str) -> Tokeniser {
    let module = Module::new(PathBuf::from("test.dsa")).expect("Cannot create module!");
    let bytes = source.as_bytes().to_vec();
    Tokeniser::from_data(bytes, Arc::new(module))
}
/// Tokenises `source` in one step and hands back the result of `Tokeniser::tokenise`
/// unchanged, so callers decide how to treat an error.
fn tokenize_source(source: &str) -> Result<Vec<Token>, crate::error::AssembleError> {
    create_tokenizer_from_source(source).tokenise()
}
/// Helper function to extract token types from a token vector
fn extract_token_types(tokens: &[Token]) -> Vec<&TokenType> {
tokens.iter().map(|t| &t.token_type).collect()
}
/// An empty input must still produce a non-empty stream whose final token is `Eof`.
#[test]
fn test_empty_source() {
    let tokens = tokenize_source("").expect("Failed to tokenize empty source");
    // The stream may not be empty: it has to carry at least the EOF marker.
    assert!(!tokens.is_empty());
    let last = tokens.last().expect("Expected at least one token");
    assert!(matches!(last.token_type, TokenType::Eof));
}
/// Input made only of spaces and line breaks must yield both a `Newline` and an `Eof`.
#[test]
fn test_whitespace_only() {
    let tokens = tokenize_source(" \n \n ").expect("Failed to tokenize whitespace");
    let kinds = extract_token_types(&tokens);
    // Position is irrelevant here; each kind only has to occur somewhere.
    let has_newline = kinds.iter().any(|kind| matches!(kind, TokenType::Newline));
    let has_eof = kinds.iter().any(|kind| matches!(kind, TokenType::Eof));
    assert!(has_newline);
    assert!(has_eof);
}
/// A lone mnemonic is tokenised as an instruction that prints back as the same text.
#[test]
fn test_single_instruction() {
    let tokens = tokenize_source("add").expect("Failed to tokenize instruction");
    // An instruction token has to occur somewhere in the stream...
    let has_instruction = extract_token_types(&tokens)
        .iter()
        .any(|kind| matches!(kind, TokenType::Instruction(_)));
    assert!(has_instruction);
    // ...and the very first token must be that instruction.
    let TokenType::Instruction(opcode) = &tokens[0].token_type else {
        panic!("Expected instruction token");
    };
    assert_eq!(opcode.to_string(), "add");
}
/// Every mnemonic in the list below must come back as a leading instruction token
/// whose string form equals the input.
#[test]
fn test_all_instructions() {
    let mnemonics = ["add", "sub", "jmp", "call", "return", "lli", "nop", "hlt"];
    for instr in &mnemonics {
        let tokens = tokenize_source(instr).expect("Failed to tokenize instruction");
        let TokenType::Instruction(opcode) = &tokens[0].token_type else {
            panic!("Expected instruction token for {instr}");
        };
        assert_eq!(opcode.to_string(), *instr);
    }
}
/// Register names are tokenised as `Register` tokens whose register prints back as
/// the expected name.
#[test]
fn test_registers() {
    // (source text, expected register name)
    let cases = [("rg0", "rg0"), ("rgf", "rgf"), ("pcx", "pcx")];
    for &(input, expected) in &cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize register");
        let TokenType::Register(token) = &tokens[0].token_type else {
            panic!("Expected register token for {input}");
        };
        assert_eq!(token.reg.to_string(), expected);
    }
}
/// Decimal, hexadecimal (`0x`), octal (`0o`) and binary (`0b`) literals must all
/// tokenise to an `Immediate` holding the numeric value.
#[test]
fn test_immediates() {
    // (source text, expected value)
    let cases = [
        ("42", 42),
        ("0", 0),
        ("0xFF", 255),
        ("0x1234", 0x1234),
        ("0xDEADBEEF", 0xDEAD_BEEF),
        ("0o12", 0o12),
        ("0b101", 0b101),
    ];
    for &(input, expected) in &cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize immediate");
        let TokenType::Immediate(value) = &tokens[0].token_type else {
            panic!("Expected immediate token for {input}");
        };
        assert_eq!(*value, expected);
    }
}
/// An identifier followed by `:` becomes a `Label` whose name omits the colon.
#[test]
fn test_labels() {
    // (source text, expected label name)
    let cases = [
        ("loop_start:", "loop_start"),
        ("main:", "main"),
        ("_private_label:", "_private_label"),
        ("Label123:", "Label123"),
    ];
    for &(input, expected) in &cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize label");
        let TokenType::Label(token) = &tokens[0].token_type else {
            panic!("Expected label token for {input}");
        };
        assert_eq!(token.name, expected);
    }
}
/// The keywords `global`, `section` and `local` are tokenised as `Directive` tokens
/// carrying the keyword text.
#[test]
fn test_directives() {
    // (source text, expected directive text)
    let cases = [
        ("global", "global"),
        ("section", "section"),
        ("local", "local"),
    ];
    for &(input, expected) in &cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize directive");
        let TokenType::Directive(token) = &tokens[0].token_type else {
            panic!("Expected directive token for {input}");
        };
        assert_eq!(token.directive, expected);
    }
}
/// The identifiers listed below are tokenised as `Symbol` tokens that keep their
/// exact spelling.
#[test]
fn test_symbols() {
    // (source text, expected symbol name)
    let cases = [
        ("my_symbol", "my_symbol"),
        ("_private", "_private"),
        ("Symbol123", "Symbol123"),
        ("camelCase", "camelCase"),
    ];
    for &(input, expected) in &cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize symbol");
        let TokenType::Symbol(token) = &tokens[0].token_type else {
            panic!("Expected symbol token for {input}");
        };
        assert_eq!(token.name, expected);
    }
}
/// A three-operand instruction line must start with the sequence
/// instruction, register, comma, register, comma, immediate.
#[test]
fn test_complex_instruction_line() {
    let source = "addi rg1, rg2, 0xFF";
    let tokens = tokenize_source(source).expect("Failed to tokenise complex instruction");
    // Guard the indexing below; anything after the sixth token is not inspected.
    assert!(tokens.len() >= 6);
    let kinds = extract_token_types(&tokens);
    assert!(matches!(kinds[0], TokenType::Instruction(_)));
    assert!(matches!(kinds[1], TokenType::Register(_)));
    assert!(matches!(kinds[2], TokenType::Comma));
    assert!(matches!(kinds[3], TokenType::Register(_)));
    assert!(matches!(kinds[4], TokenType::Comma));
    assert!(matches!(kinds[5], TokenType::Immediate(_)));
}
/// Checks the exact token stream for two instruction lines where the first line
/// ends in a `//` comment: the comment must leave no token behind, and the stream
/// must close with `Newline` followed by `Eof`.
#[test]
fn test_multiline_with_comments() {
    const EXPECTED_TOKEN_TYPES: [TokenType; 11] = [
        TokenType::Instruction(Opcode::Add),
        TokenType::Register(RegisterToken::new(Register::Rg0)),
        TokenType::Comma,
        TokenType::Register(RegisterToken::new(Register::Rg1)),
        TokenType::Newline,
        TokenType::Instruction(Opcode::SubI),
        TokenType::Register(RegisterToken::new(Register::Rg2)),
        TokenType::Comma,
        TokenType::Immediate(10),
        TokenType::Newline,
        TokenType::Eof,
    ];
    const SOURCE: &str = r"add rg0, rg1 // Another comment
subi rg2, 10";
    let tokens =
        tokenize_source(SOURCE).expect("Failed to tokenise source with comments");
    let token_types = extract_token_types(&tokens);
    // Compare lengths first so the `zip` below cannot silently drop trailing tokens.
    assert_eq!(
        token_types.len(),
        EXPECTED_TOKEN_TYPES.len(),
        "{token_types:#?}"
    );
    for (index, (expected, got)) in EXPECTED_TOKEN_TYPES
        .iter()
        .zip(token_types.iter())
        .enumerate()
    {
        // `assert_eq!` prints both sides itself; the index pinpoints the mismatch.
        assert_eq!(expected, *got, "token mismatch at index {index}");
    }
}
/// Smoke test: the bundled `bf.dsa` program must tokenise without error and the
/// resulting stream must end in `Eof`.
#[test]
fn test_tokenise_brainf_interpreter() {
    const SOURCE: &str = include_str!("../../../../resources/dsa/bf.dsa");
    let tokens =
        tokenize_source(SOURCE).expect("Failed to tokenise the brainfuck compiler!");
    // Assert on the result instead of dumping every token through `dbg!`.
    let last = tokens.last().expect("Expected at least one token");
    assert!(
        matches!(last.token_type, TokenType::Eof),
        "Expected the token stream to end with Eof, got {:?}",
        last.token_type
    );
}
/// Double-quoted literals become `String` tokens holding the text between the
/// quotes, including the empty literal `""`.
#[test]
fn test_string_literals() {
    // (source text including quotes, expected contents)
    let cases = [
        (r#""hello world""#, "hello world"),
        (
            r#""++++++++++++++++++++++++++++++++++++++++++++""#,
            "++++++++++++++++++++++++++++++++++++++++++++",
        ),
        (r#""Invalid Instruction!""#, "Invalid Instruction!"),
        (r#""""#, ""),
    ];
    for (input, expected) in &cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize string literal");
        let TokenType::String(value) = &tokens[0].token_type else {
            panic!("Expected string token for {input}");
        };
        assert_eq!(value, expected);
    }
}
/// The data keywords `db`, `dw` and `resb` are tokenised as `Directive` tokens
/// carrying the keyword text.
#[test]
fn test_data_directives() {
    // (source text, expected directive text)
    let cases = [("db", "db"), ("dw", "dw"), ("resb", "resb")];
    for &(input, expected) in &cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize data declaration");
        let TokenType::Directive(token) = &tokens[0].token_type else {
            panic!("Expected data declaration token for {input}");
        };
        assert_eq!(token.directive, expected);
    }
}
/// An `include <name> "<path>"` line must start with a directive, a symbol and a
/// string token, in that order.
#[test]
fn test_include_directive() {
    let source = r#"include print "./lib/print.dsa""#;
    let tokens = tokenize_source(source).expect("Failed to tokenize include directive");
    // Guard the indexing below; later tokens are not inspected.
    assert!(tokens.len() >= 3);
    let kinds = extract_token_types(&tokens);
    assert!(matches!(kinds[0], TokenType::Directive(_)));
    assert!(matches!(kinds[1], TokenType::Symbol(_)));
    assert!(matches!(kinds[2], TokenType::String(_)));
}
/// Five-digit hexadecimal literals must tokenise to an `Immediate` with the same
/// numeric value.
#[test]
fn test_hex_addresses() {
    // (source text, expected value)
    let cases = [("0x10000", 0x10000), ("0x30000", 0x30000)];
    for &(input, expected) in &cases {
        let tokens = tokenize_source(input).expect("Failed to tokenize hex address");
        let TokenType::Immediate(value) = &tokens[0].token_type else {
            panic!("Expected immediate token for {input}");
        };
        assert_eq!(*value, expected);
    }
}
/// A two-register `ldw` line must start with instruction, register, comma, register.
#[test]
fn test_memory_operations() {
    let source = "ldw rg1, rg2";
    let tokens = tokenize_source(source).expect("Failed to tokenize memory operation");
    // Guard the indexing below; later tokens are not inspected.
    assert!(tokens.len() >= 4);
    let kinds = extract_token_types(&tokens);
    assert!(matches!(kinds[0], TokenType::Instruction(_)));
    assert!(matches!(kinds[1], TokenType::Register(_)));
    assert!(matches!(kinds[2], TokenType::Comma));
    assert!(matches!(kinds[3], TokenType::Register(_)));
}
/// A `call` with a `::`-qualified target must start with an instruction and contain
/// at least one symbol token.
#[test]
fn test_function_calls() {
    let source = "call print::print";
    let tokens = tokenize_source(source).expect("Failed to tokenize function call");
    assert!(tokens.len() >= 2);
    assert!(matches!(tokens[0].token_type, TokenType::Instruction(_)));
    // How `::` splits the target is deliberately left open; only the presence of a
    // symbol somewhere in the stream is checked.
    let has_symbol = extract_token_types(&tokens)
        .iter()
        .any(|kind| matches!(kind, TokenType::Symbol(_)));
    assert!(has_symbol);
}
/// Text after `//` must not contribute instruction tokens: two instruction lines,
/// one with a trailing comment, yield exactly two instructions.
#[test]
fn test_comments_are_ignored() {
    let source = "add rg0, rg1 // this is a comment\nsub rg2, rg3";
    let tokens = tokenize_source(source).expect("Failed to tokenize with comments");
    // Only `add` and `sub` may be counted; the comment text contributes none.
    let instruction_count = extract_token_types(&tokens)
        .into_iter()
        .filter(|kind| matches!(kind, TokenType::Instruction(_)))
        .count();
    assert_eq!(instruction_count, 2);
}
/// Input without a trailing line break must still produce a `Newline` token, and
/// `Eof` must be the final token.
#[test]
fn test_newline_always_present() {
    // Deliberately no "\n" at the end of the input.
    let source = "add rg0, rg1";
    let tokens = tokenize_source(source).expect("Failed to tokenize without newline");
    let has_newline = extract_token_types(&tokens)
        .iter()
        .any(|kind| matches!(kind, TokenType::Newline));
    assert!(
        has_newline,
        "Expected newline to be added even when missing from input"
    );
    // The stream has to be terminated by EOF.
    let last = tokens.last().expect("Expected at least one token");
    assert!(matches!(last.token_type, TokenType::Eof));
}
/// Four lines of compare/branch code must yield four instruction tokens and two
/// symbol tokens (the branch targets).
#[test]
fn test_complex_branching_code() {
    let source = r"
cmp rg3, rg8
jeq increment
cmp rg3, rg9
jeq decrement";
    let tokens = tokenize_source(source).expect("Failed to tokenize branching code");
    let kinds = extract_token_types(&tokens);
    let instruction_count = kinds
        .iter()
        .filter(|kind| matches!(kind, TokenType::Instruction(_)))
        .count();
    assert_eq!(instruction_count, 4);
    // `increment` and `decrement` are the only symbols in the source.
    let symbol_count = kinds
        .iter()
        .filter(|kind| matches!(kind, TokenType::Symbol(_)))
        .count();
    assert_eq!(symbol_count, 2);
}
/// `push`, `pop`, `pusha` and `popa` on separate lines must each be tokenised as an
/// instruction, giving four instruction tokens in total.
#[test]
fn test_stack_operations() {
    let source = "push rg2\npop zero\npusha 2\npopa 2";
    let tokens = tokenize_source(source).expect("Failed to tokenize stack operations");
    let instruction_count = extract_token_types(&tokens)
        .into_iter()
        .filter(|kind| matches!(kind, TokenType::Instruction(_)))
        .count();
    assert_eq!(instruction_count, 4);
}