419 lines
12 KiB
Rust
419 lines
12 KiB
Rust
//! Unit tests for the tokenizer
|
|
|
|
use common::prelude::Register;
|
|
|
|
use crate::{
|
|
model::module::Module,
|
|
source::{
|
|
opcode::Opcode,
|
|
token::{Token, TokenType},
|
|
token_info::RegisterToken,
|
|
tokeniser::Tokeniser,
|
|
},
|
|
};
|
|
use std::{path::PathBuf, sync::Arc};
|
|
|
|
/// Helper function to create a tokenizer from source text
|
|
fn create_tokenizer_from_source(source: &str) -> Tokeniser {
|
|
let path = PathBuf::from("test.dsa");
|
|
let module = Module::new(path).expect("Cannot create module!");
|
|
|
|
Tokeniser::from_data(source.as_bytes().to_vec(), Arc::new(module))
|
|
}
|
|
|
|
/// Helper function to tokenize source and return tokens
|
|
fn tokenize_source(source: &str) -> Result<Vec<Token>, crate::error::AssembleError> {
|
|
let tokenizer = create_tokenizer_from_source(source);
|
|
|
|
tokenizer.tokenise()
|
|
}
|
|
|
|
/// Helper function to extract token types from a token vector
|
|
fn extract_token_types(tokens: &[Token]) -> Vec<&TokenType> {
|
|
tokens.iter().map(|t| &t.token_type).collect()
|
|
}
|
|
|
|
#[test]
fn test_empty_source() {
    let tokens = tokenize_source("").expect("Failed to tokenize empty source");

    // Even empty input must yield a non-empty stream...
    assert!(!tokens.is_empty());

    // ...whose final token is the EOF marker.
    let final_token = tokens.last().expect("Expected at least one token");
    assert!(matches!(final_token.token_type, TokenType::Eof));
}
|
|
|
|
#[test]
fn test_whitespace_only() {
    let tokens = tokenize_source(" \n \n ").expect("Failed to tokenize whitespace");

    // Walk the stream once and note which of the two expected kinds show up.
    let mut saw_newline = false;
    let mut saw_eof = false;
    for kind in extract_token_types(&tokens) {
        match kind {
            TokenType::Newline => saw_newline = true,
            TokenType::Eof => saw_eof = true,
            _ => {}
        }
    }

    assert!(saw_newline);
    assert!(saw_eof);
}
|
|
|
|
#[test]
/// A lone mnemonic must produce an instruction token followed by a newline
/// and a terminating EOF.
fn test_single_instruction() {
    let tokens = tokenize_source("add").expect("Failed to tokenize instruction");

    // The mnemonic has to be the very first token. `first()` is used instead
    // of indexing so an empty stream reports a readable failure.
    match tokens.first().map(|token| &token.token_type) {
        Some(TokenType::Instruction(instr)) => assert_eq!(instr.to_string(), "add"),
        other => panic!("Expected instruction token, got {other:?}"),
    }

    // A newline token is expected even though the input has no line break.
    assert!(
        tokens
            .iter()
            .any(|token| matches!(token.token_type, TokenType::Newline)),
        "Expected a newline token after the instruction"
    );

    // The stream must be terminated by EOF.
    assert!(
        matches!(
            tokens.last().map(|token| &token.token_type),
            Some(TokenType::Eof)
        ),
        "Expected EOF as the final token"
    );
}
|
|
|
|
#[test]
fn test_all_instructions() {
    let instructions = ["add", "sub", "jmp", "call", "return", "lli", "nop", "hlt"];

    for instr in instructions.iter() {
        let lexed = tokenize_source(instr).expect("Failed to tokenize instruction");

        // Each mnemonic must round-trip through the first token unchanged.
        match &lexed[0].token_type {
            TokenType::Instruction(parsed_instr) => {
                assert_eq!(parsed_instr.to_string(), *instr);
            }
            _ => panic!("Expected instruction token for {instr}"),
        }
    }
}
|
|
|
|
#[test]
fn test_registers() {
    // (source text, expected textual form of the parsed register)
    let test_cases = [("rg0", "rg0"), ("rgf", "rgf"), ("pcx", "pcx")];

    for (input, expected) in test_cases.iter() {
        let lexed = tokenize_source(input).expect("Failed to tokenize register");

        match &lexed[0].token_type {
            TokenType::Register(reg) => assert_eq!(reg.reg.to_string(), *expected),
            _ => panic!("Expected register token for {input}"),
        }
    }
}
|
|
|
|
#[test]
fn test_immediates() {
    // (source text, expected numeric value): decimal, hex, octal and binary.
    let test_cases = [
        ("42", 42),
        ("0", 0),
        ("0xFF", 255),
        ("0x1234", 0x1234),
        ("0xDEADBEEF", 0xDEAD_BEEF),
        ("0o12", 0o12),
        ("0b101", 0b101),
    ];

    for (input, expected) in test_cases.iter() {
        let lexed = tokenize_source(input).expect("Failed to tokenize immediate");

        match &lexed[0].token_type {
            TokenType::Immediate(value) => assert_eq!(*value, *expected),
            _ => panic!("Expected immediate token for {input}"),
        }
    }
}
|
|
|
|
#[test]
fn test_labels() {
    // (source text with trailing colon, expected label name without it)
    let test_cases = [
        ("loop_start:", "loop_start"),
        ("main:", "main"),
        ("_private_label:", "_private_label"),
        ("Label123:", "Label123"),
    ];

    for (input, expected) in test_cases.iter() {
        let lexed = tokenize_source(input).expect("Failed to tokenize label");

        match &lexed[0].token_type {
            TokenType::Label(label) => assert_eq!(label.name, *expected),
            _ => panic!("Expected label token for {input}"),
        }
    }
}
|
|
|
|
#[test]
fn test_directives() {
    // (source text, expected directive text)
    let test_cases = [
        ("global", "global"),
        ("section", "section"),
        ("local", "local"),
    ];

    for (input, expected) in test_cases.iter() {
        let lexed = tokenize_source(input).expect("Failed to tokenize directive");

        match &lexed[0].token_type {
            TokenType::Directive(directive) => assert_eq!(directive.directive, *expected),
            _ => panic!("Expected directive token for {input}"),
        }
    }
}
|
|
|
|
#[test]
fn test_symbols() {
    // (source text, expected symbol name)
    let test_cases = [
        ("my_symbol", "my_symbol"),
        ("_private", "_private"),
        ("Symbol123", "Symbol123"),
        ("camelCase", "camelCase"),
    ];

    for (input, expected) in test_cases.iter() {
        let lexed = tokenize_source(input).expect("Failed to tokenize symbol");

        match &lexed[0].token_type {
            TokenType::Symbol(symbol) => assert_eq!(symbol.name, *expected),
            _ => panic!("Expected symbol token for {input}"),
        }
    }
}
|
|
|
|
#[test]
fn test_complex_instruction_line() {
    let source = "addi rg1, rg2, 0xFF";
    let tokens = tokenize_source(source).expect("Failed to tokenise complex instruction");

    // The first six tokens are checked positionally below, so require at
    // least that many before indexing.
    assert!(tokens.len() >= 6);

    let kinds = extract_token_types(&tokens);
    assert!(matches!(kinds[0], TokenType::Instruction(_)));
    assert!(matches!(kinds[1], TokenType::Register(_)));
    assert!(matches!(kinds[2], TokenType::Comma));
    assert!(matches!(kinds[3], TokenType::Register(_)));
    assert!(matches!(kinds[4], TokenType::Comma));
    assert!(matches!(kinds[5], TokenType::Immediate(_)));
}
|
|
|
|
#[test]
/// Checks the exact token stream for two instruction lines, the first of
/// which ends in a `//` comment that must not appear in the output.
fn test_multiline_with_comments() {
    const EXPECTED_TOKEN_TYPES: [TokenType; 11] = [
        TokenType::Instruction(Opcode::Add),
        TokenType::Register(RegisterToken::new(Register::Rg0)),
        TokenType::Comma,
        TokenType::Register(RegisterToken::new(Register::Rg1)),
        TokenType::Newline,
        TokenType::Instruction(Opcode::SubI),
        TokenType::Register(RegisterToken::new(Register::Rg2)),
        TokenType::Comma,
        TokenType::Immediate(10),
        TokenType::Newline,
        TokenType::Eof,
    ];

    const SOURCE: &str = r"add rg0, rg1 // Another comment
subi rg2, 10";

    let tokens =
        tokenize_source(SOURCE).expect("Failed to tokenise source with comments");
    let token_types = extract_token_types(&tokens);

    // Compare lengths first: `zip` below stops at the shorter side, so a
    // missing or surplus token would otherwise go unnoticed.
    assert_eq!(
        token_types.len(),
        EXPECTED_TOKEN_TYPES.len(),
        "{token_types:#?}"
    );

    // `assert_eq!` prints both sides on failure; the index pinpoints where
    // the streams diverge.
    for (index, (expected, got)) in EXPECTED_TOKEN_TYPES
        .iter()
        .zip(token_types.iter())
        .enumerate()
    {
        assert_eq!(*got, expected, "Unexpected token at index {index}");
    }
}
|
|
|
|
#[test]
/// Smoke test: the bundled `bf.dsa` program must tokenise without error and
/// yield a stream terminated by EOF.
fn test_tokenise_brainf_interpreter() {
    const SOURCE: &str = include_str!("../../../../resources/dsa/bf.dsa");

    let tokens =
        tokenize_source(SOURCE).expect("Failed to tokenise the brainfuck compiler!");

    // Tokenising successfully is the main check; additionally verify the
    // stream is properly terminated rather than printing every token.
    assert!(
        matches!(
            tokens.last().map(|token| &token.token_type),
            Some(TokenType::Eof)
        ),
        "Expected EOF as the final token"
    );
}
|
|
|
|
#[test]
fn test_string_literals() {
    // (quoted source text, expected contents without the quotes)
    let test_cases = [
        (r#""hello world""#, "hello world"),
        (
            r#""++++++++++++++++++++++++++++++++++++++++++++""#,
            "++++++++++++++++++++++++++++++++++++++++++++",
        ),
        (r#""Invalid Instruction!""#, "Invalid Instruction!"),
        (r#""""#, ""),
    ];

    for (input, expected) in test_cases.iter() {
        let lexed = tokenize_source(input).expect("Failed to tokenize string literal");

        match &lexed[0].token_type {
            TokenType::String(value) => assert_eq!(value, expected),
            _ => panic!("Expected string token for {input}"),
        }
    }
}
|
|
|
|
#[test]
fn test_data_directives() {
    // Data declarations are expected to surface as directive tokens.
    let test_cases = [("db", "db"), ("dw", "dw"), ("resb", "resb")];

    for (input, expected) in test_cases.iter() {
        let lexed = tokenize_source(input).expect("Failed to tokenize data declaration");

        match &lexed[0].token_type {
            TokenType::Directive(decl) => assert_eq!(decl.directive, *expected),
            _ => panic!("Expected data declaration token for {input}"),
        }
    }
}
|
|
|
|
#[test]
fn test_include_directive() {
    let source = r#"include print "./lib/print.dsa""#;
    let tokens = tokenize_source(source).expect("Failed to tokenize include directive");

    // Directive keyword, then the symbol, then the quoted path.
    assert!(tokens.len() >= 3);

    let kinds = extract_token_types(&tokens);
    assert!(matches!(kinds[0], TokenType::Directive(_)));
    assert!(matches!(kinds[1], TokenType::Symbol(_)));
    assert!(matches!(kinds[2], TokenType::String(_)));
}
|
|
|
|
#[test]
fn test_hex_addresses() {
    // (source text, expected numeric value)
    let test_cases = [("0x10000", 0x10000), ("0x30000", 0x30000)];

    for (input, expected) in test_cases.iter() {
        let lexed = tokenize_source(input).expect("Failed to tokenize hex address");

        match &lexed[0].token_type {
            TokenType::Immediate(value) => assert_eq!(*value, *expected),
            _ => panic!("Expected immediate token for {input}"),
        }
    }
}
|
|
|
|
#[test]
fn test_memory_operations() {
    let source = "ldw rg1, rg2";
    let tokens = tokenize_source(source).expect("Failed to tokenize memory operation");

    // Instruction, register, comma, register.
    assert!(tokens.len() >= 4);

    let kinds = extract_token_types(&tokens);
    assert!(matches!(kinds[0], TokenType::Instruction(_)));
    assert!(matches!(kinds[1], TokenType::Register(_)));
    assert!(matches!(kinds[2], TokenType::Comma));
    assert!(matches!(kinds[3], TokenType::Register(_)));
}
|
|
|
|
#[test]
fn test_function_calls() {
    let source = "call print::print";
    let tokens = tokenize_source(source).expect("Failed to tokenize function call");

    assert!(tokens.len() >= 2);
    assert!(matches!(tokens[0].token_type, TokenType::Instruction(_)));

    // How `::` is split into tokens is deliberately left unchecked; only
    // require that a symbol token appears somewhere in the stream.
    let has_symbol = extract_token_types(&tokens)
        .into_iter()
        .any(|kind| matches!(kind, TokenType::Symbol(_)));
    assert!(has_symbol);
}
|
|
|
|
#[test]
fn test_comments_are_ignored() {
    let source = "add rg0, rg1 // this is a comment\nsub rg2, rg3";
    let tokens = tokenize_source(source).expect("Failed to tokenize with comments");

    // Only `add` and `sub` may count as instructions; nothing inside the
    // comment text should be tokenised as one.
    let mut instruction_count = 0_usize;
    for token in &tokens {
        if let TokenType::Instruction(_) = token.token_type {
            instruction_count += 1;
        }
    }

    assert_eq!(instruction_count, 2);
}
|
|
|
|
#[test]
fn test_newline_always_present() {
    // The input deliberately lacks a trailing line break.
    let source = "add rg0, rg1";
    let tokens = tokenize_source(source).expect("Failed to tokenize without newline");

    // A newline token must still appear somewhere in the stream.
    let has_newline = tokens
        .iter()
        .map(|token| &token.token_type)
        .any(|kind| matches!(kind, TokenType::Newline));
    assert!(
        has_newline,
        "Expected newline to be added even when missing from input"
    );

    // The stream must end with EOF.
    let final_token = tokens.last().expect("Expected at least one token");
    assert!(matches!(final_token.token_type, TokenType::Eof));
}
|
|
|
|
#[test]
fn test_complex_branching_code() {
    let source = r"
cmp rg3, rg8
jeq increment
cmp rg3, rg9
jeq decrement";

    let tokens = tokenize_source(source).expect("Failed to tokenize branching code");
    let kinds = extract_token_types(&tokens);

    // Two `cmp` and two `jeq` mnemonics.
    let instruction_count = kinds
        .iter()
        .filter(|kind| matches!(kind, TokenType::Instruction(_)))
        .count();
    assert_eq!(instruction_count, 4);

    // The jump targets `increment` and `decrement`.
    let symbol_count = kinds
        .iter()
        .filter(|kind| matches!(kind, TokenType::Symbol(_)))
        .count();
    assert_eq!(symbol_count, 2);
}
|
|
|
|
#[test]
fn test_stack_operations() {
    let source = "push rg2\npop zero\npusha 2\npopa 2";
    let tokens = tokenize_source(source).expect("Failed to tokenize stack operations");

    // One instruction token per line: push, pop, pusha, popa.
    let mut instruction_count = 0_usize;
    for token in &tokens {
        if let TokenType::Instruction(_) = token.token_type {
            instruction_count += 1;
        }
    }

    assert_eq!(instruction_count, 4);
}
|