//! Unit tests for the tokenizer use common::prelude::Register; use crate::{ model::module::Module, source::{ opcode::Opcode, token::{Token, TokenType}, token_info::RegisterToken, tokeniser::Tokeniser, }, }; use std::{path::PathBuf, sync::Arc}; /// Helper function to create a tokenizer from source text fn create_tokenizer_from_source(source: &str) -> Tokeniser { let path = PathBuf::from("test.dsa"); let module = Module::new(path).expect("Cannot create module!"); Tokeniser::from_data(source.as_bytes().to_vec(), Arc::new(module)) } /// Helper function to tokenize source and return tokens fn tokenize_source(source: &str) -> Result, crate::error::AssembleError> { let tokenizer = create_tokenizer_from_source(source); tokenizer.tokenise() } /// Helper function to extract token types from a token vector fn extract_token_types(tokens: &[Token]) -> Vec<&TokenType> { tokens.iter().map(|t| &t.token_type).collect() } #[test] fn test_empty_source() { let tokens = tokenize_source("").expect("Failed to tokenize empty source"); // Should have at least EOF token assert!(!tokens.is_empty()); assert!(matches!( tokens .last() .expect("Expected at least one token") .token_type, TokenType::Eof )); } #[test] fn test_whitespace_only() { let tokens = tokenize_source(" \n \n ").expect("Failed to tokenize whitespace"); // Should have newlines and EOF let token_types = extract_token_types(&tokens); assert!(token_types.iter().any(|t| matches!(t, TokenType::Newline))); assert!(token_types.iter().any(|t| matches!(t, TokenType::Eof))); } #[test] fn test_single_instruction() { let tokens = tokenize_source("add").expect("Failed to tokenize instruction"); let token_types = extract_token_types(&tokens); // Should have instruction, newline, and EOF assert!( token_types .iter() .any(|t| matches!(t, TokenType::Instruction(_))) ); if let TokenType::Instruction(instr) = &tokens[0].token_type { assert_eq!(instr.to_string(), "add"); } else { panic!("Expected instruction token"); } } #[test] fn test_all_instructions() { let instructions = ["add", "sub", "jmp", "call", "return", "lli", "nop", "hlt"]; for instr in &instructions { let tokens = tokenize_source(instr).expect("Failed to tokenize instruction"); if let TokenType::Instruction(parsed_instr) = &tokens[0].token_type { assert_eq!(parsed_instr.to_string(), *instr); } else { panic!("Expected instruction token for {instr}"); } } } #[test] fn test_registers() { let test_cases = [("rg0", "rg0"), ("rgf", "rgf"), ("pcx", "pcx")]; for (input, expected) in &test_cases { let tokens = tokenize_source(input).expect("Failed to tokenize register"); if let TokenType::Register(reg) = &tokens[0].token_type { assert_eq!(reg.reg.to_string(), *expected); } else { panic!("Expected register token for {input}"); } } } #[test] fn test_immediates() { let test_cases = [ ("42", 42), ("0", 0), ("0xFF", 255), ("0x1234", 0x1234), ("0xDEADBEEF", 0xDEAD_BEEF), ("0o12", 0o12), ("0b101", 0b101), ]; for (input, expected) in &test_cases { let tokens = tokenize_source(input).expect("Failed to tokenize immediate"); if let TokenType::Immediate(value) = &tokens[0].token_type { assert_eq!(*value, *expected); } else { panic!("Expected immediate token for {input}"); } } } #[test] fn test_labels() { let test_cases = [ ("loop_start:", "loop_start"), ("main:", "main"), ("_private_label:", "_private_label"), ("Label123:", "Label123"), ]; for (input, expected) in &test_cases { let tokens = tokenize_source(input).expect("Failed to tokenize label"); if let TokenType::Label(label) = &tokens[0].token_type { assert_eq!(label.name, *expected); } else { panic!("Expected label token for {input}"); } } } #[test] fn test_directives() { let test_cases = [ ("global", "global"), ("section", "section"), ("local", "local"), ]; for (input, expected) in &test_cases { let tokens = tokenize_source(input).expect("Failed to tokenize directive"); if let TokenType::Directive(directive) = &tokens[0].token_type { assert_eq!(directive.directive, *expected); } else { panic!("Expected directive token for {input}"); } } } #[test] fn test_symbols() { let test_cases = [ ("my_symbol", "my_symbol"), ("_private", "_private"), ("Symbol123", "Symbol123"), ("camelCase", "camelCase"), ]; for (input, expected) in &test_cases { let tokens = tokenize_source(input).expect("Failed to tokenize symbol"); if let TokenType::Symbol(symbol) = &tokens[0].token_type { assert_eq!(symbol.name, *expected); } else { panic!("Expected symbol token for {input}"); } } } #[test] fn test_complex_instruction_line() { let source = "addi rg1, rg2, 0xFF"; let tokens = tokenize_source(source).expect("Failed to tokenise complex instruction"); // Should have: instruction, register, comma, register, comma, immediate, newline, EOF assert!(tokens.len() >= 6); assert!(matches!(tokens[0].token_type, TokenType::Instruction(_))); assert!(matches!(tokens[1].token_type, TokenType::Register(_))); assert!(matches!(tokens[2].token_type, TokenType::Comma)); assert!(matches!(tokens[3].token_type, TokenType::Register(_))); assert!(matches!(tokens[4].token_type, TokenType::Comma)); assert!(matches!(tokens[5].token_type, TokenType::Immediate(_))); } #[test] fn test_multiline_with_comments() { const EXPECTED_TOKEN_TYPES: [TokenType; 11] = [ TokenType::Instruction(Opcode::Add), TokenType::Register(RegisterToken::new(Register::Rg0)), TokenType::Comma, TokenType::Register(RegisterToken::new(Register::Rg1)), TokenType::Newline, TokenType::Instruction(Opcode::SubI), TokenType::Register(RegisterToken::new(Register::Rg2)), TokenType::Comma, TokenType::Immediate(10), TokenType::Newline, TokenType::Eof, ]; const SOURCE: &str = r"add rg0, rg1 // Another comment subi rg2, 10"; let tokens = tokenize_source(SOURCE).expect("Failed to tokenise source with comments"); let token_types = extract_token_types(&tokens); assert_eq!( token_types.len(), EXPECTED_TOKEN_TYPES.len(), "{token_types:#?}" ); for (expected, got) in EXPECTED_TOKEN_TYPES.iter().zip(token_types.iter()) { assert!(!(expected != *got), "Expected {expected:?}, got {got:?}"); } } #[test] fn test_tokenise_brainf_interpreter() { const SOURCE: &str = include_str!("../../../../resources/dsa/bf.dsa"); let tokens = tokenize_source(SOURCE).expect("Failed to tokenise the brainfuck compiler!"); dbg!(tokens); } #[test] fn test_string_literals() { let test_cases = [ (r#""hello world""#, "hello world"), ( r#""++++++++++++++++++++++++++++++++++++++++++++""#, "++++++++++++++++++++++++++++++++++++++++++++", ), (r#""Invalid Instruction!""#, "Invalid Instruction!"), (r#""""#, ""), ]; for (input, expected) in &test_cases { let tokens = tokenize_source(input).expect("Failed to tokenize string literal"); if let TokenType::String(value) = &tokens[0].token_type { assert_eq!(value, expected); } else { panic!("Expected string token for {input}"); } } } #[test] fn test_data_directives() { let test_cases = [("db", "db"), ("dw", "dw"), ("resb", "resb")]; for (input, expected) in &test_cases { let tokens = tokenize_source(input).expect("Failed to tokenize data declaration"); if let TokenType::Directive(decl) = &tokens[0].token_type { assert_eq!(decl.directive, *expected); } else { panic!("Expected data declaration token for {input}"); } } } #[test] fn test_include_directive() { let source = r#"include print "./lib/print.dsa""#; let tokens = tokenize_source(source).expect("Failed to tokenize include directive"); assert!(tokens.len() >= 3); assert!(matches!(tokens[0].token_type, TokenType::Directive(_))); assert!(matches!(tokens[1].token_type, TokenType::Symbol(_))); assert!(matches!(tokens[2].token_type, TokenType::String(_))); } #[test] fn test_hex_addresses() { let test_cases = [("0x10000", 0x10000), ("0x30000", 0x30000)]; for (input, expected) in &test_cases { let tokens = tokenize_source(input).expect("Failed to tokenize hex address"); if let TokenType::Immediate(value) = &tokens[0].token_type { assert_eq!(*value, *expected); } else { panic!("Expected immediate token for {input}"); } } } #[test] fn test_memory_operations() { let source = "ldw rg1, rg2"; let tokens = tokenize_source(source).expect("Failed to tokenize memory operation"); assert!(tokens.len() >= 4); assert!(matches!(tokens[0].token_type, TokenType::Instruction(_))); assert!(matches!(tokens[1].token_type, TokenType::Register(_))); assert!(matches!(tokens[2].token_type, TokenType::Comma)); assert!(matches!(tokens[3].token_type, TokenType::Register(_))); } #[test] fn test_function_calls() { let source = "call print::print"; let tokens = tokenize_source(source).expect("Failed to tokenize function call"); assert!(tokens.len() >= 2); assert!(matches!(tokens[0].token_type, TokenType::Instruction(_))); // The symbol might be parsed differently depending on how :: is handled // This test checks basic structure assert!( tokens .iter() .any(|t| matches!(t.token_type, TokenType::Symbol(_))) ); } #[test] fn test_comments_are_ignored() { let source = "add rg0, rg1 // this is a comment\nsub rg2, rg3"; let tokens = tokenize_source(source).expect("Failed to tokenize with comments"); // Comments should be stripped, so we should only have instruction tokens let instruction_count = tokens .iter() .filter(|t| matches!(t.token_type, TokenType::Instruction(_))) .count(); assert_eq!(instruction_count, 2); } #[test] fn test_newline_always_present() { // Test that even without explicit newline at end, one is added let source = "add rg0, rg1"; // No newline at end let tokens = tokenize_source(source).expect("Failed to tokenize without newline"); // Should have newline before EOF let has_newline = tokens .iter() .any(|t| matches!(t.token_type, TokenType::Newline)); assert!( has_newline, "Expected newline to be added even when missing from input" ); // EOF should be last. assert!(matches!( tokens .last() .expect("Expected at least one token") .token_type, TokenType::Eof )); } #[test] fn test_complex_branching_code() { let source = r" cmp rg3, rg8 jeq increment cmp rg3, rg9 jeq decrement"; let tokens = tokenize_source(source).expect("Failed to tokenize branching code"); let instruction_count = tokens .iter() .filter(|t| matches!(t.token_type, TokenType::Instruction(_))) .count(); assert_eq!(instruction_count, 4); let symbol_count = tokens .iter() .filter(|t| matches!(t.token_type, TokenType::Symbol(_))) .count(); assert_eq!(symbol_count, 2); // increment and decrement labels } #[test] fn test_stack_operations() { let source = "push rg2\npop zero\npusha 2\npopa 2"; let tokens = tokenize_source(source).expect("Failed to tokenize stack operations"); let instruction_count = tokens .iter() .filter(|t| matches!(t.token_type, TokenType::Instruction(_))) .count(); assert_eq!(instruction_count, 4); }