diff --git a/assembler/src/source/tokeniser/tests.rs b/assembler/src/source/tokeniser/tests.rs
index 392d8f5..e5092c7 100644
--- a/assembler/src/source/tokeniser/tests.rs
+++ b/assembler/src/source/tokeniser/tests.rs
@@ -246,3 +246,183 @@ fn test_tokenise_brainf_interpreter() {
 
     dbg!(tokens);
 }
+
+/// Quoted literals yield a leading `String` token whose value omits the surrounding quotes.
+#[test]
+fn test_string_literals() {
+    let test_cases = [
+        (r#""hello world""#, "hello world"),
+        (
+            r#""++++++++++++++++++++++++++++++++++++++++++++""#,
+            "++++++++++++++++++++++++++++++++++++++++++++",
+        ),
+        (r#""Invalid Instruction!""#, "Invalid Instruction!"),
+        (r#""""#, ""),
+    ];
+
+    for (input, expected) in &test_cases {
+        let tokens = tokenize_source(input).expect("Failed to tokenize string literal");
+
+        if let TokenType::String(value) = &tokens[0].token_type {
+            assert_eq!(value, expected);
+        } else {
+            panic!("Expected string token for {input}");
+        }
+    }
+}
+
+/// `db`, `dw` and `resb` each yield a leading `Directive` token carrying the same keyword.
+#[test]
+fn test_data_directives() {
+    let test_cases = [("db", "db"), ("dw", "dw"), ("resb", "resb")];
+
+    for (input, expected) in &test_cases {
+        let tokens = tokenize_source(input).expect("Failed to tokenize data declaration");
+
+        if let TokenType::Directive(decl) = &tokens[0].token_type {
+            assert_eq!(decl.directive, *expected);
+        } else {
+            panic!("Expected data declaration token for {input}");
+        }
+    }
+}
+
+/// An `include` line yields `Directive`, `Symbol`, `String` as its first three tokens.
+#[test]
+fn test_include_directive() {
+    let source = r#"include print "./lib/print.dsa""#;
+    let tokens = tokenize_source(source).expect("Failed to tokenize include directive");
+
+    assert!(tokens.len() >= 3);
+    assert!(matches!(tokens[0].token_type, TokenType::Directive(_)));
+    assert!(matches!(tokens[1].token_type, TokenType::Symbol(_)));
+    assert!(matches!(tokens[2].token_type, TokenType::String(_)));
+}
+
+/// `0x`-prefixed hex literals yield a leading `Immediate` token with the parsed numeric value.
+#[test]
+fn test_hex_addresses() {
+    let test_cases = [("0x10000", 0x10000), ("0x30000", 0x30000)];
+
+    for (input, expected) in &test_cases {
+        let tokens = tokenize_source(input).expect("Failed to tokenize hex address");
+
+        if let TokenType::Immediate(value) = &tokens[0].token_type {
+            assert_eq!(*value, *expected);
+        } else {
+            panic!("Expected immediate token for {input}");
+        }
+    }
+}
+
+/// `ldw rg1, rg2` starts with `Instruction`, `Register`, `Comma`, `Register` in that order.
+#[test]
+fn test_memory_operations() {
+    let source = "ldw rg1, rg2";
+    let tokens = tokenize_source(source).expect("Failed to tokenize memory operation");
+
+    assert!(tokens.len() >= 4);
+    assert!(matches!(tokens[0].token_type, TokenType::Instruction(_)));
+    assert!(matches!(tokens[1].token_type, TokenType::Register(_)));
+    assert!(matches!(tokens[2].token_type, TokenType::Comma));
+    assert!(matches!(tokens[3].token_type, TokenType::Register(_)));
+}
+
+/// `call print::print` starts with an `Instruction` and contains at least one `Symbol` token.
+#[test]
+fn test_function_calls() {
+    let source = "call print::print";
+    let tokens = tokenize_source(source).expect("Failed to tokenize function call");
+
+    assert!(tokens.len() >= 2);
+    assert!(matches!(tokens[0].token_type, TokenType::Instruction(_)));
+    // The symbol might be parsed differently depending on how :: is handled
+    // This test checks basic structure
+    assert!(
+        tokens
+            .iter()
+            .any(|t| matches!(t.token_type, TokenType::Symbol(_)))
+    );
+}
+
+/// Two instruction lines, the first ending in a `//` comment, yield exactly two `Instruction`s.
+#[test]
+fn test_comments_are_ignored() {
+    let source = "add rg0, rg1 // this is a comment\nsub rg2, rg3";
+    let tokens = tokenize_source(source).expect("Failed to tokenize with comments");
+
+    // Comments should be stripped, so we should only have instruction tokens
+    let instruction_count = tokens
+        .iter()
+        .filter(|t| matches!(t.token_type, TokenType::Instruction(_)))
+        .count();
+
+    assert_eq!(instruction_count, 2);
+}
+
+/// Input lacking a final line break still yields a `Newline` token, and `Eof` is the last token.
+#[test]
+fn test_newline_always_present() {
+    // Test that even without explicit newline at end, one is added
+    let source = "add rg0, rg1"; // No newline at end
+    let tokens = tokenize_source(source).expect("Failed to tokenize without newline");
+
+    // Should have newline before EOF
+    let has_newline = tokens
+        .iter()
+        .any(|t| matches!(t.token_type, TokenType::Newline));
+
+    assert!(
+        has_newline,
+        "Expected newline to be added even when missing from input"
+    );
+
+    // EOF should be last.
+    assert!(matches!(
+        tokens
+            .last()
+            .expect("Expected at least one token")
+            .token_type,
+        TokenType::Eof
+    ));
+}
+
+/// A four-line `cmp`/`jeq` snippet yields four `Instruction` tokens and two `Symbol` tokens.
+#[test]
+fn test_complex_branching_code() {
+    let source = r"
+        cmp rg3, rg8
+        jeq increment
+        cmp rg3, rg9
+        jeq decrement";
+
+    let tokens = tokenize_source(source).expect("Failed to tokenize branching code");
+
+    let instruction_count = tokens
+        .iter()
+        .filter(|t| matches!(t.token_type, TokenType::Instruction(_)))
+        .count();
+
+    assert_eq!(instruction_count, 4);
+
+    let symbol_count = tokens
+        .iter()
+        .filter(|t| matches!(t.token_type, TokenType::Symbol(_)))
+        .count();
+
+    assert_eq!(symbol_count, 2); // increment and decrement labels
+}
+
+/// `push`, `pop`, `pusha` and `popa` are each recognised as an `Instruction` token.
+#[test]
+fn test_stack_operations() {
+    let source = "push rg2\npop zero\npusha 2\npopa 2";
+    let tokens = tokenize_source(source).expect("Failed to tokenize stack operations");
+
+    let instruction_count = tokens
+        .iter()
+        .filter(|t| matches!(t.token_type, TokenType::Instruction(_)))
+        .count();
+
+    assert_eq!(instruction_count, 4);
+}
diff --git a/resources/dsa/bf.dsa b/resources/dsa/bf.dsa
index d87d25f..ccd7854 100644
--- a/resources/dsa/bf.dsa
+++ b/resources/dsa/bf.dsa
@@ -5,7 +5,7 @@
 include print "./lib/print.dsa"
 
 // "print hello world"
-db program: "++++++++++++++++++++++++++++++++++++++++++++
+db program "++++++++++++++++++++++++++++++++++++++++++++
 >++++++++++++++++++++++++++++++++
 >++++++++++++++++
 >
@@ -35,10 +35,10 @@ db program: "++++++++++++++++++++++++++++++++++++++++++++
 ]
 <<++..."
 
-db error: "Invalid Instruction!"
-dw stack: 0x10000
-dw input: 0x30000
-resb data: 1024
+db error "Invalid Instruction!"
+dw stack 0x10000
+dw input 0x30000
+resb data 1024
 
 // set up a stack so we can call functions
 _init_stack: