tokeniser/syntax: drop the colon after data-declaration names (`db varname:` -> `db varname`) and update tests.

2025-06-29 00:22:10 +01:00
parent 6ceb35d439
commit 21582f1297
2 changed files with 175 additions and 5 deletions
@@ -246,3 +246,173 @@ fn test_tokenise_brainf_interpreter() {

    dbg!(tokens);
 }
+
+#[test]
+fn test_string_literals() { // quoted literals must yield String tokens holding the unquoted text
+    let test_cases = [ // (source text, expected token value)
+        (r#""hello world""#, "hello world"),
+        (
+            r#""++++++++++++++++++++++++++++++++++++++++++++""#,
+            "++++++++++++++++++++++++++++++++++++++++++++",
+        ),
+        (r#""Invalid Instruction!""#, "Invalid Instruction!"),
+        (r#""""#, ""), // empty literal
+    ];
+
+    for (input, expected) in &test_cases {
+        let tokens = tokenize_source(input).expect("Failed to tokenize string literal");
+
+        if let TokenType::String(value) = &tokens[0].token_type { // only the first token is inspected
+            assert_eq!(value, expected);
+        } else {
+            panic!("Expected string token for {input}");
+        }
+    }
+}
+
+#[test]
+fn test_data_directives() { // a bare directive keyword must yield a Directive token as first token
+    let test_cases = [("db", "db"), ("dw", "dw"), ("resb", "resb")]; // (input, expected name)
+
+    for (input, expected) in &test_cases {
+        let tokens = tokenize_source(input).expect("Failed to tokenize data declaration");
+
+        if let TokenType::Directive(decl) = &tokens[0].token_type {
+            assert_eq!(decl.directive, *expected); // the directive field must equal the keyword
+        } else {
+            panic!("Expected data declaration token for {input}");
+        }
+    }
+}
+
+#[test]
+fn test_include_directive() { // an include line must start with Directive, Symbol, String
+    let source = r#"include print "./lib/print.dsa""#;
+    let tokens = tokenize_source(source).expect("Failed to tokenize include directive");
+
+    assert!(tokens.len() >= 3); // guards the indexing below; further tokens are allowed
+    assert!(matches!(tokens[0].token_type, TokenType::Directive(_)));
+    assert!(matches!(tokens[1].token_type, TokenType::Symbol(_)));
+    assert!(matches!(tokens[2].token_type, TokenType::String(_)));
+}
+
+#[test]
+fn test_hex_addresses() { // 0x-prefixed literals must yield Immediate tokens with that value
+    let test_cases = [("0x10000", 0x10000), ("0x30000", 0x30000)]; // (input, expected value)
+
+    for (input, expected) in &test_cases {
+        let tokens = tokenize_source(input).expect("Failed to tokenize hex address");
+
+        if let TokenType::Immediate(value) = &tokens[0].token_type { // only the first token is inspected
+            assert_eq!(*value, *expected);
+        } else {
+            panic!("Expected immediate token for {input}");
+        }
+    }
+}
+
+#[test]
+fn test_memory_operations() { // ldw with two registers: Instruction, Register, Comma, Register
+    let source = "ldw rg1, rg2";
+    let tokens = tokenize_source(source).expect("Failed to tokenize memory operation");
+
+    assert!(tokens.len() >= 4); // guards the indexing below; further tokens are allowed
+    assert!(matches!(tokens[0].token_type, TokenType::Instruction(_)));
+    assert!(matches!(tokens[1].token_type, TokenType::Register(_)));
+    assert!(matches!(tokens[2].token_type, TokenType::Comma));
+    assert!(matches!(tokens[3].token_type, TokenType::Register(_)));
+}
+
+#[test]
+fn test_function_calls() { // call print::print: first token is an Instruction, plus a Symbol
+    let source = "call print::print";
+    let tokens = tokenize_source(source).expect("Failed to tokenize function call");
+
+    assert!(tokens.len() >= 2);
+    assert!(matches!(tokens[0].token_type, TokenType::Instruction(_)));
+    // How `::` splits `print::print` is not pinned down here, so this only requires
+    // that at least one Symbol token appears somewhere in the output.
+    assert!(
+        tokens
+            .iter()
+            .any(|t| matches!(t.token_type, TokenType::Symbol(_)))
+    );
+}
+
+#[test]
+fn test_comments_are_ignored() { // a trailing // comment must not disturb the surrounding code
+    let source = "add rg0, rg1 // this is a comment\nsub rg2, rg3";
+    let tokens = tokenize_source(source).expect("Failed to tokenize with comments");
+
+    // Exactly two Instruction tokens: add before the comment and sub on the line after it.
+    let instruction_count = tokens
+        .iter()
+        .filter(|t| matches!(t.token_type, TokenType::Instruction(_)))
+        .count();
+
+    assert_eq!(instruction_count, 2);
+}
+
+#[test]
+fn test_newline_always_present() {
+    // Input has no trailing newline; the tokeniser is expected to emit a Newline token anyway.
+    let source = "add rg0, rg1"; // No newline at end
+    let tokens = tokenize_source(source).expect("Failed to tokenize without newline");
+
+    // A Newline token must appear somewhere (its position relative to Eof is not checked).
+    let has_newline = tokens
+        .iter()
+        .any(|t| matches!(t.token_type, TokenType::Newline));
+
+    assert!(
+        has_newline,
+        "Expected newline to be added even when missing from input"
+    );
+
+    // The final token must be Eof.
+    assert!(matches!(
+        tokens
+            .last()
+            .expect("Expected at least one token")
+            .token_type,
+        TokenType::Eof
+    ));
+}
+
+#[test]
+fn test_complex_branching_code() { // multi-line cmp/jeq source: 4 instructions, 2 symbols
+    let source = r"
+    cmp rg3, rg8
+    jeq increment
+    cmp rg3, rg9
+    jeq decrement";
+
+    let tokens = tokenize_source(source).expect("Failed to tokenize branching code");
+
+    let instruction_count = tokens
+        .iter()
+        .filter(|t| matches!(t.token_type, TokenType::Instruction(_)))
+        .count();
+
+    assert_eq!(instruction_count, 4); // two cmp and two jeq
+
+    let symbol_count = tokens
+        .iter()
+        .filter(|t| matches!(t.token_type, TokenType::Symbol(_)))
+        .count();
+
+    assert_eq!(symbol_count, 2); // the jeq targets: increment and decrement
+}
+
+#[test]
+fn test_stack_operations() { // four stack mnemonics, one per line, give four Instruction tokens
+    let source = "push rg2\npop zero\npusha 2\npopa 2";
+    let tokens = tokenize_source(source).expect("Failed to tokenize stack operations");
+
+    let instruction_count = tokens
+        .iter()
+        .filter(|t| matches!(t.token_type, TokenType::Instruction(_)))
+        .count();
+
+    assert_eq!(instruction_count, 4); // push, pop, pusha, popa
+}
@@ -5,7 +5,7 @@
 include print "./lib/print.dsa"

 // "print hello world"
-db program: "++++++++++++++++++++++++++++++++++++++++++++
+db program "++++++++++++++++++++++++++++++++++++++++++++
 >++++++++++++++++++++++++++++++++
 >++++++++++++++++
 >
@@ -35,10 +35,10 @@ db program: "++++++++++++++++++++++++++++++++++++++++++++
 ]
 <<++..."

-db error: "Invalid Instruction!"
-dw stack: 0x10000
-dw input: 0x30000
-resb data: 1024
+db error "Invalid Instruction!"
+dw stack 0x10000
+dw input 0x30000
+resb data 1024

 // set up a stack so we can call functions
 _init_stack: