tokeniser/syntax: dropped the colon from data declarations (`db varname:` -> `db varname`) and updated tests.

2025-06-29 00:22:10 +01:00
parent 6ceb35d439
commit 21582f1297
2 changed files with 175 additions and 5 deletions
@@ -246,3 +246,173 @@ fn test_tokenise_brainf_interpreter() {
    dbg!(tokens);
 }
 #[test]
 fn test_string_literals() {
    // Each case pairs quoted source text with the contents expected inside
    // the resulting string token (i.e. without the surrounding quotes).
    let cases = [
        (r#""hello world""#, "hello world"),
        (
            r#""++++++++++++++++++++++++++++++++++++++++++++""#,
            "++++++++++++++++++++++++++++++++++++++++++++",
        ),
        (r#""Invalid Instruction!""#, "Invalid Instruction!"),
        (r#""""#, ""),
    ];
    for (input, expected) in cases.iter() {
        let tokens = tokenize_source(input).expect("Failed to tokenize string literal");
        // Only the first token is inspected; it has to be the string literal.
        match &tokens[0].token_type {
            TokenType::String(value) => assert_eq!(value, expected),
            _ => panic!("Expected string token for {input}"),
        }
    }
 }
 #[test]
 fn test_data_directives() {
    // Every mnemonic must come back as a directive token whose `directive`
    // field equals the expected text.
    let cases = [("db", "db"), ("dw", "dw"), ("resb", "resb")];
    for (input, expected) in cases.iter() {
        let tokens = tokenize_source(input).expect("Failed to tokenize data declaration");
        let TokenType::Directive(decl) = &tokens[0].token_type else {
            panic!("Expected data declaration token for {input}");
        };
        assert_eq!(decl.directive, *expected);
    }
 }
 #[test]
 fn test_include_directive() {
    let tokens = tokenize_source(r#"include print "./lib/print.dsa""#)
        .expect("Failed to tokenize include directive");
    // Guard the indexing below: at least three tokens are required.
    assert!(tokens.len() >= 3);
    // Expected leading shape: directive, symbol, string.
    let kind_at = |idx: usize| &tokens[idx].token_type;
    assert!(matches!(kind_at(0), TokenType::Directive(_)));
    assert!(matches!(kind_at(1), TokenType::Symbol(_)));
    assert!(matches!(kind_at(2), TokenType::String(_)));
 }
 #[test]
 fn test_hex_addresses() {
    // Hex literal source text paired with the numeric value it should produce.
    let cases = [("0x10000", 0x10000), ("0x30000", 0x30000)];
    for (input, expected) in cases.iter() {
        let tokens = tokenize_source(input).expect("Failed to tokenize hex address");
        match &tokens[0].token_type {
            TokenType::Immediate(value) => assert_eq!(*value, *expected),
            _ => panic!("Expected immediate token for {input}"),
        }
    }
 }
 #[test]
 fn test_memory_operations() {
    let tokens = tokenize_source("ldw rg1, rg2").expect("Failed to tokenize memory operation");
    // Guard the indexing below: at least four tokens are required.
    assert!(tokens.len() >= 4);
    // Expected leading shape: instruction, register, comma, register.
    let kinds: Vec<_> = tokens.iter().map(|token| &token.token_type).collect();
    assert!(matches!(kinds[0], TokenType::Instruction(_)));
    assert!(matches!(kinds[1], TokenType::Register(_)));
    assert!(matches!(kinds[2], TokenType::Comma));
    assert!(matches!(kinds[3], TokenType::Register(_)));
 }
 #[test]
 fn test_function_calls() {
    let tokens = tokenize_source("call print::print").expect("Failed to tokenize function call");
    assert!(tokens.len() >= 2);
    assert!(matches!(tokens[0].token_type, TokenType::Instruction(_)));
    // How `print::print` is split around `::` is not pinned down here; the
    // test only requires that a symbol token shows up somewhere in the output.
    let symbol_seen = tokens
        .iter()
        .map(|token| &token.token_type)
        .any(|kind| matches!(kind, TokenType::Symbol(_)));
    assert!(symbol_seen);
 }
 #[test]
 fn test_comments_are_ignored() {
    let tokens = tokenize_source("add rg0, rg1 // this is a comment\nsub rg2, rg3")
        .expect("Failed to tokenize with comments");
    // Exactly two instruction tokens are expected (`add` and `sub`); nothing
    // from the trailing `//` comment may be counted as an instruction.
    let mut instruction_count = 0usize;
    for token in tokens.iter() {
        if matches!(token.token_type, TokenType::Instruction(_)) {
            instruction_count += 1;
        }
    }
    assert_eq!(instruction_count, 2);
 }
 #[test]
 fn test_newline_always_present() {
    // The input deliberately has no trailing newline.
    let tokens = tokenize_source("add rg0, rg1").expect("Failed to tokenize without newline");
    // A newline token must still appear somewhere in the output.
    let newline_seen = tokens
        .iter()
        .map(|token| &token.token_type)
        .any(|kind| matches!(kind, TokenType::Newline));
    assert!(
        newline_seen,
        "Expected newline to be added even when missing from input"
    );
    // The token stream must end with EOF.
    let final_token = tokens.last().expect("Expected at least one token");
    assert!(matches!(final_token.token_type, TokenType::Eof));
 }
 #[test]
 fn test_complex_branching_code() {
    let source = r"
    cmp rg3, rg8
    jeq increment
    cmp rg3, rg9
    jeq decrement";
    let tokens = tokenize_source(source).expect("Failed to tokenize branching code");
    // Tally instruction and symbol tokens in a single pass over the output.
    let (mut instructions, mut symbols) = (0usize, 0usize);
    for token in tokens.iter() {
        match token.token_type {
            TokenType::Instruction(_) => instructions += 1,
            TokenType::Symbol(_) => symbols += 1,
            _ => {}
        }
    }
    // Two `cmp` and two `jeq` instructions.
    assert_eq!(instructions, 4);
    // The `increment` and `decrement` jump targets.
    assert_eq!(symbols, 2);
 }
 #[test]
 fn test_stack_operations() {
    let tokens = tokenize_source("push rg2\npop zero\npusha 2\npopa 2")
        .expect("Failed to tokenize stack operations");
    // One instruction token is expected per source line: push, pop, pusha, popa.
    let instructions_found = tokens
        .iter()
        .map(|token| &token.token_type)
        .filter(|kind| matches!(kind, TokenType::Instruction(_)))
        .count();
    assert_eq!(instructions_found, 4);
 }
@@ -5,7 +5,7 @@
 include print "./lib/print.dsa"
 // "print hello world"
-db program: "++++++++++++++++++++++++++++++++++++++++++++
+db program "++++++++++++++++++++++++++++++++++++++++++++
 >++++++++++++++++++++++++++++++++
 >++++++++++++++++
 >
@@ -35,10 +35,10 @@ db program: "++++++++++++++++++++++++++++++++++++++++++++
 ]
 <<++..."
-db error: "Invalid Instruction!"
+db error "Invalid Instruction!"
-dw stack: 0x10000
+dw stack 0x10000
-dw input: 0x30000
+dw input 0x30000
-resb data: 1024
+resb data 1024
 // set up a stack so we can call functions
 _init_stack: