diff --git a/docs/DSA_Assembly_Reference.md b/docs/DSA_Assembly_Reference.md new file mode 100644 index 0000000..7f99089 --- /dev/null +++ b/docs/DSA_Assembly_Reference.md @@ -0,0 +1,944 @@ +# DSA Assembly Language Reference + +## Overview + +This document is the comprehensive reference for writing DSA assembly programs. It covers assembly syntax, pseudo-instructions, directives, the module system, calling conventions, and provides complete examples. + +**Related Documents:** +- For hardware instruction details and encoding: See *DSA ISA Specification* +- For build system and toolchain: See project documentation + +## Assembly Syntax + +### General Rules + +- **Case Insensitive:** Mnemonics can be uppercase or lowercase (`mov` = `MOV`) +- **Comments:** Use `//` for line comments or `/* */` for block comments +- **Labels:** Identifier followed by colon (e.g., `main:`, `loop:`) +- **Whitespace:** Flexible spacing between operands +- **Numbers:** + - Decimal: `100`, `255` + - Hexadecimal: `0x10`, `0xFFFF` + - Binary: `0b1010` (if supported by assembler) + +### Operand Order Convention + +DSA assembly uses **GAS-style syntax** (source → destination): + +```asm +mov rg0, rg1 ; Copy rg0 TO rg1 (destination is last) +add rg0, rg1, rg2 ; rg2 = rg0 + rg1 (destination is last) +``` + +For load/store with immediates: +```asm +lli 0x1234, rg0 ; Load immediate 0x1234 INTO rg0 +ldw rg0, rg1, 8 ; Load from (rg0+8) INTO rg1 +stw rg0, rg1, 8 ; Store rg0 TO address (rg1+8) +``` + +## Registers + +| Register(s) | Type | Description | Usage Notes | +|-------------|------|-------------|-------------| +| **rg0-rgf** | General | 16 general-purpose registers | Use for variables, temporaries | +| **acc** | Special | Accumulator | ⚠️ Volatile - pseudo-instructions may overwrite | +| **spr** | Special | Stack pointer | Points to top of stack | +| **bpr** | Special | Base pointer | Used for stack frames | +| **ret** | Special | Return address | Holds return address for functions | +| **zero** 
| Read-only | Always zero | Reads return 0, writes discarded | +| **pcx** | Read-only | Program counter | Cannot be written directly | +| **idr** | Privileged | Interrupt descriptor table | Kernel mode only | +| **mmr** | Privileged | Memory map register | Kernel mode only | +| **noreg** | Placeholder | No register | Used in encoding, triggers fault if accessed | + +**Register Conventions:** +- **acc**: Used by pseudo-instructions for temporary values - do not rely on it being preserved +- **rgf**: Used by label-addressing pseudo-instructions as a scratch register +- **rg0-rge**: Available for general use; calling convention defines which are preserved + +## Hardware Instructions + +This section shows assembly syntax. For encoding details, see the ISA Specification. + +### Data Movement + +```asm +mov src_reg, dest_reg ; Copy value from src_reg to dest_reg +movs src_reg, dest_reg ; Copy with sign extension +``` + +**Examples:** +```asm +mov rg0, rg1 ; rg1 = rg0 +movs acc, rg2 ; rg2 = sign_extend(acc) +``` + +### Memory Load Instructions + +```asm +ldb base_reg, dest_reg [, offset] ; Load byte (zero-extend) +ldbs base_reg, dest_reg [, offset] ; Load byte (sign-extend) +ldh base_reg, dest_reg [, offset] ; Load halfword (zero-extend) +ldhs base_reg, dest_reg [, offset] ; Load halfword (sign-extend) +ldw base_reg, dest_reg [, offset] ; Load word +``` + +**Offset:** Optional signed 16-bit offset (defaults to 0) + +**Examples:** +```asm +ldb rg0, rg1 ; Load byte from address in rg0 +ldw rg0, rg1, 8 ; Load word from (rg0 + 8) +ldhs rg2, rg3, -4 ; Load signed halfword from (rg2 - 4) +``` + +**Alignment Requirements:** +- `ldb/ldbs`: No alignment required +- `ldh/ldhs`: Must be 2-byte aligned +- `ldw`: Must be 4-byte aligned + +### Memory Store Instructions + +```asm +stb src_reg, base_reg [, offset] ; Store byte +sth src_reg, base_reg [, offset] ; Store halfword +stw src_reg, base_reg [, offset] ; Store word +``` + +**Examples:** +```asm +stb rg0, rg1 ; Store byte to 
address in rg1 +stw rg0, rg1, 12 ; Store word to (rg1 + 12) +sth acc, spr, -2 ; Store halfword to (spr - 2) +``` + +**Alignment Requirements:** Same as loads + +### Immediate Load Instructions + +```asm +lli immediate, dest_reg ; Load lower 16 bits (CLEARS upper 16!) +lui immediate, dest_reg ; Load upper 16 bits (preserves lower 16) +``` + +**⚠️ CRITICAL:** `lli` clears the upper 16 bits! Always use `lli` before `lui`. + +**Loading 32-bit Constants:** +```asm +lli 0x1234, rg0 ; rg0 = 0x00001234 +lui 0xABCD, rg0 ; rg0 = 0xABCD1234 +``` + +**Loading Addresses:** See `lwi` pseudo-instruction + +### Jump and Branch Instructions + +```asm +jmp addr [, offset_reg] ; Unconditional jump +jeq addr [, offset_reg] ; Jump if equal +jne addr [, offset_reg] ; Jump if not equal +jgt addr [, offset_reg] ; Jump if greater than +jge addr [, offset_reg] ; Jump if greater or equal +jlt addr [, offset_reg] ; Jump if less than +jle addr [, offset_reg] ; Jump if less or equal +``` + +**Jump Modes:** +```asm +; Absolute jump (using zero register) +jmp label, zero ; Jump to label address +jmp 0x4000, zero ; Jump to absolute address 0x4000 + +; Register-based jump +jmp 0, ret ; Jump to address in ret register +jmp 4, ret ; Jump to (ret + 4) + +; PC-relative (if assembler supports) +jeq loop_start ; Jump to loop_start if equal flag set +``` + +**Conditional Jumps:** Based on flags set by `cmp` instruction + +### Comparison + +```asm +cmp reg1, reg2 ; Compare reg1 with reg2, set flags +``` + +**Flags Set:** +- Equal: `reg1 == reg2` +- GreaterThan: `reg1 > reg2` +- LessThan: `reg1 < reg2` +- GreaterThanOrEqual: `reg1 >= reg2` +- LessThanOrEqual: `reg1 <= reg2` + +**Example:** +```asm +cmp rg0, zero ; Compare rg0 with 0 +jeq is_zero ; Branch if rg0 == 0 +jgt is_positive ; Branch if rg0 > 0 +jlt is_negative ; Branch if rg0 < 0 +``` + +### Arithmetic Instructions + +```asm +add src1, src2, dest ; dest = src1 + src2 +sub src1, src2, dest ; dest = src1 - src2 +iadd src, immediate, dest ; dest = src 
+ immediate +isub src, immediate, dest ; dest = src - immediate +inc reg ; reg = reg + 1 +dec reg ; reg = reg - 1 +``` + +**Examples:** +```asm +add rg0, rg1, rg2 ; rg2 = rg0 + rg1 +sub rg0, rg1, rg2 ; rg2 = rg0 - rg1 +iadd rg0, 10, rg0 ; rg0 = rg0 + 10 +isub rg1, 5, rg2 ; rg2 = rg1 - 5 +inc spr ; spr = spr + 1 +dec spr ; spr = spr - 1 +``` + +**Note:** For `iadd`/`isub`, destination can be the same as source for in-place operations. + +### Bitwise Logical Operations + +```asm +and src1, src2, dest ; dest = src1 & src2 +or src1, src2, dest ; dest = src1 | src2 +xor src1, src2, dest ; dest = src1 ^ src2 +not src, dest ; dest = ~src +nand src1, src2, dest ; dest = ~(src1 & src2) +nor src1, src2, dest ; dest = ~(src1 | src2) +xnor src1, src2, dest ; dest = ~(src1 ^ src2) +``` + +**Examples:** +```asm +and rg0, rg1, rg2 ; rg2 = rg0 & rg1 +or rg0, rg1, rg2 ; rg2 = rg0 | rg1 +not rg0, rg1 ; rg1 = ~rg0 +xor rg0, rg0, rg0 ; rg0 = 0 (XOR register with itself) +``` + +### Shift Operations + +```asm +shl reg, shift_amount ; Shift left by amount (0-31) +shr reg, shift_amount ; Shift right by amount (0-31) +``` + +**Shift Amount:** +- Can be a literal: `shl rg0, 2` (shift by 2) +- Can be a register: `shl rg0, rg1` (shift by value in rg1, uses low 5 bits) + +**Examples:** +```asm +shl rg0, 2 ; rg0 = rg0 << 2 +shr rg1, 3 ; rg1 = rg1 >> 3 +shl rg0, rg1 ; rg0 = rg0 << (rg1 & 0x1F) +``` + +**Note:** Shift right is logical (zero-fill), not arithmetic + +### System and Control Instructions + +```asm +hlt ; Halt processor +nop ; No operation +int interrupt_code ; Trigger interrupt (8-bit code) +irt ; Return from interrupt +``` + +**Examples:** +```asm +hlt ; Stop execution +nop ; Do nothing (timing/alignment) +int 0x21 ; Trigger interrupt 0x21 +irt ; Return from interrupt handler +``` + +## Pseudo-Instructions + +Pseudo-instructions are assembly-level constructs that expand into one or more hardware instructions. 
+ +### Data Definition Directives + +```asm +db label: value1 [, value2, ...] ; Define bytes +dh label: value1 [, value2, ...] ; Define halfwords (16-bit) +dw label: value1 [, value2, ...] ; Define words (32-bit) +``` + +**Examples:** +```asm +db message: "Hello, World!", 0 ; String with null terminator +db bytes: 0x01, 0x02, 0x03 ; Array of bytes +dh numbers: 1000, 2000, 3000 ; Array of halfwords +dw stack_base: 0x10000 ; Single word value +dw table: 0, 0, 0, 0 ; Array of 4 words +``` + +**String Encoding:** Strings are encoded as byte sequences with escape sequences: +- `\n` = newline (0x0A) +- `\t` = tab (0x09) +- `\r` = carriage return (0x0D) +- `\\` = backslash +- `\"` = double quote +- `\0` = null (0x00) + +### Memory Reservation Directives + +```asm +resb label: size ; Reserve 'size' bytes +resh label: size ; Reserve 'size' halfwords +resw label: size ; Reserve 'size' words +``` + +**Examples:** +```asm +resb buffer: 256 ; Reserve 256 bytes +resh array: 100 ; Reserve 100 halfwords (200 bytes) +resw heap: 1024 ; Reserve 1024 words (4096 bytes) +``` + +**Note:** Reserved memory is uninitialized (contents undefined). + +### Stack Operations + +```asm +push reg ; Push register onto stack +pop reg ; Pop stack into register +``` + +**Expansion:** +```asm +; push rg0 expands to: +iadd spr, 4, spr ; spr = spr + 4 (stack grows up) +stw rg0, spr, 0 ; Store rg0 to [spr] + +; pop rg0 expands to: +ldw spr, rg0, 0 ; Load [spr] into rg0 +isub spr, 4, spr ; spr = spr - 4 +``` + +**Note:** DSA stack grows upward (toward higher addresses). + +**Examples:** +```asm +push rg0 ; Save rg0 on stack +push rg1 ; Save rg1 on stack +; ... do work ... 
+pop rg1 ; Restore rg1 +pop rg0 ; Restore rg0 +``` + +### Load Address Pseudo-Instruction + +```asm +lwi label, dest_reg ; Load address of label into register +``` + +**Expansion:** +```asm +; lwi message, rg0 expands to: +lli message, rg0 ; Load lower 16 bits of address +lui message, rg0 ; Load upper 16 bits of address +``` + +**Example:** +```asm +db message: "Hello!", 0 + +lwi message, rg0 ; rg0 = address of message +ldb rg0, rg1 ; rg1 = first byte of message ('H') +``` + +### Memory Access with Labels + +Load and store instructions can use labels directly: + +```asm +ldb label, dest_reg [, offset] +ldh label, dest_reg [, offset] +ldw label, dest_reg [, offset] +stb src_reg, label [, offset] +sth src_reg, label [, offset] +stw src_reg, label [, offset] +``` + +**Expansion (uses rgf as scratch):** +```asm +; ldb buffer, rg2 expands to: +lli buffer, rgf ; Load lower 16 bits of buffer address +lui buffer, rgf ; Load upper 16 bits of buffer address +ldb rgf, rg2, 0 ; Load byte from address in rgf + +; stw rg1, current expands to: +lli current, rgf ; Load lower 16 bits of current address +lui current, rgf ; Load upper 16 bits of current address +stw rg1, rgf, 0 ; Store word to address in rgf +``` + +**⚠️ Important:** These pseudo-instructions use `rgf` as a scratch register! Do not use `rgf` for other purposes when using label-based memory access. 
+ +**Examples:** +```asm +dw counter: 0 + +ldw counter, rg0 ; Load value of counter +iadd rg0, 1, rg0 ; Increment +stw rg0, counter ; Store back +``` + +### Function Call Pseudo-Instructions + +```asm +call namespace::function ; Call function from included module +return ; Return from function +``` + +**Expansion:** +```asm +; call print::print expands to: +lwi print::print, ret ; Load function address into ret +jmp 0, ret ; Jump to function (saves return in pcx) +; (The assembler/linker resolves namespace::function to address) + +; return expands to: +jmp 0, ret ; Jump to address in ret register +``` + +**Note:** The actual return address handling may be more complex depending on the calling convention. + +### Module System + +```asm +include namespace "path/to/file.dsa" +``` + +**Example:** +```asm +include print "lib/print.dsa" +include math "lib/math.dsa" + +; Can now call: +call print::print +call math::multiply +``` + +**Namespace Resolution:** +- Functions in included modules are accessible via `namespace::label` +- Namespace is the identifier before the filename +- Labels in included files are prefixed with the namespace + +## Calling Convention + +DSA uses a standard calling convention for function calls. + +### Stack Frame Layout + +``` +Higher Addresses +├─────────────┤ +│ Arg N │ ← spr + (8 + 4*(N-1)) +│ ... │ +│ Arg 2 │ ← spr + 16 +│ Arg 1 │ ← spr + 12 +│ Arg 0 │ ← spr + 8 (first argument) +├─────────────┤ +│ Ret Addr │ ← spr + 4 (return address) +├─────────────┤ +│ Old BPR │ ← spr + 0 (saved base pointer) +├─────────────┤ ← bpr, spr (current frame) +│ Locals │ (local variables, if any) +Lower Addresses +``` + +### Calling Sequence + +**Caller Responsibilities:** + +1. **Push arguments in reverse order** (last argument first): +```asm +push arg2 +push arg1 +push arg0 +``` + +2. **Call the function:** +```asm +call namespace::function +``` + +3. 
**Clean up arguments** after return: +```asm +pop zero ; Discard or retrieve arg0 +pop zero ; Discard arg1 +pop zero ; Discard arg2 +``` + +**Callee Responsibilities:** + +1. **Set up stack frame:** +```asm +function: + push bpr ; Save old base pointer + mov spr, bpr ; Establish new base pointer +``` + +2. **Access arguments:** +```asm + ldw bpr, rg0, 8 ; Load arg0 from spr+8 + ldw bpr, rg1, 12 ; Load arg1 from spr+12 + ldw bpr, rg2, 16 ; Load arg2 from spr+16 +``` + +3. **Execute function body:** +```asm + ; Function logic here + add rg0, rg1, acc ; Example: acc = arg0 + arg1 +``` + +4. **Store return value** (optional, overwrites arg0): +```asm + stw acc, bpr, 8 ; Store result where arg0 was +``` + +5. **Restore stack frame:** +```asm + mov bpr, spr ; Restore stack pointer + pop bpr ; Restore old base pointer +``` + +6. **Return to caller:** +```asm + return +``` + +### Complete Example + +```asm +; Function: add two numbers +; Args: arg0, arg1 +; Returns: sum in arg0 position + +add_function: + push bpr ; Save base pointer + mov spr, bpr ; Set up stack frame + + ldw bpr, rg0, 8 ; Load arg0 + ldw bpr, rg1, 12 ; Load arg1 + add rg0, rg1, acc ; acc = arg0 + arg1 + + stw acc, bpr, 8 ; Store result + + mov bpr, spr ; Restore stack + pop bpr ; Restore base pointer + return + +; Caller: +main: + lwi stack_base, bpr + mov bpr, spr + + lli 5, rg0 + lli 7, rg1 + + push rg1 ; Push arg1 (7) + push rg0 ; Push arg0 (5) + call local::add_function + pop rg2 ; Get result (12) + pop zero ; Discard arg1 + + hlt + +dw stack_base: 0x10000 +``` + +### Register Usage Conventions + +| Register(s) | Usage | Preserved? 
| +|-------------|-------|------------| +| rg0-rg3 | Function arguments, temporaries | No (caller-saved) | +| rg4-rge | Local variables | Yes (callee-saved if used) | +| rgf | Scratch (used by label addressing) | No | +| acc | Temporary calculations | No | +| spr | Stack pointer | Yes (must be restored) | +| bpr | Base pointer | Yes (must be restored) | +| ret | Return address | Managed by call/return | + +**Notes:** +- Functions should save and restore rg4-rge if they use them +- rg0-rg3 may be overwritten by called functions +- acc and rgf are volatile - assume they're overwritten + +## Complete Examples + +### Example 1: Multiplication Library + +```asm +// multiply.dsa +// Multiplies two numbers using repeated addition (multiplicand must be >= 1: the loop adds once before it tests the count, so 0 or a negative multiplicand returns the multiplier) +// +// Usage: +// include multiply "multiply.dsa" +// push arg1 +// push arg0 +// call multiply::multiply +// pop result +// pop zero ; discard second argument + +multiply: + push bpr + mov spr, bpr + + ldw bpr, rg0, 8 ; Load multiplier + ldw bpr, rg1, 12 ; Load multiplicand + + lli 0, acc ; Initialize result to 0 + +loop_start: + add acc, rg0, acc ; acc += multiplier + dec rg1 ; multiplicand-- + + cmp rg1, zero + jgt loop_start ; Continue if multiplicand > 0 + + stw acc, bpr, 8 ; Store result for caller + + mov bpr, spr + pop bpr + return +``` + +### Example 2: Print Library + +```asm +// print.dsa +// Prints null-terminated string to display memory +// +// Usage: +// include print "print.dsa" +// +// push string_address +// call print::print +// pop zero +// +// call print::reset ; Reset cursor (no args) + +dw display: 0x20000 ; Display memory base address +dw current: 0x20000 ; Current cursor position + +// Print function +print: + push bpr + mov spr, bpr + + ldw bpr, rg0, 8 ; Get string address argument + ldw current, rg1 ; Get current cursor position + +print_loop: + ldb rg0, acc ; Load character + stb acc, rg1 ; Store to display + + iadd rg0, 1, rg0 ; Advance string pointer + iadd rg1, 1, rg1 ; Advance cursor + + cmp acc, zero ; Check for 
null terminator + jne print_loop ; Continue if not null + + stw rg1, current ; Save cursor position + + mov bpr, spr + pop bpr + return + +// Reset cursor function +reset: + push bpr + mov spr, bpr + + ldw display, rg1 ; Load display base + stw rg1, current ; Reset cursor to start + + mov bpr, spr + pop bpr + return +``` + +### Example 3: Main Program + +```asm +// main.dsa +// Demonstrates using included libraries + +include print "./print.dsa" + +dw stack: 0x10000 +db string: "'To confuse your enemy, you must first confuse yourself' - Probably Sun Tzu.", 0 + +init: + // Set up stack + ldw stack, bpr + mov bpr, spr + +start: + // Load string address + lwi string, rg1 + + // Call print function + push rg1 + call print::print + pop rg1 ; Clean up (rg1 now contains arg we passed) + + hlt +``` + +### Example 4: Conditional Logic + +```asm +// Demonstrates comparisons and branching + +dw value: 42 + +main: + ldw value, rg0 + + cmp rg0, zero + jeq is_zero + jgt is_positive + jlt is_negative + +is_zero: + // Handle zero case + lwi zero_msg, rg1 + jmp print_and_exit + +is_positive: + // Handle positive case + lwi positive_msg, rg1 + jmp print_and_exit + +is_negative: + // Handle negative case + lwi negative_msg, rg1 + jmp print_and_exit + +print_and_exit: + push rg1 + call print::print + pop zero + hlt + +db zero_msg: "Value is zero", 0 +db positive_msg: "Value is positive", 0 +db negative_msg: "Value is negative", 0 +``` + +### Example 5: Loop with Counter + +```asm +// Count from 0 to 9 + +dw stack: 0x10000 + +main: + ldw stack, bpr + mov bpr, spr + + lli 0, rg0 ; Counter = 0 + lli 10, rg1 ; Limit = 10 + +loop: + // Do something with counter in rg0 + push rg0 + call process_value + pop zero + + inc rg0 ; Counter++ + cmp rg0, rg1 ; Compare with limit + jlt loop ; Loop if counter < limit + + hlt + +process_value: + push bpr + mov spr, bpr + + ldw bpr, rg0, 8 ; Get value + ; Process value here... + + mov bpr, spr + pop bpr + return +``` + +## Best Practices + +### 1. 
Stack Management +- Always balance push/pop operations +- Set up stack frame in every function +- Clean up arguments after function calls +- Use `pop zero` to discard unwanted values + +### 2. Register Usage +- Don't rely on `acc` being preserved +- Don't use `rgf` for variables (used by label addressing) +- Save callee-saved registers if you modify them +- Use `zero` register for zero constants + +### 3. Memory Access +- Ensure proper alignment for halfword/word access +- Use label-based addressing for clearer code +- Check that labels are defined before use + +### 4. Function Design +- Document calling convention in comments +- Validate input arguments when appropriate +- Use consistent parameter order +- Return values via stack or designated register + +### 5. Code Organization +- Use meaningful label names +- Comment complex operations +- Group related functions in modules +- Use includes for code reuse + +### 6. Performance +- Minimize memory accesses (use registers) +- Avoid unnecessary comparisons +- Use shifts for multiplication/division by powers of 2 +- Consider instruction pipelining if supported + +## Assembler Directives + +### Alignment (if supported) +```asm +.align 4 ; Align to 4-byte boundary +.align 2 ; Align to 2-byte boundary +``` + +### Origin (if supported) +```asm +.org 0x1000 ; Set location counter to 0x1000 +``` + +### Section Control (if supported) +```asm +.text ; Code section +.data ; Data section +.bss ; Uninitialized data section +``` + +**Note:** Assembler directive support depends on the specific DSA assembler implementation. 
+ +## Common Patterns + +### Loading 32-bit Constants +```asm +lli lower_16_bits, reg +lui upper_16_bits, reg +``` + +### Zero a Register +```asm +mov zero, reg ; Method 1 +xor reg, reg, reg ; Method 2 +lli 0, reg ; Method 3 +``` + +### Copy Memory +```asm +ldw src_addr, rg0 ; Load from source +stw rg0, dest_addr ; Store to destination +``` + +### Multiply by Power of 2 +```asm +shl reg, 3 ; Multiply by 8 (2^3) +``` + +### Divide by Power of 2 +```asm +shr reg, 2 ; Divide by 4 (2^2) +``` + +### Boolean NOT +```asm +cmp reg, zero +jeq was_zero ; If reg == 0, result is 1 +lli 0, reg +jmp done +was_zero: +lli 1, reg +done: +``` + +### Min/Max +```asm +; max(rg0, rg1) -> rg2 +mov rg0, rg2 ; Assume rg0 is max +cmp rg0, rg1 +jge done +mov rg1, rg2 ; rg1 was larger +done: +``` + +## Troubleshooting + +### Common Errors + +**Alignment Fault:** +- Check that halfword loads/stores use even addresses +- Check that word loads/stores use addresses divisible by 4 + +**Illegal Instruction:** +- Verify opcode is valid +- Check that shift amount is 0 for non-shift instructions +- Ensure you're not using `noreg` as a source/destination + +**Stack Corruption:** +- Verify push/pop balance +- Check that functions restore `bpr` before returning +- Ensure caller cleans up arguments + +**Wrong Results:** +- Verify `lli` is called before `lui` when loading constants +- Check that you're not relying on `acc` or `rgf` being preserved +- Verify signed vs. unsigned loads (ldb vs. ldbs) + +### Debugging Tips + +1. Add `nop` instructions as breakpoint markers +2. Print register values using display memory +3. Use single-step execution to trace program flow +4. Verify stack pointer values at function boundaries +5. 
Check label addresses in disassembly + +## Appendix: Instruction Quick Reference + +| Category | Instructions | +|----------|-------------| +| **Data Movement** | mov, movs | +| **Memory Load** | ldb, ldbs, ldh, ldhs, ldw | +| **Memory Store** | stb, sth, stw | +| **Immediate Load** | lli, lui | +| **Jump/Branch** | jmp, jeq, jne, jgt, jge, jlt, jle | +| **Comparison** | cmp | +| **Arithmetic** | add, sub, iadd, isub, inc, dec | +| **Logical** | and, or, xor, not, nand, nor, xnor | +| **Shift** | shl, shr | +| **System** | hlt, nop, int, irt | +| **Pseudo** | db, dh, dw, resb, resh, resw, push, pop, lwi, call, return, include | + +## Version History + +- **v1.0** - Initial comprehensive reference + - Combined hardware instructions and pseudo-instructions + - Added complete calling convention + - Included practical examples + - Documented common patterns and best practices diff --git a/docs/DSA_ISA_Specification.md b/docs/DSA_ISA_Specification.md new file mode 100644 index 0000000..c2973ee --- /dev/null +++ b/docs/DSA_ISA_Specification.md @@ -0,0 +1,401 @@ +# DSA Instruction Set Architecture Specification + +## Overview + +The Damn Simple Architecture (DSA) is a 32-bit RISC-style architecture designed for simplicity and educational purposes. This document provides the complete instruction set architecture specification, including all hardware instructions, registers, and encoding formats. + +## Data Types and Sizes + +| Type | Size | Alignment | +|------|------|-----------| +| Byte | 8 bits | 1-byte aligned | +| Halfword | 16 bits | 2-byte aligned | +| Word | 32 bits | 4-byte aligned | + +All multi-byte values use little-endian byte order. + +## Registers + +DSA encodes registers in a 5-bit field (32 possible encodings): 24 encodings (0x00-0x17) are defined as programmer-accessible registers and 8 (0x18-0x1F) are reserved. The CPU also has several internal system registers. 
+ +### Programmer-Accessible Registers + +| Hex | Register | Type | Description | +|-----|----------|------|-------------| +| 0x00-0x0F | **rg0-rgf** | General Purpose | 16 general-purpose registers for variables and temporary values | +| 0x10 | **acc** | Special | Accumulator for calculations and temporary storage
⚠️ May be overwritten by pseudo-instructions | +| 0x11 | **spr** | Special | Stack pointer - points to top of stack | +| 0x12 | **bpr** | Special | Base pointer - used for stack frame management | +| 0x13 | **ret** | Special | Return address register - stores function return addresses | +| 0x14 | **idr** | Privileged | Interrupt descriptor table address
Read/write triggers protection fault in user mode | +| 0x15 | **mmr** | Privileged | Hardware memory map table address
Read/write triggers protection fault in user mode | +| 0x16 | **zero** | Read-only | Constant zero value
Reads always return 0, writes are discarded | +| 0x17 | **noreg** | Placeholder | Indicates unused register field
Read/write triggers illegal instruction fault | +| 0x18-0x1F | - | Reserved | Reserved for future use | + +**Note on PCX (Program Counter):** +- PCX is a read-only system register that can be accessed in some contexts +- Writing to PCX triggers a protection fault +- PCX is automatically updated by jump and branch instructions + +### System Registers (Internal) + +These registers are used internally by the CPU and are not directly accessible via assembly instructions: + +| Register | Description | +|----------|-------------| +| **MAR** | Memory Address Register - holds address for memory operations | +| **MDR** | Memory Data Register - holds data for memory transfers | +| **CIR** | Current Instruction Register - holds instruction being executed | +| **STS** | Status Register - stores comparison and arithmetic flags | +| **PCX** | Program Counter - stores address of next instruction | + +### Status Register (STS) Layout + +The status register is a 32-bit register with the following flag bits: + +| Bit | Name | Description | Boot Value | +|-----|------|-------------|------------| +| 0 | **Equal** | Set if last comparison result was equal | 0 | +| 1 | **GreaterThan** | Set if last comparison result was greater than | 0 | +| 2 | **GreaterThanOrEqual** | Set if last comparison was greater than or equal | 0 | +| 3 | **LessThan** | Set if last comparison result was less than | 0 | +| 4 | **LessThanOrEqual** | Set if last comparison was less than or equal | 0 | +| 5 | **Zero** | Set if last arithmetic/logic operation result was zero | 0 | +| 6-31 | - | Reserved | 0 | + +## Instruction Encoding Formats + +DSA uses three instruction encoding formats: + +### R-Type (Register) Instructions + +Used for operations with register operands only, including shifts. 
+ +``` + 31-26 | 25-21 | 20-16 | 15-11 | 10-6 | 5-0 +--------+---------+---------+---------+--------+------- + Opcode | SrcReg1 | SrcReg2 | DestReg | ShiftAmt | Unused +``` + +- **Opcode** (6 bits): Instruction operation code +- **SrcReg1** (5 bits): First source register +- **SrcReg2** (5 bits): Second source register +- **DestReg** (5 bits): Destination register +- **ShiftAmt** (5 bits): Shift amount (for shift instructions only, must be 0 otherwise) +- **Unused** (6 bits): Must be 0 + +**Important Rules:** +- ShiftAmt must be 0 for non-shift instructions (else illegal instruction fault) +- Unused register fields must be set to `noreg` (0x17) if not used +- Using registers in unexpected positions may cause illegal instruction fault + +### I-Type (Immediate) Instructions + +Used for operations with a 16-bit immediate value. + +``` + 31-26 | 25-21 | 20-16 | 15-0 +--------+---------+---------+------------- + Opcode | SrcReg | DestReg | 16-bit Immediate +``` + +- **Opcode** (6 bits): Instruction operation code +- **SrcReg** (5 bits): Source register (base for memory ops) +- **DestReg** (5 bits): Destination register (or offset register for jumps) +- **Immediate** (16 bits): Signed 16-bit immediate value or offset + +**Usage:** +- Arithmetic: Immediate is a signed value +- Memory access: Immediate is a signed byte offset from base address +- Branches: Immediate is a signed offset from current PCX +- Literal loads: Immediate is unsigned 16-bit value + +### J-Type (Jump) Instructions + +Used for absolute jumps with large address ranges. + +``` + 31-26 | 25-0 +--------+---------------------- + Opcode | 26-bit Address +``` + +- **Opcode** (6 bits): Jump instruction code +- **Address** (26 bits): Partial address for jump + +**Address Calculation:** +1. Left-shift the 26-bit address by 2 (word alignment) +2. OR with upper 4 bits of current PCX +3. 
Result is final 32-bit jump address + +**Jump Range:** Any word-aligned address inside the 256MB-aligned region that contains the current PCX (the upper 4 address bits are taken from PCX, so the range is not ±128MB around PCX) + +**Note:** J-type instructions are defined but currently unused. Use I-type JMP with register addressing for long jumps. + +## Hardware Instructions + +### Data Movement + +| Hex | Mnemonic | Type | Operands | Description | +|-----|----------|------|----------|-------------| +| 0x00 | **NOP** | R | - | No operation - does nothing | +| 0x01 | **MOV** | R | SrcReg, DestReg | Copy value from SrcReg to DestReg | +| 0x02 | **MOVS** | R | SrcReg, DestReg | Copy with sign extension to fill 32 bits | + +**MOV/MOVS Details:** +- MOV performs direct copy (all 32 bits) +- MOVS sign-extends the value (useful after byte/halfword loads) +- Both instructions set the Zero flag if result is zero + +### Memory Access - Load Instructions + +All loads require proper alignment or trigger an alignment fault. + +| Hex | Mnemonic | Type | Operands | Description | +|-----|----------|------|----------|-------------| +| 0x03 | **LDB** | I | BaseReg, DestReg, Offset | Load byte (8-bit), zero-extend to 32 bits | +| 0x04 | **LDBS** | I | BaseReg, DestReg, Offset | Load byte (8-bit), sign-extend to 32 bits | +| 0x05 | **LDH** | I | BaseReg, DestReg, Offset | Load halfword (16-bit), zero-extend to 32 bits | +| 0x06 | **LDHS** | I | BaseReg, DestReg, Offset | Load halfword (16-bit), sign-extend to 32 bits | +| 0x07 | **LDW** | I | BaseReg, DestReg, Offset | Load word (32-bit) | + +**Load Operation:** +- Effective address = BaseReg + SignExtend(Offset) +- Offset is a signed 16-bit value +- Alignment requirements: + - LDB/LDBS: No alignment required (byte-aligned) + - LDH/LDHS: Must be 2-byte aligned + - LDW: Must be 4-byte aligned + +**Encoding Note:** +In machine code, the order is: BaseReg (SrcReg field), DestReg field, Offset (Immediate field) + +### Memory Access - Store Instructions + +All stores require proper alignment or trigger an alignment fault. 
+ +| Hex | Mnemonic | Type | Operands | Description | +|-----|----------|------|----------|-------------| +| 0x08 | **STB** | I | SrcReg, BaseReg, Offset | Store byte (8-bit) to memory | +| 0x09 | **STH** | I | SrcReg, BaseReg, Offset | Store halfword (16-bit) to memory | +| 0x0A | **STW** | I | SrcReg, BaseReg, Offset | Store word (32-bit) to memory | + +**Store Operation:** +- Effective address = BaseReg + SignExtend(Offset) +- Offset is a signed 16-bit value +- Only the relevant bits are stored (8, 16, or 32) +- Alignment requirements: + - STB: No alignment required (byte-aligned) + - STH: Must be 2-byte aligned + - STW: Must be 4-byte aligned + +**Encoding Note:** +In machine code: SrcReg (SrcReg field), BaseReg (DestReg field), Offset (Immediate field) + +### Immediate Load Instructions + +| Hex | Mnemonic | Type | Operands | Description | +|-----|----------|------|----------|-------------| +| 0x0B | **LLI** | I | Value, DestReg | Load 16-bit value into lower 16 bits
⚠️ **CLEARS upper 16 bits!** | +| 0x0C | **LUI** | I | Value, DestReg | Load 16-bit value into upper 16 bits
Lower 16 bits unchanged | + +**Usage for 32-bit Values:** +``` +LLI 0x1234, rg0 ; rg0 = 0x00001234 +LUI 0xABCD, rg0 ; rg0 = 0xABCD1234 +``` + +**⚠️ CRITICAL:** Always execute LLI before LUI, as LLI clears the upper 16 bits! + +**Encoding Note:** +In machine code: Value (Immediate field), DestReg field (SrcReg unused, set to noreg) + +### Jump and Branch Instructions + +| Hex | Mnemonic | Type | Operands | Description | +|-----|----------|------|----------|-------------| +| 0x0D | **JMP** | I | DestReg, Offset | Unconditional jump to (DestReg + Offset) | +| 0x0E | **JEQ** | I | DestReg, Offset | Jump if Equal flag set | +| 0x0F | **JNE** | I | DestReg, Offset | Jump if Equal flag NOT set | +| 0x10 | **JGT** | I | DestReg, Offset | Jump if GreaterThan flag set | +| 0x11 | **JGE** | I | DestReg, Offset | Jump if GreaterThan OR Equal flag set | +| 0x12 | **JLT** | I | DestReg, Offset | Jump if LessThan flag set | +| 0x13 | **JLE** | I | DestReg, Offset | Jump if LessThan OR Equal flag set | + +**Jump Calculation:** +- Target address = DestReg + SignExtend(Offset) +- If DestReg = zero, this becomes absolute addressing with Offset +- If DestReg = pcx, this becomes PC-relative addressing +- Conditional jumps check flags in STS register + +**Encoding Note:** +In machine code: DestReg field, Offset (Immediate field) (SrcReg unused, set to noreg) + +### Comparison + +| Hex | Mnemonic | Type | Operands | Description | +|-----|----------|------|----------|-------------| +| 0x14 | **CMP** | R | Reg1, Reg2 | Compare Reg1 with Reg2, set flags in STS | + +**Flag Setting:** +- Equal: Set if Reg1 == Reg2 +- GreaterThan: Set if Reg1 > Reg2 (signed) +- GreaterThanOrEqual: Set if Reg1 >= Reg2 (signed) +- LessThan: Set if Reg1 < Reg2 (signed) +- LessThanOrEqual: Set if Reg1 <= Reg2 (signed) +- Zero: Set if (Reg1 - Reg2) == 0 (same as Equal) + +**Encoding Note:** +DestReg and ShiftAmt fields unused (set to noreg and 0) + +### Arithmetic Instructions + +| Hex | Mnemonic | Type | Operands 
| Description | +|-----|----------|------|----------|-------------| +| 0x15 | **INC** | R | Reg | Increment register by 1 | +| 0x16 | **DEC** | R | Reg | Decrement register by 1 | +| 0x19 | **ADD** | R | Src1, Src2, Dest | Dest = Src1 + Src2 | +| 0x1A | **SUB** | R | Src1, Src2, Dest | Dest = Src1 - Src2 | +| 0x25 | **IADD** | I | Src, Literal, Dest | Dest = Src + SignExtend(Literal) | +| 0x26 | **ISUB** | I | Src, Literal, Dest | Dest = Src - SignExtend(Literal) | + +**Flag Effects:** +- Zero flag set if result is zero +- Other flags undefined after arithmetic (use CMP for comparisons) + +**Encoding Notes:** +- INC/DEC: Reg in SrcReg1 field, also copied to DestReg field +- IADD/ISUB: Immediate is signed 16-bit value + +### Bitwise Logical Operations + +| Hex | Mnemonic | Type | Operands | Description | +|-----|----------|------|----------|-------------| +| 0x1B | **AND** | R | Src1, Src2, Dest | Dest = Src1 & Src2 (bitwise AND) | +| 0x1C | **OR** | R | Src1, Src2, Dest | Dest = Src1 \| Src2 (bitwise OR) | +| 0x1D | **NOT** | R | Src, Dest | Dest = ~Src (bitwise NOT) | +| 0x1E | **XOR** | R | Src1, Src2, Dest | Dest = Src1 ^ Src2 (bitwise XOR) | +| 0x1F | **NAND** | R | Src1, Src2, Dest | Dest = ~(Src1 & Src2) (bitwise NAND) | +| 0x20 | **NOR** | R | Src1, Src2, Dest | Dest = ~(Src1 \| Src2) (bitwise NOR) | +| 0x21 | **XNOR** | R | Src1, Src2, Dest | Dest = ~(Src1 ^ Src2) (bitwise XNOR) | + +**Flag Effects:** +- Zero flag set if result is zero +- Other flags undefined + +**Encoding Note:** +NOT uses only Src and Dest; SrcReg2 unused (set to noreg) + +### Shift Operations + +| Hex | Mnemonic | Type | Operands | Description | +|-----|----------|------|----------|-------------| +| 0x17 | **SHL** | R | Reg, ShiftAmount | Shift Reg left by ShiftAmount bits
Zero-fill from right | +| 0x18 | **SHR** | R | Reg, ShiftAmount | Shift Reg right by ShiftAmount bits
Zero-fill from left (logical shift) | + +**Shift Amount:** +- Can be a 5-bit literal (0-31) in ShiftAmt field +- Can be a register value (low 5 bits used) + - If using register: Place in SrcReg2, set ShiftAmt to 0 + - If using literal: Place in ShiftAmt field, set SrcReg2 to noreg + +**Flag Effects:** +- Zero flag set if result is zero + +**Encoding Notes:** +- Reg in both SrcReg1 and DestReg fields +- For literal shifts: ShiftAmt field contains shift count +- For register shifts: SrcReg2 contains register, ShiftAmt must be 0 + +### System and Control Instructions + +| Hex | Mnemonic | Type | Operands | Description | +|-----|----------|------|----------|-------------| +| 0x22 | **INT** | I | InterruptCode | Trigger interrupt with 8-bit code
Saves return address to ret register
Sets bpr to kernel stack | +| 0x23 | **IRT** | R | - | Return from interrupt
Restores execution context | +| 0x24 | **HLT** | R | - | Halt processor execution
Stops fetch-decode-execute cycle | + +**INT Behavior:** +1. Save current PCX to ret register +2. Switch bpr to kernel stack address +3. Look up interrupt handler address in interrupt descriptor table (idr) +4. Jump to handler at interrupt vector + +**IRT Behavior:** +1. Restore previous execution context +2. Return to address in ret register +3. Restore user stack pointer + +**Encoding Notes:** +- INT: InterruptCode in low 8 bits of Immediate field +- IRT/HLT: All register fields set to noreg, ShiftAmt to 0 + +## Instruction Summary Table + +| Opcode | Mnemonic | Type | Category | +|--------|----------|------|----------| +| 0x00 | NOP | R | Control | +| 0x01 | MOV | R | Data Movement | +| 0x02 | MOVS | R | Data Movement | +| 0x03 | LDB | I | Memory Load | +| 0x04 | LDBS | I | Memory Load | +| 0x05 | LDH | I | Memory Load | +| 0x06 | LDHS | I | Memory Load | +| 0x07 | LDW | I | Memory Load | +| 0x08 | STB | I | Memory Store | +| 0x09 | STH | I | Memory Store | +| 0x0A | STW | I | Memory Store | +| 0x0B | LLI | I | Immediate Load | +| 0x0C | LUI | I | Immediate Load | +| 0x0D | JMP | I | Jump | +| 0x0E | JEQ | I | Branch | +| 0x0F | JNE | I | Branch | +| 0x10 | JGT | I | Branch | +| 0x11 | JGE | I | Branch | +| 0x12 | JLT | I | Branch | +| 0x13 | JLE | I | Branch | +| 0x14 | CMP | R | Comparison | +| 0x15 | INC | R | Arithmetic | +| 0x16 | DEC | R | Arithmetic | +| 0x17 | SHL | R | Shift | +| 0x18 | SHR | R | Shift | +| 0x19 | ADD | R | Arithmetic | +| 0x1A | SUB | R | Arithmetic | +| 0x1B | AND | R | Logical | +| 0x1C | OR | R | Logical | +| 0x1D | NOT | R | Logical | +| 0x1E | XOR | R | Logical | +| 0x1F | NAND | R | Logical | +| 0x20 | NOR | R | Logical | +| 0x21 | XNOR | R | Logical | +| 0x22 | INT | I | System | +| 0x23 | IRT | R | System | +| 0x24 | HLT | R | System | +| 0x25 | IADD | I | Arithmetic | +| 0x26 | ISUB | I | Arithmetic | + +## Exception Conditions + +The following conditions trigger exceptions: + +| Exception | Trigger Condition | 
+|-----------|------------------| +| **Illegal Instruction** | - Invalid opcode
- noreg used as source/destination
- ShiftAmt non-zero for non-shift instruction
- Register field violations | +| **Protection Fault** | - Write to pcx register
- Read/write idr or mmr in user mode
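- Read from noreg (NOTE: the Illegal Instruction row above also lists noreg used as a source; confirm which of the two faults applies)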
- Not a fault: a write to the zero register is silently discarded | +| **Alignment Fault** | - LDH/LDHS/STH with odd address
- LDW/STW with address not divisible by 4 | +| **Memory Access Violation** | - Access to unmapped or protected memory
- Stack overflow/underflow | + +## Calling Convention + +See the DSA Assembly Language Reference for the complete calling convention and ABI specification. + +## Notes on Design + +1. **Word Size:** All addresses and general computation are 32-bit +2. **Endianness:** Little-endian byte order +3. **Stack Growth:** Stack grows upward (incrementing addresses) +4. **Alignment:** Natural alignment required for halfword and word accesses +5. **Sign Extension:** All immediate values are sign-extended unless noted +6. **Zero Register:** Provides constant zero, writes are legal but discarded +7. **Reserved Encodings:** Opcodes 0x27-0x3F reserved for future use diff --git a/resources/ideas/DSA_Project_Roadmap.md b/docs/DSA_Project_Roadmap.md similarity index 100% rename from resources/ideas/DSA_Project_Roadmap.md rename to docs/DSA_Project_Roadmap.md diff --git a/resources/ideas/DSA_Project_Roadmap.pdf b/docs/DSA_Project_Roadmap.pdf similarity index 100% rename from resources/ideas/DSA_Project_Roadmap.pdf rename to docs/DSA_Project_Roadmap.pdf diff --git a/docs/IMPLEMENTATION_DISCREPANCIES.md b/docs/IMPLEMENTATION_DISCREPANCIES.md new file mode 100644 index 0000000..329abae --- /dev/null +++ b/docs/IMPLEMENTATION_DISCREPANCIES.md @@ -0,0 +1,638 @@ +# DSA Implementation vs Documentation Discrepancies + +## Critical Discrepancies + +### 1. **Stack Growth Direction** ❌ CRITICAL + +**Documentation states:** Stack grows upward (toward higher addresses) + +**Implementation shows (expand.rs:44-51):** +```rust +fn expand_push(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { + // ... + nodes.extend(vec![ + node!(label, Opcode::SubI, spr, 4, spr), // spr = spr - 4 + node!(None, Opcode::Stw, reg, spr, 0), + ]); +``` + +**Implementation shows (expand.rs:130-137):** +```rust +fn expand_pop(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { + // ... 
+ nodes.extend(vec![ + node!(label, Opcode::Ldw, spr, reg, 0), + node!(None, Opcode::AddI, spr, 4, spr), // spr = spr + 4 + ]); +``` + +**Reality:** Stack grows **DOWNWARD** (toward lower addresses) +- PUSH: Decrements SPR by 4, then stores +- POP: Loads, then increments SPR by 4 + +**Impact:** All documentation examples and calling convention diagrams are backwards! + +--- + +### 2. **CALL Pseudo-instruction Expansion** ❌ CRITICAL + +**Documentation states (DSA_Assembly_Reference.md):** +```asm +; call print::print expands to: +lwi print::print, ret ; Load function address into ret +jmp 0, ret ; Jump to function (saves return in pcx) +``` + +**Implementation shows (expand.rs:109-123):** +```rust +fn expand_call(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { + nodes.extend(vec![ + node!(label, Opcode::SubI, spr, 4, spr), // Decrement stack pointer + node!(None, Opcode::Stw, pcx, spr, 0), // Store PCX (return addr) on stack + node!(None, Opcode::Jmp, addr, zero), // Jump to function + ]); +``` + +**Reality:** CALL expansion is: +1. Decrement SPR by 4 +2. Store PCX (return address) to stack +3. Jump to function address + +**Impact:** Return address is stored on the STACK, not in RET register! + +--- + +### 3. **RETURN Pseudo-instruction Expansion** ❌ CRITICAL + +**Documentation states:** +```asm +; return expands to: +jmp 0, ret ; Jump to address in ret register +``` + +**Implementation shows (expand.rs:125-135):** +```rust +fn expand_return(current: &Node, nodes: &mut Vec) { + nodes.extend(vec![ + node!(label, Opcode::Ldw, spr, ret, 0), // Load return addr from stack + node!(None, Opcode::AddI, spr, 4, spr), // Increment stack pointer + node!(None, Opcode::Jmp, 4, ret), // Jump to (ret + 4) + ]); +} +``` + +**Reality:** RETURN expansion is: +1. Load return address from stack into RET register +2. Increment SPR by 4 +3. 
Jump to (RET + 4) + +**Why +4?** CALL stores PCX while its `stw` is executing, i.e. before the `jmp` to the callee has run, so the saved address is not yet past the call sequence. Assuming PCX has already advanced to the `jmp` at that point and instructions are 4 bytes wide, adding 4 skips the `jmp` and resumes at the first instruction after the call. Confirm what PCX holds during `stw`; if it still holds the address of the `stw` itself, the +4 is a bug in the implementation. + +**Impact:** Return mechanism is completely different from documentation! + +--- + +### 4. **Calling Convention - Stack Frame Layout** ❌ CRITICAL + +**Documentation states:** +``` +Higher Addresses +├─────────────┤ +│ Arg N │ ← spr + (8 + 4*(N-1)) +│ ... │ +│ Arg 2 │ ← spr + 16 +│ Arg 1 │ ← spr + 12 +│ Arg 0 │ ← spr + 8 +├─────────────┤ +│ Ret Addr │ ← spr + 4 +├─────────────┤ +│ Old BPR │ ← spr + 0 +├─────────────┤ ← bpr, spr +│ Locals │ +Lower Addresses +``` + +**Reality based on implementation:** +Since stack grows DOWN: +``` +Lower Addresses +├─────────────┤ ← Current SPR/BPR +│ Old BPR │ ← spr + 0 (immediately above SPR) +├─────────────┤ +│ Ret Addr │ ← spr + 4 (pushed by CALL) +├─────────────┤ +│ Arg 0 │ ← spr + 8 +│ Arg 1 │ ← spr + 12 +│ Arg 2 │ ← spr + 16 +│ ... │ +│ Arg N │ ← spr + (8 + 4*(N-1)) +├─────────────┤ +Higher Addresses +``` + +**The diagram needs to be flipped!** The offsets are correct, but the direction is wrong. + +--- + +### 5. **Label-Based Load/Store Scratch Register** ⚠️ IMPORTANT + +**Documentation states:** Uses `rgf` as scratch register + +**Implementation confirms (expand.rs:138-153):** +```rust +fn expand_ldx(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { + // For ldb label, reg: + nodes.extend(vec![ + node!(current.label(), Opcode::Lli, name, reg), + node!(None, Opcode::Lui, name, reg), + node!(None, opcode, reg, reg, offset), + ]); +``` + +**Wait! 
This is WRONG in the implementation!** + +The load expansion uses the DESTINATION register as scratch: +```asm +ldb buffer, rg2 expands to: + lli buffer, rg2 ; Uses rg2 as destination + lui buffer, rg2 ; Uses rg2 as destination + ldb rg2, rg2, 0 ; Uses rg2 as base +``` + +**Documentation says it should use rgf:** +```asm +ldb buffer, rg2 expands to: + lli buffer, rgf ; Uses rgf as scratch + lui buffer, rgf ; Uses rgf as scratch + ldb rgf, rg2, 0 ; Load from rgf into rg2 +``` + +**For stores (expand.rs:155-176):** +```rust +fn expand_stx(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { + // For stb reg, label: + let temp = Token::Register(Register::Acc); // Uses ACC, not RGF! + + nodes.extend(vec![ + node!(current.label(), Opcode::Lli, dest, temp), + node!(None, Opcode::Lui, dest, temp), + node!(None, opcode, base, temp, offset), + ]); +``` + +**Reality:** +- Load pseudo-instructions use the DESTINATION register as scratch +- Store pseudo-instructions use the ACC register as scratch, NOT rgf + +**Impact:** Documentation is incorrect about which registers are used! + +--- + +### 6. **LWI Pseudo-instruction** ✅ CORRECT + +**Documentation and implementation agree:** +```rust +fn expand_lwi(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { + nodes.extend(vec![ + node!(current.label(), Opcode::Lli, val, reg), + node!(None, Opcode::Lui, val, reg), + ]); +``` + +This matches the documented expansion. + +--- + +### 7. 
**PUSHA/POPA Pseudo-instructions** 📝 UNDOCUMENTED + +**These exist in implementation but are NOT in documentation!** + +**expand.rs:53-76:** +```rust +fn expand_pusha(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { + let count = expect_token!(arg0, Immediate)?; + let spr = Token::Register(Register::Spr); + let registers: Vec = Register::general(); + + nodes.push(node!(label, Opcode::SubI, spr, Token::Immediate(count * 4), spr)); + + nodes.extend((0..count).rev().map(|i| { + node!(None, Opcode::Stw, + Token::Register(registers[i as usize]), + spr, + Token::Immediate(i * 4) + ) + })); +``` + +**expand.rs:78-101:** +```rust +fn expand_popa(current: &Node, nodes: &mut Vec) -> Result<(), AssembleError> { + let count = expect_token!(arg0, Immediate)?; + + nodes.extend((0..count).rev().map(|i| { + node!( + { if i == 0 { label.clone() } else { None } }, + Opcode::Ldw, + spr, + Token::Register(registers[i as usize]), + Token::Immediate(i * 4) + ) + })); + + nodes.push(node!(None, Opcode::AddI, spr, Token::Immediate(count * 4), spr)); +``` + +**What they do:** +- `pusha N` - Push first N general-purpose registers (rg0 through rg(N-1)) to stack +- `popa N` - Pop first N general-purpose registers from stack + +**Missing from documentation entirely!** + +--- + +### 8. **Register Index Encoding** ⚠️ IMPORTANT + +**Documentation states:** System registers like MAR, MDR, STS, CIR, PCX are "internal" and not accessible + +**Implementation shows (instructions.rs:148-153):** +```rust +0x18 => Self::Mar, +0x19 => Self::Mdr, +0x1A => Self::Sts, +0x1B => Self::Cir, +0x1C => Self::Pcx, +``` + +**Reality:** These registers ARE encoded in the instruction format at indices 0x18-0x1C! + +**However, instructions.rs:186 shows:** +```rust +"null" => Ok(Self::NoReg), // Can parse "null" as NoReg +``` + +**Documentation never mentions "null" as an alternative name for noreg!** + +--- + +### 9. 
**LUI Immediate Value Handling** ⚠️ IMPORTANT + +**Documentation states:** +``` +lui immediate, dest_reg ; Load immediate into upper 16 bits +``` + +**Implementation shows (codegen.rs:248-254):** +```rust +fn build_load_immediate_instruction(...) -> Result { + // ... + match opcode { + Opcode::Lli => { + let instruction_args = args!(I, immediate: value as u16, r1: dest); + Ok(Instruction::LoadLowerImmediate(instruction_args)) + } + Opcode::Lui => { + let upper_value = value >> 16; // Shifts right by 16! + let instruction_args = args!(I, immediate: upper_value as u16, r1: dest); + Ok(Instruction::LoadUpperImmediate(instruction_args)) + } +``` + +**Reality:** When assembling `lui immediate, reg`, the assembler: +1. Takes the immediate value +2. Shifts it RIGHT by 16 bits +3. Stores the result in the instruction + +**This means:** +```asm +lli 0x1234, rg0 ; Stores 0x1234 in lower 16 bits +lui 0xABCD0000, rg0 ; Right-shifts to 0xABCD, stores in upper 16 bits +``` + +**Whereas the 16-bit form would lose its value under the shift shown above:** +```asm +lli 0x1234, rg0 ; Stores 0x1234 in lower 16 bits +lui 0xABCD, rg0 ; 0xABCD >> 16 = 0x0000, so the upper 16 bits would be loaded with 0, not 0xABCD +``` + +**Documentation needs clarification on what immediate value format LUI expects!** + +--- + +### 10. **Data Definition Encoding** ⚠️ IMPORTANT + +**Implementation (expand.rs:217-267):** +```rust +fn process_dx_data(args: Vec, size: usize) -> Result, AssembleError> { + for token in args { + match token { + Token::StringLit(mut s) => { + s.push('\0'); // Automatically adds null terminator! + for ch in s.chars() { + let mut char_buf = [0u8; 4]; + let char_bytes = ch.encode_utf8(&mut char_buf); + buffer.extend_from_slice(char_bytes.as_bytes()); + } + } + Token::Immediate(value) => { + buffer.extend_from_slice(&value.to_be_bytes()); // BIG ENDIAN! + } +``` + +**Key findings:** +1. String literals automatically get null terminator appended +2. Numeric values are stored in **BIG ENDIAN** format (to_be_bytes) +3. 
Documentation says "little-endian byte order" globally + +**Contradiction:** Data definition uses BIG ENDIAN, but doc says LITTLE ENDIAN! + +--- + +### 11. **Segment Instruction** 📝 UNDOCUMENTED + +**Implementation has a SEGMENT instruction (0x27/0x3F):** +```rust +Segment(u32) = 0x3F, +``` + +**This is completely undocumented!** + +From model.rs: +```rust +Self::Segment => write!(f, "[SEGMENT]"), +``` + +From codegen.rs: +```rust +Opcode::Segment => build_segment_instruction(&args), +``` + +**Purpose unclear, needs documentation!** + +--- + +### 12. **Data Instruction** 📝 UNDOCUMENTED + +**Implementation has a DATA instruction (0x3E):** +```rust +Data(u32) = 0x3E, +``` + +**This appears to be a meta-instruction for embedding raw data, but it's undocumented in the assembly reference!** + +--- + +### 13. **INC/DEC Instruction Encoding** ⚠️ MINOR + +**Implementation (codegen.rs:293-299):** +```rust +fn build_inc_dec_instruction(opcode: Opcode, args: &[Token]) -> Result { + let reg = expect_token!(reg_token, Register)?; + match opcode { + Opcode::Inc => Ok(Instruction::Increment(args!(R, sr1: reg))), + Opcode::Dec => Ok(Instruction::Decrement(args!(R, sr1: reg))), +``` + +**Reality:** INC/DEC only set SR1 field, not DR field. + +**But args.rs shows:** +```rust +impl RTypeArgs { + pub fn new(...) -> Self { + let sr1 = sr1.unwrap_or_default(); // Defaults to NoReg + let dr = dr.unwrap_or_default(); // Defaults to NoReg +``` + +**So the DR field gets set to NoReg, which is correct per documentation.** + +**However, the Display impl (instructions.rs:449) shows:** +```rust +Self::Increment(a) | Self::Decrement(a) => write!(f, " {}", a.sr1), +``` + +**This is correct - only shows SR1 in disassembly.** + +--- + +### 14. 
**Shift Instruction Operand Order** ⚠️ MINOR + +**Implementation (codegen.rs:301-312):** +```rust +fn build_shift_instruction(opcode: Opcode, args: &[Token]) -> Result { + let reg = expect_token!(reg_token, Register)?; + let amount = expect_token!(amount_token, Immediate)? as u8; + + match opcode { + Opcode::Shl => Ok(Instruction::ShiftLeft(args!(R, sr1: reg, shamt: amount))), +``` + +**This only handles LITERAL shift amounts, not REGISTER shift amounts!** + +**Documentation states both are supported:** +```asm +shl rg0, 2 ; Literal shift +shl rg0, rg1 ; Register shift +``` + +**The current codegen only handles the literal case!** + +**This is a BUG in the implementation - register shifts aren't properly assembled!** + +--- + +### 15. **Jump Instruction Operand Order** ⚠️ CONFUSION + +**Documentation shows assembly syntax:** +```asm +jmp addr [, offset_reg] +``` + +**But implementation (codegen.rs:256-270):** +```rust +fn build_jump_instruction(opcode: Opcode, args: &[Token]) -> Result { + let address = expect_token!(address_token, Immediate)?; + let offset = expect_token!(offset_token, Register)?; + let instruction_args = args!(I, immediate: address as u16, r1: offset); +``` + +**This expects:** +1. First arg: immediate (address) +2. Second arg: register (offset) + +**So assembly syntax should be:** +```asm +jmp immediate, offset_register +``` + +**Example:** +```asm +jmp 0x1000, zero ; Jump to 0x1000 +jmp 4, ret ; Jump to (ret + 4) +``` + +**Documentation syntax is correct, but parameter names are confusing!** + +The "address" is actually an OFFSET, and the register is the BASE! + +**Better naming:** +```asm +jmp offset, base_register +; Target = base_register + offset +``` + +--- + +### 16. 
**NOT Instruction Operand Count** ✅ MINOR ISSUE + +**Documentation shows:** +```asm +not src, dest ; Two operands +``` + +**Implementation (instructions.rs:428-429):** +```rust +Self::Compare(args) | Self::Not(args) => { + write!(f, " {}, {}", args.sr1, args.sr2) +} +``` + +**This displays BOTH sr1 and sr2 for NOT!** + +**But codegen.rs:354-362:** +```rust +fn build_not_instruction(args: &[Token]) -> Result { + let reg = expect_token!(reg_token, Register)?; + let dest = expect_token!(dest_token, Register)?; + Ok(Instruction::Not(args!(R, sr1: reg, dr: dest))) +``` + +**Sets sr1 and dr, NOT sr1 and sr2!** + +**The Display impl is WRONG - should show sr1 and dr:** +```rust +Self::Not(args) => write!(f, " {}, {}", args.sr1, args.dr) +``` + +**This is a display bug in the implementation!** + +--- + +### 17. **Register File Indexing** ✅ CORRECT + +**Documentation and implementation both agree:** +- 0x00-0x0F: rg0-rgf (general purpose) +- 0x10: acc +- 0x11: spr +- 0x12: bpr +- 0x13: ret +- 0x14: idr +- 0x15: mmr +- 0x16: zero +- 0x17: noreg + +**This matches perfectly.** + +--- + +### 18. **Immediate Arithmetic Destination** ⚠️ MINOR + +**Implementation (codegen.rs:314-330):** +```rust +fn build_arithmetic_immediate_instruction(...) -> Result { + let reg = expect_token!(reg_token, Register)?; + let immediate = expect_token!(immediate_token, Immediate)? as u16; + let dest = expect_token!(dest_token, Register)?; + let instruction_args = args!(I, immediate: immediate, r1: reg, r2: dest); +``` + +**This REQUIRES three arguments:** +1. Source register +2. Immediate value +3. Destination register + +**But documentation says destination is optional:** +``` +iadd src_reg, imm [, dest_reg] ; dest optional +``` + +**Reality:** The assembler REQUIRES the destination register! + +**If you want in-place operation:** +```asm +iadd rg0, 10, rg0 ; Required to specify rg0 twice +``` + +**Not:** +```asm +iadd rg0, 10 ; This won't work! 
+``` + +**Documentation is misleading - destination is NOT optional!** + +--- + +### 19. **Memory Instruction Offsets** ✅ CORRECT + +**Implementation correctly handles signed 16-bit offsets:** +```rust +let offset = expect_token!(offset_token, Immediate)? as u16; +``` + +**These are stored as u16 but interpreted as signed i16 at runtime.** + +**Documentation is correct about this.** + +--- + +### 20. **Instruction Opcode Values** ⚠️ VERIFIED EXCEPT SEGMENT + +Comparing model.rs opcodes with instructions.rs: + +| Instruction | model.rs | instructions.rs | Match | +|-------------|----------|-----------------|-------| +| Nop | 0x00 | 0x0 | ✅ | +| Mov | 0x01 | 0x1 | ✅ | +| MovSigned | 0x02 | 0x2 | ✅ | +| LoadByte | 0x03 | 0x3 | ✅ | +| ... | ... | ... | ✅ | +| AddImmediate | 0x25 | 0x25 | ✅ | +| SubImmediate | 0x26 | 0x26 | ✅ | +| Segment | 0x27 | 0x3F | ❌ MISMATCH! | + +**CRITICAL:** Segment instruction has opcode **0x27** in model.rs but **0x3F** in instructions.rs! + +--- + +## Summary of Critical Issues + +### Must Fix in Documentation: + +1. ✅ **Stack grows DOWNWARD** - flip all diagrams +2. ✅ **CALL expansion** - uses stack, not ret register directly +3. ✅ **RETURN expansion** - loads from stack, jumps to ret+4 +4. ✅ **Stack frame layout** - flip diagram vertically +5. ✅ **Load pseudo scratch register** - uses DEST reg, not rgf +6. ✅ **Store pseudo scratch register** - uses ACC, not rgf +7. ✅ **Add PUSHA/POPA documentation** +8. ✅ **Add SEGMENT instruction documentation** +9. ✅ **Add DATA instruction documentation** +10. ✅ **Clarify LUI immediate value handling** +11. ✅ **Fix endianness** - data definition uses BIG endian +12. ✅ **IADD/ISUB destination NOT optional** +13. ✅ **Add "null" as alias for noreg** +14. ✅ **Fix Segment opcode** - 0x27 or 0x3F? + +### Potential Implementation Bugs: + +1. ⚠️ **Shift instruction** - doesn't handle register shifts +2. ⚠️ **NOT display** - shows sr2 instead of dr +3. ⚠️ **RETURN +4 offset** - why is this needed? +4. 
⚠️ **Segment opcode mismatch** - 0x27 vs 0x3F + +### Minor Documentation Improvements: + +1. Add explicit examples of stack growth direction +2. Show complete memory layout diagrams +3. Document which registers are volatile/preserved +4. Add troubleshooting section for common mistakes +5. Clarify jump instruction parameter semantics diff --git a/resources/ideas/design.rnote b/docs/design.rnote similarity index 100% rename from resources/ideas/design.rnote rename to docs/design.rnote diff --git a/docs/inconsistencies.md b/docs/inconsistencies.md new file mode 100644 index 0000000..5d387b0 --- /dev/null +++ b/docs/inconsistencies.md @@ -0,0 +1,149 @@ +# DSA Documentation Inconsistencies Analysis + +## 1. Register Descriptions + +### Issue: System Registers vs Assembly-Accessible Registers +- `registers.md` lists MAR, STS, CIR, MDR as "System" registers +- These are NOT mentioned in `dsa_assembly_reference.md` or `instruction_set.md` +- **Resolution**: System registers are internal CPU registers not directly accessible in assembly. They should be documented separately from programmer-accessible registers. + +### Issue: Register Naming Inconsistencies +- `registers.md` uses `RG0-RGF` (uppercase) +- `dsa_assembly_reference.md` uses `rg0-rgf` (lowercase) +- **Resolution**: Assembly syntax should be lowercase (standard convention) + +### Issue: NOREG Register +- `registers.md`: "Loads/using as dest register must cause an illegal instruction trap" +- `dsa_assembly_reference.md`: "on-read/write: illegal instruction fault" +- **Resolution**: Consistent terminology needed - use "illegal instruction fault" + +## 2. 
Instruction Operand Order Inconsistencies + +### Issue: Load Instructions +- `instruction_set.md`: `LDB BaseReg, Offset, DestReg` +- `dsa_assembly_reference.md`: `LDB base_reg, dest_reg [, offset]` +- **Resolution**: Assembly reference shows standard syntax (base, dest, offset optional), instruction set shows encoding order + +### Issue: Store Instructions +- `instruction_set.md`: `STB SrcReg, BaseReg, Offset` +- `dsa_assembly_reference.md`: `STB src_reg, base_reg [, offset]` +- **Resolution**: Consistent - offset is optional + +### Issue: Immediate Load Instructions +- `instruction_set.md`: `LLI DstReg, Value` (destination first) +- `dsa_assembly_reference.md`: `LLI imm, dest_reg` (immediate first) +- **Resolution**: Assembly reference shows gas-style syntax (source, dest), instruction set shows encoding order + +### Issue: Jump Instructions +- `instruction_set.md`: `JMP DestReg, Offset | Address` +- `dsa_assembly_reference.md`: `JMP addr [, offset_reg]` or `JMP imm, offset_reg` +- **Resolution**: Different perspectives - instruction set shows encoding, assembly shows usage + +## 3. Instruction Behavior Differences + +### Issue: IADD/ISUB Operands +- `instruction_set.md`: `IADD Src1, Literal, Dest` (3 operands) +- `dsa_assembly_reference.md`: `IADD src_reg, imm [, dest_reg]` (dest optional) +- **Resolution**: Assembly allows dest to default to src_reg + +### Issue: SHL/SHR Operands +- `instruction_set.md`: `SHL Reg, Literal | ValReg` +- `dsa_assembly_reference.md`: `SHL reg, shift_amount` +- **Resolution**: Both literal and register shifts supported + +## 4. 
Pseudo-Instruction Inconsistencies + +### Issue: PUSH/POP Expansion +- `pseudoinstructions.md`: + - PUSH = `INC SPR` then `STW register, SPR` + - POP = `LDW SPR, register` then `DEC SPR` +- Standard stack conventions suggest PUSH should decrement (grow down) +- **Resolution**: Clarify stack growth direction + +### Issue: LDB/LDH/LDW Pseudo vs Hardware +- `pseudoinstructions.md` lists LDB, LDH, LDW as pseudo-instructions with label addressing +- `instruction_set.md` lists them as hardware instructions +- **Resolution**: Both exist - hardware instructions use registers, pseudo-instructions add label support + +### Issue: LWI Naming +- `dsa_assembly_reference.md`: LWI = Load Word Immediate (load address) +- Could be confused with "Load Word Immediate" (load literal value) +- **Resolution**: LWI specifically means "Load Word address Into register" + +## 5. Calling Convention Details + +### Issue: Argument Offsets +- Calling convention says "first 3 args at offsets 8, 12, 16" +- This assumes 32-bit words (4 bytes each) +- Offset 8 is position of first argument (after return address at offset 4, and old BPR at offset 0) +- **Resolution**: Clarify that SPR+0 = old BPR, SPR+4 = return address, SPR+8 = first arg + +### Issue: Return Value Location +- Says "Store return value (if any) to `spr+8`" +- This overwrites the first argument +- **Resolution**: This is intentional - return value replaces first argument position after cleanup + +## 6. 
Missing Information + +### From instruction_set.md not in assembly reference: +- Instruction encoding details (R-type, I-type, J-type) +- Hex opcodes for each instruction +- Alignment requirements for memory operations +- Sign extension behavior details + +### From assembly reference not in instruction_set: +- Complete pseudo-instruction expansions showing what they compile to +- Library examples (multiply, print) +- Detailed calling convention walkthrough +- Module system (INCLUDE directive) + +### From registers.md not elsewhere: +- STS (Status Register) bit layout +- Boot values for status flags +- System registers (MAR, STS, CIR, MDR) + +## 7. Terminology Inconsistencies + +- "halfword" vs "half-word" vs "16-bit value" +- "word" assumed to be 32-bit (should be explicit) +- "register" vs "reg" in syntax +- "immediate" vs "literal" vs "constant" + +## 8. Critical Missing Details + +### CALL and RETURN Pseudo-instructions +- Assembly reference shows them but doesn't show their expansion +- Need to document what they expand to + +### Label Addressing Mode +- Shows expansions for loads/stores with labels +- Uses RGF as scratch register - should this be documented as reserved for this purpose? + +### Stack Direction +- Not explicitly stated whether stack grows up or down +- PUSH uses INC SPR (suggests growing up) - unusual! + +## Recommendations + +1. **Separate Documentation into Logical Layers**: + - ISA Specification (hardware-level, for CPU implementers) + - Assembly Language Reference (for programmers) + - ABI/Calling Convention (for compiler/linker writers) + +2. **Standardize Terminology**: + - Use consistent casing (lowercase for assembly mnemonics) + - Define terms clearly (word = 32-bit, halfword = 16-bit, byte = 8-bit) + - Distinguish "literal" (immediate value in code) from "address" (memory location) + +3. **Document Stack Convention Clearly**: + - Explicitly state stack grows upward (unusual but valid) + - Show memory layout diagrams + +4. 
**Show Complete Pseudo-instruction Expansions**: + - CALL, RETURN need full expansion documentation + - Document which register(s) are used as temporaries + +5. **Clarify Register Usage Conventions**: + - ACC: used by pseudo-instructions, volatile + - RGF: used by label addressing, volatile + - RG0-RGE: general purpose, callee may use per calling convention diff --git a/resources/ideas/dsa_assembly_reference.md b/resources/ideas/dsa_assembly_reference.md deleted file mode 100644 index cfdee16..0000000 --- a/resources/ideas/dsa_assembly_reference.md +++ /dev/null @@ -1,427 +0,0 @@ -# DSA Assembly Language Instruction Reference - -## Overview - -This document provides a comprehensive reference for the DSA (Damn Simple Architecture) assembly language, including all hardware instructions and pseudo-instructions with their syntax variations and usage examples. - -## Calling Convention - -| Step | Responsibility | Action | Description | -|------|----------------|--------|-------------| -| 1 | **Caller** | Push arguments | Push exactly n arguments to the stack (in order, last argument pushed first) | -| 2 | **Caller** | Call function | Execute `call namespace::function` - this automatically pushes the return address (pcx) and jumps to the function | -| 3 | **Function** | Set up stack frame | Execute `push bpr; mov spr, bpr` to establish new stack frame | -| 4 | **Function** | Access arguments | Read arguments starting at `spr+8` (first 3 args at offsets 8, 12, 16) | -| 5 | **Function** | Execute function | Perform the function's operations using the arguments | -| 6 | **Function** | Store return value | Write return value (if any) to `spr+8` | -| 7 | **Function** | Restore stack frame | Execute `mov bpr, spr; pop bpr` to restore previous stack frame | -| 8 | **Function** | Return | Execute `return` pseudo-instruction to return to caller | -| 9 | **Caller** | Clean up stack | Pop exactly n arguments from the stack to clean up | -| 10 | **Caller** | Handle unused values | 
Use `pop zero` to discard any unused stack values if needed | - -**Notes:** -- The namespace in step 2 is the name assigned in the `include` statement -- The `call` pseudo-instruction automatically handles return address management so long as the callee does not mess with the stack -- Arguments are accessed by the callee using offsets from the base pointer (bpr) - -## Registers - -| Register | Type | Description | -|----------|------|---------------------------------------------------------------------------------------------------| -| `rg0-rgf` | General Purpose | General-purpose registers. | -| `acc` | Special | Accumulator for calculations and temporary storage - don't use this for variables as pseudo instructions may overwrite this implicitly! | -| `spr` | Special | Stack pointer | -| `bpr` | Special | Base pointer for stack frames | -| `ret` | Special | Return address register | -| `idr` | Privileged | Interrupt descriptor table address
**on-read/write: protection fault (unless in kernel mode)** | -| `mmr` | Privileged | Hardware memory map table address
**on-read/write: protection fault (unless in kernel mode)** | -| `zero` | Read-only | Always contains zero
**on-read: always returns zero**
**on-write: value is voided** | -| `pcx` | Read-only | Program counter
**on-write: protection fault** | -| `noreg` | Placeholder | Indicates absence of register argument
**on-read/write: illegal instruction fault** | - -## Hardware Instructions - -### Data Movement Instructions - -| Mnemonic | Operands | Description | -|----------|----------|-------------| -| **MOV** | `src_reg, dest_reg` | Copy value from source to destination register | -| **MOVS** | `src_reg, dest_reg` | Copy with sign extension | - -**Examples:** -```asm -mov rg0, rg1 ; Copy rg0 to rg1 -movs rg0, rg1 ; Copy rg0 to rg1 with sign extension -``` -### Memory Access Instructions - -#### Load Instructions - -| Mnemonic | Operands | Description | -|----------|----------|-------------| -| **LDB** | `base_reg, dest_reg [, offset]`
`label, dest_reg [, offset]` | Load byte from memory | -| **LDBS** | `base_reg, dest_reg [, offset]`
`label, dest_reg [, offset]` | Load byte with sign extension | -| **LDH** | `base_reg, dest_reg [, offset]`
`label, dest_reg [, offset]` | Load half-word (16-bit) | -| **LDHS** | `base_reg, dest_reg [, offset]`
`label, dest_reg [, offset]` | Load half-word with sign extension | -| **LDW** | `base_reg, dest_reg [, offset]`
`label, dest_reg [, offset]` | Load word (32-bit) | - -**Examples:** -```asm -; Direct register addressing -ldb rg0, rg1 ; Load byte from address in rg0 -ldw rg0, rg1, 8 ; Load word from (rg0 + 8) - -; Label addressing -ldb buffer, rg2 ; Load byte from label 'buffer' -ldw stack, bpr ; Load stack address into base pointer -``` -**Label Expansions:** -```asm -; ldb buffer, rg2 expands to: -lli buffer, rg2 ; Load lower 16 bits of buffer address -lui buffer, rg2 ; Load upper 16 bits of buffer address -ldb rg2, rg2 ; Load byte from address in rg2 - -; ldw stack, bpr expands to: -lli stack, bpr ; Load lower 16 bits of stack address -lui stack, bpr ; Load upper 16 bits of stack address -ldw bpr, bpr ; Load word from address in bpr -``` -#### Store Instructions - -| Mnemonic | Operands | Description | -|----------|----------|-------------| -| **STB** | `src_reg, base_reg [, offset]`
`src_reg, label [, offset]` | Store byte to memory | -| **STH** | `src_reg, base_reg [, offset]`
`src_reg, label [, offset]` | Store half-word to memory | -| **STW** | `src_reg, base_reg [, offset]`
`src_reg, label [, offset]` | Store word to memory | - -**Examples:** -```asm -; Direct register addressing -stb rg0, rg1 ; Store byte from rg0 to address in rg1 -stw rg0, rg1, 12 ; Store word to (rg1 + 12) - -; Label addressing -stb acc, buffer ; Store byte from accumulator to 'buffer' -stw rg1, current ; Store word to 'current' variable -``` -**Label Expansions:** -```asm -; stb acc, buffer expands to: -lli buffer, rgf ; Load lower 16 bits of buffer address -lui buffer, rgf ; Load upper 16 bits of buffer address -stb acc, rgf ; Store byte from acc to address in rgf - -; stw rg1, current expands to: -lli current, rgf ; Load lower 16 bits of current address -lui current, rgf ; Load upper 16 bits of current address -stw rg1, rgf ; Store word from rg1 to address in rgf -``` -### Immediate Load Instructions - -| Mnemonic | Operands | Description | -|----------|----------|------------------------------------------------------------------------| -| **LLI** | `imm, dest_reg` | Load 16-bit immediate into lower 16 bits
**Clears upper 16 bits!** | -| **LUI** | `imm, dest_reg` | Load 16-bit immediate into upper 16 bits | - -**Usage** - -ensure that you always run **Lli** before **Lui** as **Lli** clears the upper 16 bits. - -**Examples:** -```asm -lli 0x1234, rg0 ; Load 0x1234 into lower 16 bits of rg0 -lui 0xABCD, rg0 ; Load 0xABCD into upper 16 bits of rg0 -``` -### Jump Instructions - -| Mnemonic | Operands | Description | -|----------|----------|-------------| -| **JMP** | `addr [, offset_reg]`
`imm, offset_reg` | Unconditional jump | -| **JEQ** | `addr [, offset_reg]` | Jump if equal flag set | -| **JNE** | `addr [, offset_reg]` | Jump if not equal flag set | -| **JGT** | `addr [, offset_reg]` | Jump if greater than flag set | -| **JGE** | `addr [, offset_reg]` | Jump if greater or equal flags set | -| **JLT** | `addr [, offset_reg]` | Jump if less than flag set | -| **JLE** | `addr [, offset_reg]` | Jump if less or equal flags set | - -**Examples:** -```asm -jmp start ; Jump to label 'start' -jmp 4, ret ; Jump to address (4 + ret register) -jeq end ; Jump to 'end' if equal flag set -jgt loop ; Jump to 'loop' if greater than flag set -``` -### Arithmetic Instructions - -| Mnemonic | Operands | Description | -|----------|----------|-------------| -| **ADD** | `src1_reg, src2_reg, dest_reg` | Addition | -| **SUB** | `src1_reg, src2_reg, dest_reg` | Subtraction | -| **IADD** | `src_reg, imm [, dest_reg]` | Immediate addition | -| **ISUB** | `src_reg, imm [, dest_reg]` | Immediate subtraction | -| **INC** | `reg` | Increment register by 1 | -| **DEC** | `reg` | Decrement register by 1 | - -**Examples:** -```asm -add rg0, rg1, rg2 ; rg2 = rg0 + rg1 -sub rg0, rg1, rg2 ; rg2 = rg0 - rg1 -iadd rg0, 10 ; rg0 = rg0 + 10 -// or using alternate syntax -addi rg0, 1 ; rg0 = rg0 + 1 -inc rg0 ; rg0 = rg0 + 1 -``` -### Bitwise Operations - -| Mnemonic | Operands | Description | -|----------|----------|-------------| -| **AND** | `src1_reg, src2_reg, dest_reg` | Bitwise AND | -| **OR** | `src1_reg, src2_reg, dest_reg` | Bitwise OR | -| **XOR** | `src1_reg, src2_reg, dest_reg` | Bitwise XOR | -| **NOT** | `src_reg, dest_reg` | Bitwise NOT | -| **NAND** | `src1_reg, src2_reg, dest_reg` | Bitwise NAND | -| **NOR** | `src1_reg, src2_reg, dest_reg` | Bitwise NOR | -| **XNOR** | `src1_reg, src2_reg, dest_reg` | Bitwise XNOR | - -**Examples:** -```asm -and rg0, rg1, rg2 ; rg2 = rg0 & rg1 -not rg0, rg1 ; rg1 = ~rg0 -``` -### Shift Operations - -| Mnemonic | Operands | Description 
| -|----------|----------|-------------| -| **SHL** | `reg, shift_amount` | Shift left | -| **SHR** | `reg, shift_amount` | Shift right | - -**Examples:** -```asm -shl rg0, 2 ; Shift rg0 left by 2 bits -shr rg0, 3 ; Shift rg0 right by 3 bits -``` -### Comparison and Control - -| Mnemonic | Operands | Description | -|----------|----------|-------------| -| **CMP** | `reg1, reg2` | Compare registers and set flags | - -**Examples:** -```asm -cmp rg0, zero ; Compare rg0 with zero register -cmp rg1, rg2 ; Compare rg1 with rg2 -``` -### System Instructions - -| Mnemonic | Operands | Description | -|----------|----------|-------------| -| **HLT** | - | Halt processor execution | -| **NOP** | - | No operation | -| **INT** | `interrupt_code` | Trigger interrupt | -| **IRT** | - | Return from interrupt | - -**Examples:** -```asm -hlt ; Stop processor execution -int 0x21 ; Trigger interrupt 0x21 -``` -## Pseudo-Instructions - -### Data Definition - -| Mnemonic | Syntax | Description | -|----------|--------|-------------| -| **DB** | `name: value1 [, value2, ...]` | Define bytes | -| **DH** | `name: value1 [, value2, ...]` | Define half-words | -| **DW** | `name: value1 [, value2, ...]` | Define words | - -**Examples:** -```asm -db message: "Hello World", 0 -dh numbers: 1000, 2000, 3000 -dw stack: 0x10000 -``` -### Memory Reservation - -| Mnemonic | Syntax | Description | -|----------|--------|-------------| -| **RESB** | `name: size` | Reserve bytes | -| **RESH** | `name: size` | Reserve half-words | -| **RESW** | `name: size` | Reserve words | - -**Examples:** -```asm -resb buffer: 256 ; Reserve 256 bytes -resh array: 100 ; Reserve space for 100 half-words -resw heap: 1024 ; Reserve space for 1024 words -``` -### Stack Operations - -| Mnemonic | Operands | Description | -|----------|----------|-------------| -| **PUSH** | `reg` | Push register value onto stack | -| **POP** | `reg` | Pop stack value into register | - -**Examples:** -```asm -push rg0 ; Push rg0 value onto 
stack -pop ret ; Pop return address -``` -### Memory Access Shortcuts - -| Mnemonic | Operands | Description | -|----------|----------|-------------| -| **LWI** | `name, reg` | Load address into register | - -**Examples:** -```asm -lwi string, rg1 ; Load address of 'string' into rg1 -``` - -### Function Control - -| Mnemonic | Operands | Description | -|----------|----------|-------------| -| **CALL** | `namespace::function` | Call a function with automatic return address management | -| **RETURN** | - | Return from a function to the caller | - -**Examples:** -```asm -call print::print ; Call the print function from the print namespace -return ; Return from the current function -``` - -### Module System - -| Mnemonic | Syntax | Description | -|----------|--------|-------------| -| **INCLUDE** | `module_name "path"` | Include module | - -**Examples:** -```asm -include print "print.dsa" -include fib "fib.dsa" -``` -## Library Examples - -### Multiplication Library (multiply.dsa) - -```asm -// multiply.dsa -// usage: -// -// include multiply "" -// -// usage for multiply: -// push (arg1) -// push (arg0) -// call multiply::multiply -// pop (arg0) -// pop (arg1) - -multiply: - push bpr - mov spr, bpr - - ldw bpr, rg0, 8 // load op 1 - ldw bpr, rg1, 12 // load op 2 - - lli 0, acc // initialize accumulator - -start: - add acc, rg0, acc - dec rg1 - - cmp rg1, zero - jgt start - -end: - stw acc, bpr, 8 // store result for caller - mov bpr, spr - pop bpr - return -``` - -### Print Library (print.dsa) - -```asm -// print.dsa -// usage: -// -// include print "" -// -// usage for print: -// push (register containing address of string) -// call print::print -// pop zero -// -// usage for reset: -// call print::reset - -dw display: 0x20000 -dw current: 0x20000 - -// prints the given text to the screen. 
-print: - push bpr - mov spr, bpr - - ldw bpr, rg0, 8 // get string address argument - ldw current, rg1 // get current display position - -print_loop: - ldb rg0, acc - stb acc, rg1 - - iadd rg0, 1 - iadd rg1, 1 - - cmp acc, zero - jne print_loop - jmp end - -// return -end: - stw rg1, current - - mov bpr, spr - pop bpr - return - -// resets the cursor position on the screen -reset: - push bpr - mov spr, bpr - ldw display, rg1 - stw rg1, current - mov bpr, spr - pop bpr - return -``` - -### Example Program (main.dsa) - -```asm -include print "./print.dsa" - -dw stack: 0x10000 -db string: "'To confuse your enemy, you must first confuse yourself' - Probably Sun Tzu." - -init: - // set up a stack. - ldw stack, bpr - mov bpr, spr - -start: - lwi string, rg1 - - // push string address argument - push rg1 - // call print function - call print::print - // clean up stack - pop rg1 - - hlt -``` \ No newline at end of file diff --git a/resources/ideas/dsa_binary_format.md b/resources/ideas/dsa_binary_format.md deleted file mode 100644 index 3a372ad..0000000 --- a/resources/ideas/dsa_binary_format.md +++ /dev/null @@ -1,10 +0,0 @@ -# DSA File formatting specification. - -First, a clarification on what formats this document references. - -- .dsb: DSA Binary object, similar to a .o object file -- .dse: DSA Executable file, similar to a .exe/ELF binary - -## Format Specification - -### DSB binary format