Compare commits
40 Commits
a1099249e9
..
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 7b18922cc7 | |||
| a0b02cb955 | |||
| 240f0e553f | |||
| 25a59a6b19 | |||
| c67217a6b8 | |||
| 7ccbd9258f | |||
| 201b18069b | |||
| d66baf6f99 | |||
| 75ad04cf95 | |||
| 8361833b1c | |||
| 5e575e2cd8 | |||
| 931af90789 | |||
| 509b3465f1 | |||
| 22241a5633 | |||
| e2be83414b | |||
| f7ed764e96 | |||
| 328741eb51 | |||
| 9f35fc9415 | |||
| 828f5bfb2d | |||
| 6699333b2c | |||
| e9329eca95 | |||
| 250b780e14 | |||
| bbcef7178f | |||
| 1fcfb3120b | |||
| e69514e46e | |||
| b8abbfd02f | |||
| c2bf9f6667 | |||
| 2f91c4127c | |||
| 89762b54e3 | |||
| a35cfbe864 | |||
| 8d130a870c | |||
| 458661b02a | |||
| c41e5328e6 | |||
| 67ebf48d6f | |||
| 98668c681e | |||
| 05a25447b2 | |||
| 56d2abe17f | |||
| eaaefd1b07 | |||
| 5302ad3876 | |||
| 2280f1e5d9 |
@@ -5,3 +5,7 @@ rustc-wrapper = "sccache"
|
||||
|
||||
[future-incompat-report]
|
||||
frequency = "always"
|
||||
|
||||
[profile.profiling]
|
||||
inherits = "release"
|
||||
debug = true
|
||||
|
||||
Vendored
+4
@@ -8,4 +8,8 @@
|
||||
"files.trimTrailingWhitespace": true,
|
||||
"gitea.owner": "LowLevelDevs",
|
||||
"gitea.repo": "damn_simple_architecture",
|
||||
"[markdown]": {
|
||||
"editor.formatOnSave": true,
|
||||
"editor.formatOnPaste": true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
// Folder-specific settings
|
||||
//
|
||||
// For a full list of overridable settings, and general information on folder-specific settings,
|
||||
// see the documentation: https://zed.dev/docs/configuring-zed#settings-files
|
||||
{
|
||||
"lsp": {
|
||||
"rust-analyzer": {
|
||||
"initialization_options": {
|
||||
"check": {
|
||||
"command": "clippy", // rust-analyzer.check.command (default: "check")
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
[
|
||||
{
|
||||
"label": "Run Emulator",
|
||||
"command": "cargo run --bin emulator",
|
||||
"use_new_terminal": true,
|
||||
},
|
||||
{
|
||||
"label": "Run Compiler",
|
||||
"command": "cargo run --bin compiler",
|
||||
"use_new_terminal": true,
|
||||
},
|
||||
{
|
||||
"label": "Run Assembler",
|
||||
"command": "cargo run --bin assembler",
|
||||
"use_new_terminal": true,
|
||||
},
|
||||
{
|
||||
"label": "Run Build System (dsx-build)",
|
||||
"command": "cargo run --bin dsx-build",
|
||||
"use_new_terminal": true,
|
||||
},
|
||||
{
|
||||
"label": "Build All (Release)",
|
||||
"command": "cargo build --release",
|
||||
"use_new_terminal": false,
|
||||
},
|
||||
{
|
||||
"label": "Run Tests",
|
||||
"command": "cargo test",
|
||||
"use_new_terminal": true,
|
||||
},
|
||||
{
|
||||
"label": "Profile Emulator with perf",
|
||||
"command": "cargo build --profile profiling; perf record -g -F 999 target/profiling/emulator; perf script -F +pid | save test.perf",
|
||||
"use_new_terminal": true,
|
||||
},
|
||||
]
|
||||
+7
-3
@@ -1,7 +1,7 @@
|
||||
cargo-features = ["codegen-backend"]
|
||||
|
||||
[workspace]
|
||||
members = ["emulator", "common", "assembler", "dsa_editor", "compiler", "c_compiler"]
|
||||
members = ["emulator", "common", "assembler", "dsa_editor", "compiler", "dsx-build"]
|
||||
resolver = "3"
|
||||
|
||||
[workspace.package]
|
||||
@@ -11,7 +11,11 @@ authors = ["zxq5", "nullndvoid"]
|
||||
|
||||
[profile.dev]
|
||||
codegen-backend = "cranelift"
|
||||
panic = "abort" # Cranelift does not support stack unwinds.
|
||||
panic = "abort" # Cranelift does not support stack unwinds.
|
||||
lto = false
|
||||
debug = true
|
||||
incremental = false # sccache does not support caching incremental crates.
|
||||
incremental = false # sccache does not support caching incremental crates.
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
lto = "fat"
|
||||
|
||||
@@ -5,7 +5,7 @@ edition.workspace = true
|
||||
authors.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "assembler_runner"
|
||||
name = "assembler"
|
||||
path = "src/main.rs"
|
||||
|
||||
[lib]
|
||||
|
||||
@@ -223,19 +223,31 @@ fn build_shift_instruction(
|
||||
opcode: Opcode,
|
||||
args: &[crate::assembler::model::Token],
|
||||
) -> Result<Instruction, AssembleError> {
|
||||
let Some(reg_token) = args.first() else {
|
||||
let Some(src_reg) = args.first() else {
|
||||
return Err(AssembleError::MissingArgument(0));
|
||||
};
|
||||
let Some(amount_token) = args.get(1) else {
|
||||
let Some(r_shamt) = args.get(1) else {
|
||||
return Err(AssembleError::MissingArgument(0));
|
||||
};
|
||||
let Some(i_shamt) = args.get(2) else {
|
||||
return Err(AssembleError::MissingArgument(1));
|
||||
};
|
||||
let Some(dest_reg) = args.get(3) else {
|
||||
return Err(AssembleError::MissingArgument(1));
|
||||
};
|
||||
|
||||
let reg = expect_token!(reg_token, Register)?;
|
||||
let amount = expect_token!(amount_token, Immediate)? as u8;
|
||||
let src = expect_token!(src_reg, Register)?;
|
||||
let r_shamt = expect_token!(r_shamt, Register)?;
|
||||
let i_shamt = expect_token!(i_shamt, Immediate)? as u8;
|
||||
let dest = expect_token!(dest_reg, Register)?;
|
||||
|
||||
match opcode {
|
||||
Opcode::Shl => Ok(Instruction::ShiftLeft(args!(R, sr1: reg, shamt: amount))),
|
||||
Opcode::Shr => Ok(Instruction::ShiftRight(args!(R, sr1: reg, shamt: amount))),
|
||||
Opcode::Shl => Ok(Instruction::ShiftLeft(
|
||||
args!(R, sr1: src, sr2: r_shamt, shamt: i_shamt, dr: dest),
|
||||
)),
|
||||
Opcode::Shr => Ok(Instruction::ShiftRight(
|
||||
args!(R, sr1: src, sr2: r_shamt, shamt: i_shamt, dr: dest),
|
||||
)),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,8 @@ use crate::assembler::model::{Node, Opcode, Symbol, Token};
|
||||
/// Parse DSA assembly code with optional formatting
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// ```rs
|
||||
/// use assembler::macros::dsa;
|
||||
/// // With formatting:
|
||||
/// let nodes = dsa!(hash, "mov r1, {}", 42)?;
|
||||
///
|
||||
|
||||
@@ -184,11 +184,11 @@ pub enum Token {
|
||||
impl fmt::Display for Token {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Symbol(symbol) => write!(f, "{}", symbol),
|
||||
Self::Register(register) => write!(f, "{}", register),
|
||||
Self::Immediate(immediate) => write!(f, "{}", immediate),
|
||||
Self::StringLit(string_lit) => write!(f, "{}", string_lit),
|
||||
Self::Opcode(opcode) => write!(f, "{}", opcode),
|
||||
Self::Symbol(symbol) => write!(f, "{symbol}"),
|
||||
Self::Register(register) => write!(f, "{register}",),
|
||||
Self::Immediate(immediate) => write!(f, "{immediate}",),
|
||||
Self::StringLit(string_lit) => write!(f, "{string_lit}",),
|
||||
Self::Opcode(opcode) => write!(f, "{opcode}",),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::assembler::TokenType;
|
||||
use crate::{assembler::AssembleError, expect_token, expect_type, node};
|
||||
|
||||
use crate::assembler::model::{Node, Opcode, Token};
|
||||
@@ -100,6 +101,7 @@ impl Parser {
|
||||
let opcode = expect_token!(self.next()?, Opcode)?;
|
||||
let args: Vec<Token>;
|
||||
|
||||
#[allow(clippy::match_same_arms)]
|
||||
match opcode {
|
||||
// R-type instructions
|
||||
Opcode::Mov | Opcode::Movs => {
|
||||
@@ -112,22 +114,25 @@ impl Parser {
|
||||
let base = expect_type!(self.next()?, Register, Symbol)?;
|
||||
let dest = expect_type!(self.next()?, Register)?;
|
||||
|
||||
let mut offset = Token::Immediate(0);
|
||||
if let Ok(next) = self.peek_next()
|
||||
&& expect_type!(next, Immediate).is_ok() {
|
||||
offset = self.next()?;
|
||||
let offset = match self.peek_next() {
|
||||
Ok(next) if expect_type!(next.clone(), Immediate).is_ok() => {
|
||||
self.next()?
|
||||
}
|
||||
_ => Token::Immediate(0),
|
||||
};
|
||||
|
||||
args = vec![base, dest, offset];
|
||||
}
|
||||
Opcode::Stb | Opcode::Sth | Opcode::Stw => {
|
||||
let base = expect_type!(self.next()?, Register)?;
|
||||
let dest = expect_type!(self.next()?, Register, Symbol)?;
|
||||
let mut offset = Token::Immediate(0);
|
||||
if let Ok(next) = self.peek_next()
|
||||
&& expect_type!(next, Immediate).is_ok() {
|
||||
offset = self.next()?;
|
||||
|
||||
let offset = match self.peek_next() {
|
||||
Ok(next) if expect_type!(next.clone(), Immediate).is_ok() => {
|
||||
self.next()?
|
||||
}
|
||||
_ => Token::Immediate(0),
|
||||
};
|
||||
args = vec![base, dest, offset];
|
||||
}
|
||||
|
||||
@@ -146,15 +151,49 @@ impl Parser {
|
||||
}
|
||||
|
||||
Opcode::Not | Opcode::Cmp => {
|
||||
let reg1 = expect_type!(self.next()?, Register, Symbol)?;
|
||||
let reg2 = expect_type!(self.next()?, Register, Symbol)?;
|
||||
args = vec![reg1, reg2];
|
||||
let src = expect_type!(self.next()?, Register, Symbol)?;
|
||||
let dest = expect_type!(self.next()?, Register, Symbol)?;
|
||||
args = vec![src, dest];
|
||||
}
|
||||
|
||||
Opcode::Shl | Opcode::Shr => {
|
||||
let reg = expect_type!(self.next()?, Register, Symbol)?;
|
||||
let num = expect_type!(self.next()?, Immediate)?;
|
||||
args = vec![reg, num];
|
||||
let src = expect_type!(self.next()?, Register, Symbol)?;
|
||||
|
||||
// First operand after src: could be immediate or register
|
||||
let first = self.next()?;
|
||||
|
||||
let (r_shamt, i_shamt) = match first {
|
||||
Token::Register(_) => (
|
||||
first,
|
||||
if let Ok(tok) = self.peek_next() {
|
||||
if expect_type!(tok, Immediate).is_ok() {
|
||||
self.next()?
|
||||
} else {
|
||||
Token::Immediate(0)
|
||||
}
|
||||
} else {
|
||||
Token::Immediate(0)
|
||||
},
|
||||
),
|
||||
Token::Immediate(_) => (Token::Register(Register::Zero), first),
|
||||
_ => {
|
||||
return Err(AssembleError::UnexpectedToken(
|
||||
first,
|
||||
TokenType::Immediate,
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let dest = if let Ok(tok) = self.peek_next() {
|
||||
if expect_type!(tok, Register).is_ok() {
|
||||
self.next()?
|
||||
} else {
|
||||
src.clone() // Default to src if no dest specified
|
||||
}
|
||||
} else {
|
||||
src.clone() // Default to src if no dest specified
|
||||
};
|
||||
|
||||
args = vec![src, r_shamt, i_shamt, dest];
|
||||
}
|
||||
|
||||
Opcode::Inc | Opcode::Dec => {
|
||||
|
||||
@@ -24,5 +24,27 @@ pub mod prelude {
|
||||
pub use crate::tooling::project;
|
||||
}
|
||||
|
||||
use std::{fs, path::Path};
|
||||
|
||||
use num_cpus as _;
|
||||
use threadpool as _;
|
||||
|
||||
use crate::prelude::CompilerEngine;
|
||||
|
||||
pub fn assemble_file(input: &str, output: &str) -> Result<(), std::io::Error> {
|
||||
let mut engine = CompilerEngine::new();
|
||||
engine.start_compilation(Path::new(input));
|
||||
let result = engine.wait_for_result().expect("assembler failed.");
|
||||
|
||||
let buffer: Vec<u8> = result
|
||||
.iter()
|
||||
.flat_map(|instruction| instruction.encode().to_be_bytes())
|
||||
.collect();
|
||||
|
||||
if let Err(e) = fs::write(output, buffer) {
|
||||
eprintln!("Failed to write to output file: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
+2
-15
@@ -3,6 +3,7 @@ use num_cpus as _;
|
||||
use threadpool as _;
|
||||
|
||||
use assembler::{
|
||||
assemble_file,
|
||||
prelude::*,
|
||||
tooling::{brainf, project},
|
||||
};
|
||||
@@ -46,19 +47,5 @@ fn main() {
|
||||
|
||||
let input_path = &args[2];
|
||||
let output_path = &args[4];
|
||||
let src = PathBuf::from(input_path);
|
||||
|
||||
// Initialize the compiler engine
|
||||
let mut compiler = CompilerEngine::new();
|
||||
compiler.start_compilation(&src);
|
||||
|
||||
// Or block until done
|
||||
let result = compiler.wait_for_result().unwrap();
|
||||
|
||||
for instruction in result {
|
||||
if let Err(e) = fs::write(output_path, instruction.encode().to_be_bytes()) {
|
||||
eprintln!("Failed to write to output file: {e}");
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
assemble_file(input_path, output_path).unwrap();
|
||||
}
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
[package]
|
||||
name = "c_compiler"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
|
||||
[dependencies]
|
||||
chrono = "0.4.42"
|
||||
@@ -1,14 +0,0 @@
|
||||
int var_x = 5;
|
||||
|
||||
int factorial(int n) {
|
||||
if (n <= 1) {
|
||||
return 1;
|
||||
}
|
||||
return n * factorial(n - 1);
|
||||
}
|
||||
|
||||
int main() {
|
||||
int result = var_x + factorial(5);
|
||||
print(result);
|
||||
return 0;
|
||||
}
|
||||
@@ -1,926 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple C to DSA Assembly Compiler
|
||||
Supports a subset of C including:
|
||||
- int variables and functions
|
||||
- Arithmetic operations (+, -, *, /)
|
||||
- Comparisons (==, !=, <, >, <=, >=)
|
||||
- If/else statements
|
||||
- While loops
|
||||
- Function calls
|
||||
- Return statements
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from pprint import pprint
|
||||
import json
|
||||
|
||||
|
||||
class TokenType(Enum):
    """Every kind of token the lexer can emit.

    For keywords, operators and delimiters the member value is the exact
    source spelling; IDENTIFIER, NUMBER and EOF use their own name as value.
    """

    # -- keywords ---------------------------------------------------------
    INT = "int"
    IF = "if"
    ELSE = "else"
    WHILE = "while"
    RETURN = "return"

    # -- identifiers and literals -----------------------------------------
    IDENTIFIER = "IDENTIFIER"
    NUMBER = "NUMBER"

    # -- arithmetic, assignment and comparison operators ------------------
    PLUS = "+"
    MINUS = "-"
    STAR = "*"
    SLASH = "/"
    ASSIGN = "="
    EQ = "=="
    NE = "!="
    LT = "<"
    GT = ">"
    LE = "<="
    GE = ">="

    # -- delimiters -------------------------------------------------------
    LPAREN = "("
    RPAREN = ")"
    LBRACE = "{"
    RBRACE = "}"
    SEMICOLON = ";"
    COMMA = ","

    # -- end-of-input marker ----------------------------------------------
    EOF = "EOF"
|
||||
|
||||
|
||||
@dataclass
class Token:
    """One lexeme plus the position at which it started in the source text."""

    type: TokenType  # which kind of token this is
    value: str  # the token's text
    line: int  # line on which the token starts
    col: int  # column at which the token starts
|
||||
|
||||
|
||||
class Lexer:
    """Converts source text of the supported C subset into a list of tokens.

    The lexer tracks a character offset (`pos`) together with a 1-based
    line/column pair that is recorded on every token and used in error
    messages.
    """

    def __init__(self, source: str):
        self.source = source
        self.pos = 0  # offset of the next unread character
        self.line = 1
        self.col = 1
        self.tokens = []

    def error(self, msg: str):
        """Raise a SyntaxError tagged with the current line and column."""
        raise SyntaxError(f"Lexer error at line {self.line}, col {self.col}: {msg}")

    def peek(self, offset: int = 0) -> Optional[str]:
        """Return the character `offset` places ahead, or None past the end."""
        pos = self.pos + offset
        return self.source[pos] if pos < len(self.source) else None

    def advance(self) -> Optional[str]:
        """Consume and return one character (None at end of input).

        A newline moves to column 1 of the next line; any other character
        moves one column to the right.
        """
        if self.pos >= len(self.source):
            return None
        char = self.source[self.pos]
        self.pos += 1
        if char == "\n":
            self.line += 1
            self.col = 1
        else:
            self.col += 1
        return char

    def skip_whitespace(self):
        """Consume a run of spaces, tabs, newlines and carriage returns."""
        while self.peek() and self.peek() in " \t\n\r":
            self.advance()

    def skip_comment(self):
        """Consume one `//` comment, including its terminating newline, if present."""
        if self.peek() == "/" and self.peek(1) == "/":
            while self.peek() and self.peek() != "\n":
                self.advance()
            self.advance()  # skip newline

    def _skip_trivia(self):
        """Consume whitespace and comments until neither makes progress.

        A single whitespace-then-comment pass is not enough: a comment may be
        followed by an indented line or by another comment, and those must
        not reach the token dispatch below.
        """
        while True:
            before = self.pos
            self.skip_whitespace()
            self.skip_comment()
            if self.pos == before:
                return

    def read_number(self) -> str:
        """Consume consecutive digits and return them as a string."""
        num = ""
        while self.peek() and self.peek().isdigit():
            num += self.advance()
        return num

    def read_identifier(self) -> str:
        """Consume consecutive alphanumerics/underscores and return them."""
        ident = ""
        while self.peek() and (self.peek().isalnum() or self.peek() == "_"):
            ident += self.advance()
        return ident

    def tokenize(self) -> List["Token"]:
        """Lex the whole input and return the tokens, terminated by an EOF token.

        Raises:
            SyntaxError: on a character that starts no known token.
        """
        keywords = {
            "int": TokenType.INT,
            "if": TokenType.IF,
            "else": TokenType.ELSE,
            "while": TokenType.WHILE,
            "return": TokenType.RETURN,
        }
        # Two-character operators are tried before their one-character prefixes.
        two_char_ops = {
            "==": TokenType.EQ,
            "!=": TokenType.NE,
            "<=": TokenType.LE,
            ">=": TokenType.GE,
        }
        one_char_ops = {
            "+": TokenType.PLUS,
            "-": TokenType.MINUS,
            "*": TokenType.STAR,
            "/": TokenType.SLASH,
            "=": TokenType.ASSIGN,
            "<": TokenType.LT,
            ">": TokenType.GT,
            "(": TokenType.LPAREN,
            ")": TokenType.RPAREN,
            "{": TokenType.LBRACE,
            "}": TokenType.RBRACE,
            ";": TokenType.SEMICOLON,
            ",": TokenType.COMMA,
        }

        while True:
            self._skip_trivia()
            if self.pos >= len(self.source):
                break

            # Remember where the token starts before consuming anything.
            line, col = self.line, self.col
            char = self.peek()
            pair = char + (self.peek(1) or "")

            if char.isdigit():
                num = self.read_number()
                self.tokens.append(Token(TokenType.NUMBER, num, line, col))
            elif char.isalpha() or char == "_":
                ident = self.read_identifier()
                token_type = keywords.get(ident, TokenType.IDENTIFIER)
                self.tokens.append(Token(token_type, ident, line, col))
            elif pair in two_char_ops:
                self.advance()
                self.advance()
                self.tokens.append(Token(two_char_ops[pair], pair, line, col))
            elif char in one_char_ops:
                self.advance()
                self.tokens.append(Token(one_char_ops[char], char, line, col))
            else:
                self.error(f"Unexpected character: {char}")

        self.tokens.append(Token(TokenType.EOF, "", self.line, self.col))
        return self.tokens
|
||||
|
||||
|
||||
# AST Node classes
|
||||
@dataclass
class ASTNode:
    """Root of the syntax-tree class hierarchy; carries no fields."""


@dataclass
class Program(ASTNode):
    """A whole translation unit: the top-level declarations in source order."""

    declarations: List["Declaration"]


@dataclass
class Declaration(ASTNode):
    """Base class for top-level declarations."""


@dataclass
class FunctionDecl(Declaration):
    """A function definition: its name, parameter names and body."""

    name: str
    params: List[str]
    body: "CompoundStmt"


@dataclass
class VarDecl(Declaration):
    """A variable declaration with an optional initializer expression."""

    name: str
    init: Optional["Expression"] = None


@dataclass
class Statement(ASTNode):
    """Base class for statements."""


@dataclass
class CompoundStmt(Statement):
    """A braced block holding a sequence of statements."""

    statements: List[Statement]


@dataclass
class ExprStmt(Statement):
    """An expression used as a statement; `expr` is None for an empty one."""

    expr: Optional["Expression"]


@dataclass
class IfStmt(Statement):
    """An `if` with its condition, then-branch and optional else-branch."""

    condition: "Expression"
    then_stmt: Statement
    else_stmt: Optional[Statement] = None


@dataclass
class WhileStmt(Statement):
    """A `while` loop: condition plus body."""

    condition: "Expression"
    body: Statement


@dataclass
class ReturnStmt(Statement):
    """A `return`, with `expr` None when no value is given."""

    expr: Optional["Expression"]


@dataclass
class Expression(ASTNode):
    """Base class for expressions."""


@dataclass
class BinaryOp(Expression):
    """A binary operation; `op` is the operator's source text."""

    op: str
    left: Expression
    right: Expression


@dataclass
class UnaryOp(Expression):
    """A prefix operation; `op` is the operator's source text."""

    op: str
    operand: Expression


@dataclass
class AssignExpr(Expression):
    """Assignment of `value` to the variable called `name`."""

    name: str
    value: Expression


@dataclass
class VarExpr(Expression):
    """A reference to a variable by name."""

    name: str


@dataclass
class NumberExpr(Expression):
    """An integer literal."""

    value: int


@dataclass
class CallExpr(Expression):
    """A call to the function `name` with the given argument expressions."""

    name: str
    args: List[Expression]
|
||||
|
||||
|
||||
class Parser:
    """Recursive-descent parser that turns a token list into a `Program`.

    Expression precedence, loosest to tightest: assignment (right-associative),
    comparison, additive, multiplicative, unary sign, primary.
    """

    def __init__(self, tokens: List[Token]):
        self.tokens = tokens
        self.pos = 0

    # ------------------------------------------------------------------
    # Token-stream helpers
    # ------------------------------------------------------------------

    def error(self, msg: str):
        """Raise a SyntaxError located at the current token."""
        where = self.current()
        raise SyntaxError(f"Parser error at line {where.line}, col {where.col}: {msg}")

    def current(self) -> Token:
        """Return the token at the cursor, or the last token once past the end."""
        if self.pos < len(self.tokens):
            return self.tokens[self.pos]
        return self.tokens[-1]

    def peek(self, offset: int = 0) -> Token:
        """Return the token `offset` places ahead, clamped to the last token."""
        index = self.pos + offset
        if index < len(self.tokens):
            return self.tokens[index]
        return self.tokens[-1]

    def advance(self) -> Token:
        """Return the current token and step forward, never past the last token."""
        consumed = self.current()
        last_index = len(self.tokens) - 1
        if self.pos < last_index:
            self.pos += 1
        return consumed

    def expect(self, token_type: TokenType) -> Token:
        """Consume the current token, which must be of `token_type`."""
        found = self.current()
        if found.type != token_type:
            self.error(f"Expected {token_type.value}, got {found.type.value}")
        return self.advance()

    def _initializer_then_semicolon(self):
        """Parse an optional `= expr` followed by the mandatory `;`.

        Returns the initializer expression, or None when there is none.
        """
        init = None
        if self.current().type == TokenType.ASSIGN:
            self.advance()
            init = self.parse_expression()
        self.expect(TokenType.SEMICOLON)
        return init

    def _binary_chain(self, parse_operand, operators):
        """Parse `operand (op operand)*` into a left-leaning BinaryOp tree."""
        expr = parse_operand()
        while self.current().type in operators:
            op = self.advance().value
            expr = BinaryOp(op, expr, parse_operand())
        return expr

    def _call_arguments(self):
        """Parse a comma-separated argument list up to and including `)`."""
        args = []
        if self.current().type != TokenType.RPAREN:
            args.append(self.parse_expression())
            while self.current().type == TokenType.COMMA:
                self.advance()
                args.append(self.parse_expression())
        self.expect(TokenType.RPAREN)
        return args

    # ------------------------------------------------------------------
    # Declarations
    # ------------------------------------------------------------------

    def parse(self) -> Program:
        """Parse declarations until the EOF token and wrap them in a Program."""
        declarations = []
        while self.current().type != TokenType.EOF:
            declarations.append(self.parse_declaration())
        return Program(declarations)

    def parse_declaration(self) -> Declaration:
        """Parse `int name(...) {...}` or `int name [= expr];`."""
        self.expect(TokenType.INT)
        name = self.expect(TokenType.IDENTIFIER).value

        if self.current().type != TokenType.LPAREN:
            # Variable declaration
            return VarDecl(name, self._initializer_then_semicolon())

        # Function declaration
        self.advance()
        params = []
        if self.current().type != TokenType.RPAREN:
            while True:
                self.expect(TokenType.INT)
                params.append(self.expect(TokenType.IDENTIFIER).value)
                if self.current().type != TokenType.COMMA:
                    break
                self.advance()
        self.expect(TokenType.RPAREN)
        body = self.parse_compound_stmt()
        return FunctionDecl(name, params, body)

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------

    def parse_compound_stmt(self) -> CompoundStmt:
        """Parse `{ statement* }`."""
        self.expect(TokenType.LBRACE)
        statements = []
        while self.current().type != TokenType.RBRACE:
            statements.append(self.parse_statement())
        self.expect(TokenType.RBRACE)
        return CompoundStmt(statements)

    def parse_statement(self) -> Statement:
        """Dispatch on the leading token to the matching statement parser."""
        kind = self.current().type

        if kind == TokenType.LBRACE:
            return self.parse_compound_stmt()
        if kind == TokenType.IF:
            return self.parse_if_stmt()
        if kind == TokenType.WHILE:
            return self.parse_while_stmt()
        if kind == TokenType.RETURN:
            return self.parse_return_stmt()

        if kind == TokenType.INT:
            # Local variable declaration: becomes an assignment statement when
            # initialised, and an empty statement otherwise.
            self.advance()
            name = self.expect(TokenType.IDENTIFIER).value
            init = self._initializer_then_semicolon()
            if init:
                return ExprStmt(AssignExpr(name, init))
            return ExprStmt(None)

        # Expression statement, possibly empty (a lone `;`).
        expr = None
        if self.current().type != TokenType.SEMICOLON:
            expr = self.parse_expression()
        self.expect(TokenType.SEMICOLON)
        return ExprStmt(expr)

    def parse_if_stmt(self) -> IfStmt:
        """Parse `if (cond) stmt [else stmt]`."""
        self.expect(TokenType.IF)
        self.expect(TokenType.LPAREN)
        condition = self.parse_expression()
        self.expect(TokenType.RPAREN)
        then_stmt = self.parse_statement()

        if self.current().type != TokenType.ELSE:
            return IfStmt(condition, then_stmt, None)
        self.advance()
        return IfStmt(condition, then_stmt, self.parse_statement())

    def parse_while_stmt(self) -> WhileStmt:
        """Parse `while (cond) stmt`."""
        self.expect(TokenType.WHILE)
        self.expect(TokenType.LPAREN)
        condition = self.parse_expression()
        self.expect(TokenType.RPAREN)
        return WhileStmt(condition, self.parse_statement())

    def parse_return_stmt(self) -> ReturnStmt:
        """Parse `return [expr];`."""
        self.expect(TokenType.RETURN)
        expr = (
            None
            if self.current().type == TokenType.SEMICOLON
            else self.parse_expression()
        )
        self.expect(TokenType.SEMICOLON)
        return ReturnStmt(expr)

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------

    def parse_expression(self) -> Expression:
        """Entry point for expressions; starts at the loosest precedence level."""
        return self.parse_assignment()

    def parse_assignment(self) -> Expression:
        """Parse `name = assignment` (right-associative) or fall through."""
        expr = self.parse_comparison()
        if self.current().type != TokenType.ASSIGN:
            return expr

        if not isinstance(expr, VarExpr):
            self.error("Invalid assignment target")
        self.advance()
        value = self.parse_assignment()
        return AssignExpr(expr.name, value)

    def parse_comparison(self) -> Expression:
        """Parse a chain of `== != < > <= >=` over additive operands."""
        comparison_ops = (
            TokenType.EQ,
            TokenType.NE,
            TokenType.LT,
            TokenType.GT,
            TokenType.LE,
            TokenType.GE,
        )
        return self._binary_chain(self.parse_additive, comparison_ops)

    def parse_additive(self) -> Expression:
        """Parse a chain of `+` / `-` over multiplicative operands."""
        return self._binary_chain(
            self.parse_multiplicative, (TokenType.PLUS, TokenType.MINUS)
        )

    def parse_multiplicative(self) -> Expression:
        """Parse a chain of `*` / `/` over unary operands."""
        return self._binary_chain(
            self.parse_unary, (TokenType.STAR, TokenType.SLASH)
        )

    def parse_unary(self) -> Expression:
        """Parse any number of prefix `+` / `-` signs, then a primary."""
        if self.current().type not in (TokenType.PLUS, TokenType.MINUS):
            return self.parse_primary()
        sign = self.advance().value
        return UnaryOp(sign, self.parse_unary())

    def parse_primary(self) -> Expression:
        """Parse a number, a variable, a call, or a parenthesised expression."""
        token = self.current()
        kind = token.type

        if kind == TokenType.NUMBER:
            self.advance()
            return NumberExpr(int(token.value))

        if kind == TokenType.IDENTIFIER:
            name = self.advance().value
            if self.current().type != TokenType.LPAREN:
                return VarExpr(name)
            # Function call
            self.advance()
            return CallExpr(name, self._call_arguments())

        if kind == TokenType.LPAREN:
            self.advance()
            inner = self.parse_expression()
            self.expect(TokenType.RPAREN)
            return inner

        self.error(f"Unexpected token: {token.type.value}")
|
||||
|
||||
|
||||
class CodeGenerator:
|
||||
def __init__(self):
|
||||
self.output = []
|
||||
self.label_counter = 0
|
||||
self.string_counter = 0
|
||||
self.functions = {}
|
||||
self.current_function = None
|
||||
self.local_vars = {}
|
||||
self.global_vars = {}
|
||||
self.register_pool = [f"rg{i:x}" for i in range(16)]
|
||||
self.used_registers = set()
|
||||
|
||||
def new_label(self, prefix: str = "L") -> str:
|
||||
label = f"{prefix}{self.label_counter}"
|
||||
self.label_counter += 1
|
||||
return label
|
||||
|
||||
def allocate_register(self) -> str:
|
||||
for reg in self.register_pool:
|
||||
if reg not in self.used_registers:
|
||||
self.used_registers.add(reg)
|
||||
return reg
|
||||
raise RuntimeError("Out of registers")
|
||||
|
||||
def free_register(self, reg: str):
|
||||
self.used_registers.discard(reg)
|
||||
|
||||
def emit(self, code: str):
|
||||
self.output.append(code)
|
||||
|
||||
def generate(self, program: Program) -> str:
|
||||
# Emit data section
|
||||
self.emit("// Global variables")
|
||||
for decl in program.declarations:
|
||||
if isinstance(decl, VarDecl):
|
||||
self.global_vars[decl.name] = f"var_{decl.name}"
|
||||
if decl.init:
|
||||
if isinstance(decl.init, NumberExpr):
|
||||
self.emit(f"dw var_{decl.name}: {decl.init.value}")
|
||||
else:
|
||||
self.emit(f"dw var_{decl.name}: 0")
|
||||
else:
|
||||
self.emit(f"dw var_{decl.name}: 0")
|
||||
|
||||
self.emit("")
|
||||
self.emit("// Entry point")
|
||||
self.emit("dw stack_bottom: 0x10000")
|
||||
self.emit("")
|
||||
self.emit("init:")
|
||||
self.emit(" ldw stack_bottom, spr")
|
||||
self.emit(" mov spr, bpr")
|
||||
|
||||
self.emit(" push zero")
|
||||
self.emit(" call main")
|
||||
self.emit(" pop rg0")
|
||||
self.emit(" hlt")
|
||||
self.emit("")
|
||||
|
||||
# Emit functions
|
||||
for decl in program.declarations:
|
||||
if isinstance(decl, FunctionDecl):
|
||||
self.generate_function(decl)
|
||||
|
||||
return "\n".join(self.output)
|
||||
|
||||
def generate_function(self, func: FunctionDecl):
|
||||
self.current_function = func.name
|
||||
self.functions[func.name] = func
|
||||
self.local_vars = {}
|
||||
|
||||
# Map parameters to stack offsets
|
||||
# Parameters start at bpr+8 (after return addr at bpr+4)
|
||||
for i, param in enumerate(func.params):
|
||||
self.local_vars[param] = 8 + (i * 4)
|
||||
|
||||
self.emit(f"{func.name}:")
|
||||
self.emit(" push bpr")
|
||||
self.emit(" mov spr, bpr")
|
||||
self.emit("")
|
||||
|
||||
# Generate function body
|
||||
self.generate_compound_stmt(func.body)
|
||||
|
||||
# Default return if no explicit return
|
||||
self.emit("// default return")
|
||||
self.emit(f"{func.name}_end:")
|
||||
self.emit(" mov bpr, spr")
|
||||
self.emit(" pop bpr")
|
||||
self.emit(" return")
|
||||
self.emit("")
|
||||
|
||||
def generate_compound_stmt(self, stmt: CompoundStmt):
|
||||
for s in stmt.statements:
|
||||
self.generate_statement(s)
|
||||
|
||||
def generate_statement(self, stmt: Statement):
|
||||
if isinstance(stmt, CompoundStmt):
|
||||
self.generate_compound_stmt(stmt)
|
||||
elif isinstance(stmt, ExprStmt):
|
||||
if stmt.expr:
|
||||
reg = self.generate_expression(stmt.expr)
|
||||
self.free_register(reg)
|
||||
elif isinstance(stmt, IfStmt):
|
||||
self.generate_if_stmt(stmt)
|
||||
elif isinstance(stmt, WhileStmt):
|
||||
self.generate_while_stmt(stmt)
|
||||
elif isinstance(stmt, ReturnStmt):
|
||||
self.generate_return_stmt(stmt)
|
||||
|
||||
def generate_if_stmt(self, stmt: IfStmt):
|
||||
else_label = self.new_label("else")
|
||||
end_label = self.new_label("endif")
|
||||
|
||||
# Evaluate condition
|
||||
cond_reg = self.generate_expression(stmt.condition)
|
||||
self.emit(f" cmp {cond_reg}, zero")
|
||||
self.free_register(cond_reg)
|
||||
|
||||
if stmt.else_stmt:
|
||||
self.emit(f" jeq {else_label}")
|
||||
else:
|
||||
self.emit(f" jeq {end_label}")
|
||||
|
||||
# Then branch
|
||||
self.generate_statement(stmt.then_stmt)
|
||||
|
||||
if stmt.else_stmt:
|
||||
self.emit(f" jmp {end_label}")
|
||||
self.emit(f"{else_label}:")
|
||||
self.generate_statement(stmt.else_stmt)
|
||||
|
||||
self.emit(f"{end_label}:")
|
||||
|
||||
def generate_while_stmt(self, stmt: WhileStmt):
    """Generate a ``while`` loop.

    The condition is re-evaluated at the top of every iteration; when it compares
    equal to ``zero`` the loop is left.
    """
    loop_top, loop_exit = (
        self.new_label(tag) for tag in ("while_start", "while_end")
    )

    self.emit(f"{loop_top}:")

    flag = self.generate_expression(stmt.condition)
    self.emit(f" cmp {flag}, zero")
    self.free_register(flag)
    self.emit(f" jeq {loop_exit}")

    self.generate_statement(stmt.body)
    self.emit(f" jmp {loop_top}")

    self.emit(f"{loop_exit}:")
|
||||
|
||||
def generate_return_stmt(self, stmt: ReturnStmt):
    """Generate a ``return`` statement.

    When a value is returned it is stored at offset 8 from ``spr`` (the slot the
    calling convention uses for the return value); control then jumps to the
    shared epilogue label of the current function.
    """
    value = stmt.expr
    if value:
        holder = self.generate_expression(value)
        self.emit(" stw %s, spr, 8" % holder)
        self.free_register(holder)

    epilogue = f"{self.current_function}_end"
    self.emit(f" jmp {epilogue}")
|
||||
|
||||
def generate_expression(self, expr: Expression) -> str:
    """Emit code that evaluates ``expr`` and return the register holding the result.

    The caller owns the returned register and must release it with
    ``free_register`` once the value is no longer needed.

    Raises:
        RuntimeError: if a variable is read before it is defined, or if the node
            type is not supported.
    """
    if isinstance(expr, NumberExpr):
        reg = self.allocate_register()
        if 0 <= expr.value <= 0xFFFF:
            self.emit(f" lli {expr.value}, {reg}")
            if expr.value > 0xFF:
                # NOTE(review): value >> 16 is always 0 in this branch, so this
                # emits "lui 0"; presumably meant to clear the upper half - confirm.
                self.emit(f" lui {expr.value >> 16}, {reg}")
        else:
            # Negative or wider than 16 bits: load both halves explicitly.
            self.emit(f" lli {expr.value & 0xFFFF}, {reg}")
            self.emit(f" lui {(expr.value >> 16) & 0xFFFF}, {reg}")
        return reg

    elif isinstance(expr, VarExpr):
        reg = self.allocate_register()
        if expr.name in self.local_vars:
            offset = self.local_vars[expr.name]
            self.emit(f" ldw bpr, {reg}, {offset}")
        elif expr.name in self.global_vars:
            label = self.global_vars[expr.name]
            self.emit(f" ldw {label}, {reg}")
        else:
            raise RuntimeError(f"Undefined variable: {expr.name}")
        return reg

    elif isinstance(expr, AssignExpr):
        value_reg = self.generate_expression(expr.value)

        if expr.name in self.local_vars:
            offset = self.local_vars[expr.name]
            self.emit(f" stw {value_reg}, bpr, {offset}")
        elif expr.name in self.global_vars:
            label = self.global_vars[expr.name]
            self.emit(f" stw {value_reg}, {label}")
        else:
            # First assignment to an unknown name creates a new local. Locals
            # live at negative offsets from bpr, 4 bytes apart, growing downward.
            existing_locals = sum(1 for v in self.local_vars.values() if v < 0)
            offset = -(existing_locals + 1) * 4
            self.local_vars[expr.name] = offset
            self.emit(f" stw {value_reg}, bpr, {offset}")

        # The assigned value is also the value of the assignment expression.
        return value_reg

    elif isinstance(expr, BinaryOp):
        return self.generate_binary_op(expr)

    elif isinstance(expr, UnaryOp):
        operand_reg = self.generate_expression(expr.operand)
        result_reg = self.allocate_register()

        if expr.op == "-":
            # Negate as 0 - operand.
            self.emit(f" lwi 0, {result_reg}")
            self.emit(f" sub {result_reg}, {operand_reg}, {result_reg}")
        else:  # +
            self.emit(f" mov {operand_reg}, {result_reg}")

        self.free_register(operand_reg)
        return result_reg

    elif isinstance(expr, CallExpr):
        result_reg = self.allocate_register()

        # Push arguments in reverse order so the first argument ends up on top
        # of the stack.
        arg_regs = []
        for arg in reversed(expr.args):
            reg = self.generate_expression(arg)
            self.emit(f" push {reg}")
            arg_regs.append(reg)

        if not arg_regs:
            # The return value is written into the top argument slot and popped
            # after the call. With no arguments there is no such slot, so
            # reserve one; otherwise the pop below would unbalance the stack.
            self.emit(" push zero")

        self.emit(f" call {expr.name}")

        # The return value is now on top of the stack.
        self.emit(f" pop {result_reg}")

        # Discard the remaining argument slots (one was consumed by the pop above).
        for _ in range(len(arg_regs) - 1):
            self.emit(" pop zero")

        for reg in arg_regs:
            self.free_register(reg)

        return result_reg

    else:
        raise RuntimeError(f"Unknown expression type: {type(expr)}")
|
||||
|
||||
def generate_binary_op(self, expr: BinaryOp) -> str:
    """Emit code for a binary operation and return the register holding the result.

    The left operand's register stays allocated while the right operand is
    evaluated, so the right-hand side cannot be handed the same register and
    overwrite the left value. Both operand registers are released exactly once,
    after the operation has been emitted.

    Comparison operators yield 1 when the condition holds and 0 otherwise.

    Raises:
        RuntimeError: if ``expr.op`` is not a supported operator.
    """
    left_reg = self.generate_expression(expr.left)
    right_reg = self.generate_expression(expr.right)
    result_reg = self.allocate_register()

    comparison_jumps = {
        "==": "jeq",
        "!=": "jne",
        "<": "jlt",
        ">": "jgt",
        "<=": "jle",
        ">=": "jge",
    }

    if expr.op == "+":
        self.emit(f" add {left_reg}, {right_reg}, {result_reg}")
    elif expr.op == "-":
        self.emit(f" sub {left_reg}, {right_reg}, {result_reg}")
    elif expr.op == "*":
        # Multiplication by repeated addition: add left to the result while
        # counting right down to zero.
        # NOTE(review): a negative right operand never reaches zero by
        # decrementing (short of wrap-around) - confirm intended domain.
        temp_label = self.new_label("mult")
        end_label = self.new_label("mult_end")
        self.emit(f" lli 0, {result_reg}")
        self.emit(f"{temp_label}:")
        self.emit(f" cmp {right_reg}, zero")
        self.emit(f" jeq {end_label}")
        self.emit(f" add {result_reg}, {left_reg}, {result_reg}")
        self.emit(f" dec {right_reg}")
        self.emit(f" jmp {temp_label}")
        self.emit(f"{end_label}:")
    elif expr.op == "/":
        # Division by repeated subtraction: count how often right fits in left.
        # NOTE(review): a zero divisor makes this loop never terminate.
        temp_label = self.new_label("div")
        end_label = self.new_label("div_end")
        self.emit(f" lli 0, {result_reg}")
        self.emit(f"{temp_label}:")
        self.emit(f" cmp {left_reg}, {right_reg}")
        self.emit(f" jlt {end_label}")
        self.emit(f" sub {left_reg}, {right_reg}, {left_reg}")
        self.emit(f" inc {result_reg}")
        self.emit(f" jmp {temp_label}")
        self.emit(f"{end_label}:")
    elif expr.op in comparison_jumps:
        self.emit(f" cmp {left_reg}, {right_reg}")

        # Result is 1 if the condition is true, 0 otherwise.
        self.emit(f" lli 0, {result_reg}")
        true_label = self.new_label("cmp_true")
        end_label = self.new_label("cmp_end")

        self.emit(f" {comparison_jumps[expr.op]} {true_label}")
        self.emit(f" jmp {end_label}")
        self.emit(f"{true_label}:")
        self.emit(f" lli 1, {result_reg}")
        self.emit(f"{end_label}:")
    else:
        # Previously fell through and returned an uninitialised register.
        raise RuntimeError(f"Unknown binary operator: {expr.op}")

    self.free_register(left_reg)
    self.free_register(right_reg)
    return result_reg
|
||||
|
||||
|
||||
def compile_c_to_asm(source: str) -> str:
    """Compile C source code to DSA assembly.

    Runs the three stages in order - lexing, parsing, code generation - and
    returns the generated assembly text.
    """
    tokens = Lexer(source).tokenize()
    ast = Parser(tokens).parse()
    return CodeGenerator().generate(ast)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: ``python compiler.py <input.c> [output.dsa]``.

    When no output path is given it is derived from the input path by replacing
    the file extension with ``.dsa``. Exits with status 1 on a usage error or a
    compilation error.
    """
    import os

    if len(sys.argv) < 2:
        print("Usage: python compiler.py <input.c> [output.dsa]")
        sys.exit(1)

    input_file = sys.argv[1]
    if len(sys.argv) > 2:
        output_file = sys.argv[2]
    else:
        # Swap only the extension. A plain str.replace(".c", ".dsa") would
        # rewrite every ".c" in the path and, for inputs without ".c", would
        # yield the input path itself and overwrite the source file.
        output_file = os.path.splitext(input_file)[0] + ".dsa"

    with open(input_file, "r") as f:
        source = f.read()

    try:
        assembly = compile_c_to_asm(source)

        with open(output_file, "w") as f:
            f.write(assembly)

        print(f"Successfully compiled {input_file} to {output_file}")
    except (SyntaxError, RuntimeError) as e:
        print(f"Compilation error: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
# # Example usage
|
||||
# if len(sys.argv) > 1:
|
||||
# example_c = sys.argv[1]
|
||||
|
||||
# else:
|
||||
# example_c = """
|
||||
# int factorial(int n) {
|
||||
# if (n <= 1) {
|
||||
# return 1;
|
||||
# }
|
||||
# return n * factorial(n - 1);
|
||||
# }
|
||||
|
||||
# int main() {
|
||||
# int result;
|
||||
# result = factorial(5);
|
||||
# return result;
|
||||
# }
|
||||
# """
|
||||
|
||||
# print("Example C program:")
|
||||
# print(example_c)
|
||||
# print("\n" + "="*60 + "\n")
|
||||
# print("Generated DSA assembly:")
|
||||
# print(compile_c_to_asm(example_c))
|
||||
@@ -1,25 +0,0 @@
|
||||
include print: "lib/io/print.dsa"
|
||||
|
||||
int factorial(int n) {
|
||||
if (n <= 1) {
|
||||
return 1;
|
||||
}
|
||||
return n * factorial(n - 1);
|
||||
}
|
||||
|
||||
int add_(int a, int b) {
|
||||
return a + b;
|
||||
}
|
||||
|
||||
int greater(int a, int b) {
|
||||
if (a + a > b + b) {
|
||||
return a;
|
||||
} else {
|
||||
return b + a;
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
printnum(-5);
|
||||
return 0;
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
// Imports
|
||||
include maths: "./lib/maths/core.dsa"
|
||||
|
||||
// Reserved Memory
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
/// Every register name known to the DSA tooling.
///
/// The variants are grouped into general purpose, special purpose and system
/// registers; the declaration order is significant because it determines the
/// implicit discriminant of each variant.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum Register {
    // General purpose registers: rg0 - rgf.
    Rg0,
    Rg1,
    Rg2,
    Rg3,
    Rg4,
    Rg5,
    Rg6,
    Rg7,
    Rg8,
    Rg9,
    Rga,
    Rgb,
    Rgc,
    Rgd,
    Rge,
    Rgf,

    // Special purpose registers.
    Acc,
    Spr,
    Bpr,
    Ret,
    Idr,
    Mmr,
    Zero,
    NoReg,

    // System registers: these cannot be written to by instructions.
    Mar,
    Mdr,
    Sts,
    Cir,
    Pcx,
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[repr(u8)]
|
||||
#[non_exhaustive]
|
||||
/// A list of all current instructions in the DSA Assembly language.
|
||||
pub enum Instruction {
|
||||
// No-op
|
||||
Nop = 0x0,
|
||||
|
||||
// Data transfer instructions
|
||||
Mov(Register, Register) = 0x1,
|
||||
Movs(Register, Register) = 0x2,
|
||||
|
||||
Ldb(Register, Register, Option<u32>) = 0x3,
|
||||
Ldbs(Register, Register, Option<u32>) = 0x4,
|
||||
Ldh(Register, Register, Option<u32>) = 0x5,
|
||||
Ldhs(Register, Register, Option<u32>) = 0x6,
|
||||
Ldw(Register, Register, Option<u32>) = 0x7,
|
||||
|
||||
Stb(Register, Register, Option<u32>) = 0x8,
|
||||
Sth(Register, Register, Option<u32>) = 0x9,
|
||||
Stw(Register, Register, Option<u32>) = 0xA,
|
||||
|
||||
Lli(u16, Register) = 0xB,
|
||||
Lui(u16, Register) = 0xC,
|
||||
|
||||
// Jump Instructions
|
||||
Jump(u16, Register) = 0xD,
|
||||
JumpEq(u16, Register) = 0xE,
|
||||
JumpNeq(u16, Register) = 0xF,
|
||||
JumpGt(u16, Register) = 0x10,
|
||||
JumpGe(u16, Register) = 0x11,
|
||||
JumpLt(u16, Register) = 0x12,
|
||||
JumpLe(u16, Register) = 0x13,
|
||||
|
||||
// Comparison
|
||||
Compare(Register, Register) = 0x14,
|
||||
|
||||
// // Arithmetic
|
||||
// Add(args::RTypeArgs) = 0x19,
|
||||
// Sub(args::RTypeArgs) = 0x1A,
|
||||
// Increment(args::RTypeArgs) = 0x15,
|
||||
// Decrement(args::RTypeArgs) = 0x16,
|
||||
// ShiftLeft(args::RTypeArgs) = 0x17,
|
||||
// ShiftRight(args::RTypeArgs) = 0x18,
|
||||
|
||||
// // Logical
|
||||
// And(args::RTypeArgs) = 0x1B,
|
||||
// Or(args::RTypeArgs) = 0x1C,
|
||||
// Not(args::RTypeArgs) = 0x1D,
|
||||
// Xor(args::RTypeArgs) = 0x1E,
|
||||
// Nand(args::RTypeArgs) = 0x1F,
|
||||
// Nor(args::RTypeArgs) = 0x20,
|
||||
// Xnor(args::RTypeArgs) = 0x21,
|
||||
|
||||
// // Misc
|
||||
// Interrupt(Interrupt) = 0x22,
|
||||
// IntReturn = 0x23,
|
||||
// Halt = 0x24,
|
||||
|
||||
// // Immediate Arithmetic
|
||||
// AddImmediate(args::ITypeArgs) = 0x25,
|
||||
// SubImmediate(args::ITypeArgs) = 0x26,
|
||||
|
||||
// Fake Instructions
|
||||
Data(u32) = 0x3E,
|
||||
Segment(u32) = 0x3F,
|
||||
}
|
||||
@@ -1,599 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::hash::Hash;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::atomic::AtomicU32;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use chrono::{DateTime, Local};
|
||||
|
||||
use crate::registers::RegisterAllocator;
|
||||
use crate::{block, cmd, comment, dsa};
|
||||
|
||||
use crate::parser::{
|
||||
BinaryOperator, ConstExpr, Declaration, Expression, Parameter, Program, Statement,
|
||||
UnaryOperator,
|
||||
};
|
||||
|
||||
pub struct CodeGenerator {
|
||||
ast: Program,
|
||||
imports: HashMap<String, String>,
|
||||
globals: Vec<String>,
|
||||
functions: Vec<String>,
|
||||
symbols: Vec<String>,
|
||||
allocator: RegisterAllocator,
|
||||
}
|
||||
|
||||
/// Built-in functions callable from source code, mapped to the library routine
/// that implements each of them.
static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    let mut table = HashMap::with_capacity(2);
    table.insert("print", "print::print");
    table.insert("printnum", "print::print_num");
    table
});
|
||||
|
||||
/// Renders one include directive: `include <name>: "<path>"`.
fn import(name: &str, path: &str) -> String {
    format!("include {name}: \"{path}\"")
}
|
||||
|
||||
impl CodeGenerator {
|
||||
const RET: &'static str = "\tjmp _ret";
|
||||
|
||||
pub fn new(ast: Program) -> Self {
|
||||
CodeGenerator {
|
||||
ast,
|
||||
imports: HashMap::new(),
|
||||
globals: Vec::new(),
|
||||
functions: Vec::new(),
|
||||
symbols: Vec::new(),
|
||||
allocator: RegisterAllocator::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn include(&mut self, name: &str, path: &str) {
|
||||
self.imports.insert(name.to_string(), path.to_string());
|
||||
}
|
||||
|
||||
pub fn generate(&mut self) -> Result<String, String> {
|
||||
// always include the print library for debugging!
|
||||
self.include("print", "./lib/io/print.dsa");
|
||||
|
||||
for block in self.ast.clone().declarations {
|
||||
match block {
|
||||
Declaration::Variable { name, .. } => self.symbols.push(name),
|
||||
Declaration::Function { name, .. } => self.symbols.push(name),
|
||||
Declaration::Import { name, .. } => self.symbols.push(name),
|
||||
}
|
||||
}
|
||||
|
||||
for block in self.ast.clone().declarations {
|
||||
self.generate_block(block.clone())?;
|
||||
}
|
||||
|
||||
self.generate_layout()
|
||||
}
|
||||
|
||||
fn generate_layout(&mut self) -> Result<String, String> {
|
||||
let datetime: DateTime<Local> = SystemTime::now().into();
|
||||
Ok(dsa![
|
||||
"",
|
||||
comment!("GENERATED BY DSA-C COMPILER"),
|
||||
comment!(format!(
|
||||
"Generated at {}",
|
||||
datetime.format("%Y-%m-%d %H:%M:%S")
|
||||
)),
|
||||
"",
|
||||
// imports
|
||||
comment!("Imports"),
|
||||
self.imports
|
||||
.iter()
|
||||
.map(|(k, v)| import(k, v))
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n"),
|
||||
"",
|
||||
// reserved memory
|
||||
comment!("Globals & Reserved Memory"),
|
||||
self.globals.join("\n"),
|
||||
"",
|
||||
// entry point
|
||||
comment!("Entry Point"),
|
||||
"dw stack: 0x10000",
|
||||
"db message: \"Process Exited with code:\"",
|
||||
block! [ "_init"
|
||||
dsa![ldw stack, bpr],
|
||||
dsa![mov bpr, spr],
|
||||
dsa![push zero],
|
||||
dsa![call main],
|
||||
dsa![call print::print_newline],
|
||||
dsa![lwi message, rg0],
|
||||
dsa![push rg0],
|
||||
dsa![call print::print],
|
||||
dsa![pop zero],
|
||||
dsa![call print::print_hex_word],
|
||||
dsa![pop zero],
|
||||
dsa![hlt]
|
||||
],
|
||||
"",
|
||||
comment!("Function return boilerplate"),
|
||||
block! [ "_ret"
|
||||
dsa![mov bpr, spr],
|
||||
dsa![pop bpr],
|
||||
dsa![return]
|
||||
],
|
||||
// block! [ "main"
|
||||
// dsa![push bpr],
|
||||
// dsa![mov spr, bpr],
|
||||
// dsa![lwi 67, rg1],
|
||||
// dsa![stw rg1, spr, 8],
|
||||
// dsa![mov bpr, spr],
|
||||
// dsa![pop bpr],
|
||||
// dsa![return]
|
||||
// ],
|
||||
"",
|
||||
self.functions.join("\n"),
|
||||
])
|
||||
}
|
||||
|
||||
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
|
||||
self.globals.push(format!(
|
||||
"dw {}: {}",
|
||||
name,
|
||||
init.unwrap_or(ConstExpr::Number(0))
|
||||
))
|
||||
}
|
||||
|
||||
fn generate_block(&mut self, block: Declaration) -> Result<(), String> {
|
||||
match block {
|
||||
Declaration::Variable { name, init } => self.generate_global(&name, init),
|
||||
Declaration::Function {
|
||||
name,
|
||||
return_type,
|
||||
params,
|
||||
body,
|
||||
} => {
|
||||
let func = self.generate_function(&name, ¶ms, &body).join("\n");
|
||||
|
||||
self.functions.push(format!("{func}\n"));
|
||||
}
|
||||
Declaration::Import { name, path } => {
|
||||
self.imports.insert(name, path);
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Example: Generate code for a function
|
||||
fn generate_function(
|
||||
&mut self,
|
||||
name: &str,
|
||||
params: &[Parameter],
|
||||
body: &[Statement],
|
||||
) -> Vec<String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
// Reset allocator for new function
|
||||
self.allocator.reset();
|
||||
|
||||
// Function prologue
|
||||
code.push(format!("{}:", name));
|
||||
code.push("\tpush bpr".to_string());
|
||||
code.push("\tmov spr, bpr".to_string());
|
||||
code.push(String::new());
|
||||
|
||||
// Allocate parameters to registers or stack locations
|
||||
for (i, param) in params.iter().enumerate() {
|
||||
let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8
|
||||
// Track that this parameter is at a stack location
|
||||
let (reg, load_code) = self.allocator.alloc_var(¶m.name).unwrap();
|
||||
code.extend(load_code);
|
||||
code.push(format!("\tldw bpr, {}, {}", reg, offset));
|
||||
}
|
||||
|
||||
// Generate code for function body
|
||||
for stmt in body {
|
||||
let stmt_code = self.generate_statement(stmt).unwrap();
|
||||
code.extend(stmt_code);
|
||||
}
|
||||
|
||||
// automatically return at function end
|
||||
if let Some(x) = code.last()
|
||||
&& x == Self::RET
|
||||
{
|
||||
} else {
|
||||
code.push(Self::RET.to_string());
|
||||
}
|
||||
|
||||
code
|
||||
}
|
||||
|
||||
// Example: Generate code for a statement
|
||||
fn generate_statement(&mut self, stmt: &Statement) -> Result<Vec<String>, String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
match stmt {
|
||||
Statement::Assign {
|
||||
name,
|
||||
declare_type,
|
||||
value,
|
||||
} => {
|
||||
if let Some(expr) = value {
|
||||
// Evaluate expression
|
||||
let (result_reg, expr_code) = self.generate_expression(expr)?;
|
||||
code.extend(expr_code);
|
||||
|
||||
// Store result in variable
|
||||
let store_code = self.allocator.store_var(name, &result_reg);
|
||||
code.extend(store_code);
|
||||
|
||||
// Free temporary register
|
||||
self.allocator.free_temp(&result_reg);
|
||||
} else {
|
||||
// Just declaring variable without initialization
|
||||
self.allocator.alloc_var(name)?;
|
||||
}
|
||||
}
|
||||
|
||||
Statement::Return { expr } => {
|
||||
if let Some(e) = expr {
|
||||
let (result_reg, expr_code) = self.generate_expression(e)?;
|
||||
code.extend(expr_code);
|
||||
code.push(format!("\tstw {}, bpr, 8", result_reg));
|
||||
code.push(format!("\tjmp _ret"));
|
||||
self.allocator.free_temp(&result_reg);
|
||||
}
|
||||
}
|
||||
|
||||
Statement::If {
|
||||
condition,
|
||||
then_stmt,
|
||||
else_stmt,
|
||||
} => {
|
||||
// Generate condition
|
||||
let (cond_reg, cond_code) = self.generate_expression(condition)?;
|
||||
code.extend(cond_code);
|
||||
|
||||
// Compare with zero
|
||||
code.push(format!("\tcmp {}, zero", cond_reg));
|
||||
self.allocator.free_temp(&cond_reg);
|
||||
|
||||
// Generate unique labels
|
||||
let then_label = format!("_then_{}", self.get_unique_label());
|
||||
let else_label = format!("_else_{}", self.get_unique_label());
|
||||
let end_label = format!("_end_{}", self.get_unique_label());
|
||||
|
||||
// Jump to else if condition is false (equal to zero)
|
||||
code.push(format!("\tjeq {}", else_label));
|
||||
|
||||
// Then block
|
||||
code.push(format!("{}:", then_label));
|
||||
for s in then_stmt {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
if then_stmt.len() == 0 {
|
||||
code.push("\tnop".to_string());
|
||||
}
|
||||
|
||||
code.push(format!("\tjmp {}", end_label));
|
||||
|
||||
// Else block
|
||||
code.push(format!("{}:", else_label));
|
||||
for s in else_stmt {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
if else_stmt.len() == 0 {
|
||||
code.push("\tnop".to_string());
|
||||
}
|
||||
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
|
||||
Statement::While { condition, body } => {
|
||||
let loop_start = format!("_while_start_{}", self.get_unique_label());
|
||||
let loop_end = format!("_while_end_{}", self.get_unique_label());
|
||||
|
||||
code.push(format!("{}:", loop_start));
|
||||
|
||||
// Generate condition
|
||||
let (cond_reg, cond_code) = self.generate_expression(condition)?;
|
||||
code.extend(cond_code);
|
||||
|
||||
code.push(format!("\tcmp {}, zero", cond_reg));
|
||||
self.allocator.free_temp(&cond_reg);
|
||||
|
||||
code.push(format!("\tjeq {}", loop_end));
|
||||
|
||||
// Loop body
|
||||
for s in body {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
code.push(format!("\tjmp {}", loop_start));
|
||||
code.push(format!("{}:", loop_end));
|
||||
}
|
||||
|
||||
Statement::Expression { expr } => {
|
||||
let (result_reg, expr_code) = self.generate_expression(expr)?;
|
||||
code.extend(expr_code);
|
||||
self.allocator.free_temp(&result_reg);
|
||||
}
|
||||
|
||||
Statement::Block(statements) => {
|
||||
for s in statements {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(code)
|
||||
}
|
||||
|
||||
// Example: Generate code for an expression
|
||||
// Returns (register containing result, assembly code)
|
||||
fn generate_expression(
|
||||
&mut self,
|
||||
expr: &Expression,
|
||||
) -> Result<(String, Vec<String>), String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
match expr {
|
||||
Expression::Number { value } => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.extend(alloc_code);
|
||||
|
||||
// Load immediate value
|
||||
code.push(format!("\tlli {}, {}", value & 0xFFFF, reg));
|
||||
if *value > 0xFFFF || *value < 0 {
|
||||
code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg));
|
||||
}
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::Variable { name, .. } => {
|
||||
let (reg, load_code) = self.allocator.load_var(name)?;
|
||||
code.extend(load_code);
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::Binary { op, left, right } => {
|
||||
// Evaluate left operand
|
||||
let (left_reg, left_code) = self.generate_expression(left)?;
|
||||
code.extend(left_code);
|
||||
|
||||
// Evaluate right operand
|
||||
let (right_reg, right_code) = self.generate_expression(right)?;
|
||||
code.extend(right_code);
|
||||
|
||||
// Allocate result register
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.extend(result_alloc);
|
||||
|
||||
// Generate operation
|
||||
match op {
|
||||
BinaryOperator::Add => {
|
||||
code.push(format!(
|
||||
"\tadd {}, {}, {}",
|
||||
left_reg, right_reg, result_reg
|
||||
));
|
||||
}
|
||||
BinaryOperator::Sub => {
|
||||
code.push(format!(
|
||||
"\tsub {}, {}, {}",
|
||||
left_reg, right_reg, result_reg
|
||||
));
|
||||
}
|
||||
BinaryOperator::Mul => {
|
||||
self.include("maths", "./lib/maths/core.dsa");
|
||||
// Call multiply function
|
||||
code.push(format!("\tpush {}", right_reg));
|
||||
code.push(format!("\tpush {}", left_reg));
|
||||
code.push("\tcall maths::multiply".to_string());
|
||||
code.push(format!("\tpop {}", result_reg));
|
||||
code.push("\tpop zero".to_string());
|
||||
}
|
||||
// Comparison operators - return 1 (true) or 0 (false)
|
||||
BinaryOperator::Eq => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Ne => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Lt => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Le => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Gt => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Ge => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
_ => return Err(format!("Unsupported binary operator: {:?}", op)),
|
||||
}
|
||||
|
||||
// Free operand registers (allocator will protect variables)
|
||||
self.allocator.free_temp(&left_reg);
|
||||
self.allocator.free_temp(&right_reg);
|
||||
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Call { name, args } => {
|
||||
// Save caller-saved registers and track which ones we saved
|
||||
let saved_regs = self.allocator.get_caller_saved_registers();
|
||||
for reg in &saved_regs {
|
||||
code.push(format!("\tpush {}", reg));
|
||||
}
|
||||
|
||||
// Evaluate and push arguments in reverse order
|
||||
let mut arg_regs = Vec::new();
|
||||
for arg in args.iter().rev() {
|
||||
let (arg_reg, arg_code) = self.generate_expression(arg)?;
|
||||
code.extend(arg_code);
|
||||
code.push(format!("\tpush {}", arg_reg));
|
||||
arg_regs.push(arg_reg);
|
||||
}
|
||||
|
||||
if GLOBAL_METHODS.contains_key(name.as_str()) {
|
||||
code.push(format!("\tcall {}", GLOBAL_METHODS[name.as_str()]));
|
||||
} else if self.symbols.contains(name) {
|
||||
// Call local function
|
||||
code.push(format!("\tcall {}", name));
|
||||
} else {
|
||||
return Err(format!("undefined function {name}"));
|
||||
}
|
||||
|
||||
// Result is in rg0, allocate a register and move it
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
|
||||
code.extend(result_alloc);
|
||||
code.push(format!("\tpop {}", result_reg));
|
||||
|
||||
// Clean up arguments
|
||||
if args.len() > 1 {
|
||||
for _ in 0..(args.len() - 1) {
|
||||
code.push("\tpop zero".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Restore caller-saved registers in reverse order (LIFO)
|
||||
for reg in saved_regs.iter().rev() {
|
||||
code.push(format!("\tpop {}", reg));
|
||||
}
|
||||
|
||||
// Free argument registers
|
||||
for reg in arg_regs {
|
||||
self.allocator.free_temp(®);
|
||||
}
|
||||
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Unary { op, operand } => {
|
||||
let (operand_reg, operand_code) = self.generate_expression(operand)?;
|
||||
code.extend(operand_code);
|
||||
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.extend(result_alloc);
|
||||
|
||||
match op {
|
||||
UnaryOperator::Minus => {
|
||||
// Negate: result = 0 - operand
|
||||
code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::Plus => {
|
||||
// Just move
|
||||
code.push(format!("\tmov {}, {}", operand_reg, result_reg));
|
||||
}
|
||||
}
|
||||
|
||||
self.allocator.free_temp(&operand_reg);
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Empty => Ok(("zero".to_string(), code)),
|
||||
}
|
||||
}
|
||||
|
||||
// Helper for generating unique labels
|
||||
fn get_unique_label(&mut self) -> String {
|
||||
// You'd implement a counter here
|
||||
static COUNTER: AtomicU32 = AtomicU32::new(0);
|
||||
|
||||
let val = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
|
||||
(val + 1).to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Joins any number of arguments into one `String`, one argument per line.
///
/// Every argument must implement `Display`; each is followed by a newline,
/// including the last one. A trailing comma is accepted.
#[macro_export]
macro_rules! dsa {
    ($($line:expr),* $(,)?) => {{
        // Append every argument to one shared buffer instead of building an
        // intermediate `String` per argument.
        use ::std::fmt::Write as _;
        let mut out = ::std::string::String::new();
        $(
            writeln!(out, "{}", $line).expect("write to String failed");
        )*
        out
    }};
}
|
||||
|
||||
// ──────────────────────── cmd! ────────────────────────
/// Turns its raw token stream into a `String` followed by a newline.
///
/// The text is produced by `stringify!`, so token spacing follows the
/// compiler's pretty-printing of the tokens.
#[macro_export]
macro_rules! cmd {
    ($($tokens:tt)*) => {{
        // `$tokens` is a repeated fragment and has to be expanded inside a
        // `$(...)*` repetition; using it bare fails to compile as soon as the
        // macro is invoked.
        ::std::string::String::from(concat!(stringify!($($tokens)*), "\n"))
    }};
}
|
||||
|
||||
// ──────────────────────── block! ────────────────────────
/// Builds a labelled assembly block at compile time.
///
/// The first token is a string literal used as the label. It is followed by a
/// comma-separated list of `dsa![...]` groups (no trailing comma); the tokens
/// inside each group are stringified into one tab-indented line.
///
/// ```text
/// let asm = block![ "name"
///     dsa![mov rg0, rg1],
///     dsa![add rg1, rg1]
/// ];
/// ```
///
/// `asm` is a `&'static str` containing:
///
/// ```text
/// name:
///     mov rg0, rg1
///     add rg1, rg1
/// ```
#[macro_export]
macro_rules! block {
    ($name:literal $(dsa![$($tok:tt)*]),* ) => {{
        // The `dsa![...]` groups are matched as plain tokens here, not
        // expanded as macro calls, so everything is joined by `concat!`.
        const TEXT: &str = concat!(
            $name, ":\n",
            $("\t", stringify!($($tok)*), "\n"),*
        );
        TEXT
    }};
}
|
||||
|
||||
/// Formats `$text` (anything implementing `Display`) as a `// ...` comment line.
#[macro_export]
macro_rules! comment {
    ($text:expr) => {
        ::std::format!("// {}", $text)
    };
}
|
||||
@@ -1,74 +0,0 @@
|
||||
use std::fmt;
|
||||
|
||||
use crate::{codegen::CodeGenerator, lexer::Lexer, parser::Parser};
|
||||
|
||||
// mod assembly;
|
||||
pub mod codegen;
|
||||
pub mod lexer;
|
||||
pub mod parser;
|
||||
mod registers;
|
||||
|
||||
// ============================================================================
|
||||
// Main & Tests
|
||||
// ============================================================================
|
||||
|
||||
fn main() {
|
||||
// read from input file: syntax "c_compiler <src.c> [output.dsa]"
|
||||
let args: Vec<String> = std::env::args().collect();
|
||||
if args.len() < 2 {
|
||||
eprintln!("Usage: c_compiler <src.c> [output.dsa]");
|
||||
return;
|
||||
}
|
||||
|
||||
let input_file = &args[1];
|
||||
let output_file = if args.len() > 2 {
|
||||
&args[2]
|
||||
} else {
|
||||
"output.dsa"
|
||||
};
|
||||
|
||||
// read input
|
||||
let input = std::fs::read_to_string(input_file).expect("Failed to read input file");
|
||||
|
||||
// Lexing
|
||||
let mut lexer = Lexer::new(&input);
|
||||
let tokens = match lexer.tokenize() {
|
||||
Ok(tokens) => tokens,
|
||||
Err(e) => {
|
||||
eprintln!("Lexing error: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
println!("Tokens:");
|
||||
for token in &tokens {
|
||||
println!(" {:?}", token.token_type);
|
||||
}
|
||||
println!();
|
||||
|
||||
// Parsing
|
||||
let mut parser = Parser::new(tokens);
|
||||
let ast = match parser.parse() {
|
||||
Ok(ast) => ast,
|
||||
Err(e) => {
|
||||
eprintln!("Parsing error: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
println!("AST:");
|
||||
println!("{:#?}", ast);
|
||||
|
||||
// Code Gen
|
||||
let mut generator = CodeGenerator::new(ast);
|
||||
let result = match generator.generate() {
|
||||
Ok(code) => code,
|
||||
Err(e) => {
|
||||
eprintln!("Parsing error: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
std::fs::write(output_file, &result).expect("Failed to write output");
|
||||
println!("Result written to {}", output_file);
|
||||
}
|
||||
@@ -1,344 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Register allocator for DSA assembly generation.
///
/// Hands out the general-purpose registers (`rg0`–`rgf`) to temporaries and
/// named variables, and spills variables to `bpr`-relative stack slots when
/// every register is taken. All methods that need machine code return it as a
/// list of already-formatted assembly lines for the caller to emit.
pub struct RegisterAllocator {
    /// General-purpose registers, in the order they are preferred.
    available_registers: Vec<String>,

    /// Maps variable names to their current location (register or stack offset).
    variable_locations: HashMap<String, Location>,

    /// Maps registers to the variables they currently hold.
    register_contents: HashMap<String, String>,

    /// Offset (relative to `bpr`) of the next free stack slot.
    /// Starts at -4 and moves down by 4 for every slot handed out.
    stack_offset: i32,

    /// Tracks which registers are currently handed out; a missing entry
    /// counts as "free".
    in_use: HashMap<String, bool>,
}

/// Where a variable currently lives.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Location {
    /// Held in the named register.
    Register(String),
    /// Stored on the stack at this offset from `bpr`.
    Stack(i32),
}

impl Default for RegisterAllocator {
    fn default() -> Self {
        Self::new()
    }
}

impl RegisterAllocator {
    /// Creates an allocator with all 16 general-purpose registers free and
    /// the first stack slot at `bpr - 4`.
    pub fn new() -> Self {
        // Initialize with available GP registers (rg0-rgf = 16 registers)
        let registers = [
            "rg0", "rg1", "rg2", "rg3", "rg4", "rg5", "rg6", "rg7", "rg8", "rg9", "rga",
            "rgb", "rgc", "rgd", "rge", "rgf",
        ]
        .iter()
        .map(|name| name.to_string())
        .collect();

        RegisterAllocator {
            available_registers: registers,
            variable_locations: HashMap::new(),
            register_contents: HashMap::new(),
            stack_offset: -4, // Start at -4 (first local below saved bpr)
            in_use: HashMap::new(),
        }
    }

    /// Returns `true` when `reg` is currently marked as handed out.
    fn is_in_use(&self, reg: &str) -> bool {
        self.in_use.get(reg).copied().unwrap_or(false)
    }

    /// Records that `var_name` now lives in `reg`.
    fn bind(&mut self, var_name: &str, reg: &str) {
        self.variable_locations
            .insert(var_name.to_string(), Location::Register(reg.to_string()));
        self.register_contents
            .insert(reg.to_string(), var_name.to_string());
    }

    /// Registers that hold a variable, in `available_registers` order.
    ///
    /// Iterating the register list (rather than the `register_contents` map)
    /// keeps every derived sequence deterministic.
    fn occupied_registers(&self) -> Vec<String> {
        self.available_registers
            .iter()
            .filter(|reg| self.register_contents.contains_key(*reg))
            .cloned()
            .collect()
    }

    /// Allocate a temporary register for expression evaluation.
    ///
    /// Returns the register name plus any assembly needed to make it
    /// available (non-empty only when a variable had to be spilled).
    ///
    /// # Errors
    /// Fails when every register is in use and none of them holds a
    /// variable that could be spilled.
    pub fn alloc_temp(&mut self) -> Result<(String, Vec<String>), String> {
        // Try to find an unused register
        if let Some(reg) = self.find_free_register() {
            self.in_use.insert(reg.clone(), true);
            return Ok((reg, Vec::new()));
        }

        // All registers in use - spill the first one that holds a variable
        let victim = self
            .available_registers
            .iter()
            .find(|reg| self.register_contents.contains_key(*reg))
            .cloned();

        match victim {
            Some(reg) => {
                let code = self.spill_register(&reg)?;
                self.in_use.insert(reg.clone(), true);
                Ok((reg, code))
            }
            None => Err("No registers available and nothing to spill".to_string()),
        }
    }

    /// Free a temporary register after use.
    ///
    /// NOTE: This will NOT free registers that contain variables!
    /// Variables persist throughout their scope and are only released via
    /// [`free_var`](Self::free_var).
    pub fn free_temp(&mut self, reg: &str) {
        if self.register_contents.contains_key(reg) {
            // This register holds a variable - don't free it!
            return;
        }

        // This is a true temporary - safe to free
        self.in_use.insert(reg.to_string(), false);
    }

    /// Allocate a register for a named variable.
    ///
    /// * Already in a register: that register is returned with no code.
    /// * On the stack: a register is allocated and a load is emitted.
    /// * Unknown: a fresh register is allocated and bound to the variable.
    ///
    /// # Errors
    /// Propagates the failure of [`alloc_temp`](Self::alloc_temp).
    pub fn alloc_var(&mut self, var_name: &str) -> Result<(String, Vec<String>), String> {
        let (reg, code) = match self.variable_locations.get(var_name).cloned() {
            Some(Location::Register(reg)) => return Ok((reg, Vec::new())),
            Some(Location::Stack(offset)) => {
                // Variable is on stack, load it into a register
                let (reg, mut code) = self.alloc_temp()?;
                code.push(format!("\tldw bpr, {}, {}", reg, offset));
                (reg, code)
            }
            // Variable doesn't have a location yet, allocate a new register
            None => self.alloc_temp()?,
        };

        self.bind(var_name, &reg);
        Ok((reg, code))
    }

    /// Get the current location of a variable, if it has one.
    pub fn get_var_location(&self, var_name: &str) -> Option<&Location> {
        self.variable_locations.get(var_name)
    }

    /// Load a variable into a register (allocating if necessary).
    /// Returns the register and assembly code to load it.
    pub fn load_var(&mut self, var_name: &str) -> Result<(String, Vec<String>), String> {
        self.alloc_var(var_name)
    }

    /// Store the value held in `source_reg` into `var_name`.
    ///
    /// Existing variables are written where they live. A new variable gets a
    /// free register when one exists, otherwise a fresh stack slot. Returns
    /// the assembly that performs the store (possibly empty).
    pub fn store_var(&mut self, var_name: &str, source_reg: &str) -> Vec<String> {
        let mut code = Vec::new();

        match self.variable_locations.get(var_name).cloned() {
            Some(Location::Register(dest_reg)) => {
                if dest_reg != source_reg {
                    code.push(format!("\tmov {}, {}", source_reg, dest_reg));
                }
            }
            Some(Location::Stack(offset)) => {
                code.push(format!("\tstw {}, bpr, {}", source_reg, offset));
            }
            None => match self.find_free_register() {
                // Variable doesn't exist yet - give it a register
                Some(free_reg) => {
                    if free_reg != source_reg {
                        code.push(format!("\tmov {}, {}", source_reg, free_reg));
                    }
                    self.bind(var_name, &free_reg);
                    self.in_use.insert(free_reg, true);
                }
                // No free registers - allocate on stack
                None => {
                    code.push(format!("\tstw {}, bpr, {}", source_reg, self.stack_offset));
                    self.variable_locations
                        .insert(var_name.to_string(), Location::Stack(self.stack_offset));
                    self.stack_offset -= 4; // Move to next stack slot
                }
            },
        }

        code
    }

    /// Spill the variable held in `reg` (if any) to a new stack slot.
    ///
    /// Returns the assembly performing the spill; the list is empty when the
    /// register holds no variable. The register's `in_use` flag is left
    /// untouched - the caller decides who owns the register afterwards.
    fn spill_register(&mut self, reg: &str) -> Result<Vec<String>, String> {
        let mut code = Vec::new();

        if let Some(var_name) = self.register_contents.remove(reg) {
            // Store register content to stack
            code.push(format!("\tstw {}, bpr, {}", reg, self.stack_offset));

            // Update variable location
            self.variable_locations
                .insert(var_name, Location::Stack(self.stack_offset));

            // Move to next stack slot
            self.stack_offset -= 4;
        }

        Ok(code)
    }

    /// Find a free register (not currently in use), preferring the lowest.
    fn find_free_register(&self) -> Option<String> {
        self.available_registers
            .iter()
            .find(|reg| !self.is_in_use(reg))
            .cloned()
    }

    /// Spill all variable-holding registers to the stack (useful before
    /// function calls).
    ///
    /// Registers are processed in `rg0`..`rgf` order so stack slots are
    /// assigned deterministically.
    ///
    /// NOTE(review): spilled registers keep their `in_use` flag, exactly as
    /// before; they only become allocatable again through `free_temp` or
    /// `reset`. Confirm with the callers whether they should be released here.
    pub fn spill_all(&mut self) -> Vec<String> {
        let mut code = Vec::new();

        for reg in self.occupied_registers() {
            if let Ok(spill_code) = self.spill_register(&reg) {
                code.extend(spill_code);
            }
        }

        code
    }

    /// Get the total stack space needed for local variables.
    ///
    /// This is the magnitude of the next free slot offset, so a function
    /// without any stack-allocated variable still reports 4.
    pub fn get_stack_size(&self) -> i32 {
        -self.stack_offset // Convert negative offset to positive size
    }

    /// Reset allocator for a new function.
    pub fn reset(&mut self) {
        self.variable_locations.clear();
        self.register_contents.clear();
        self.stack_offset = -4;
        self.in_use.clear();
    }

    /// Mark a variable as dead (no longer needed).
    /// Frees its register if it's in one.
    pub fn free_var(&mut self, var_name: &str) {
        if let Some(Location::Register(reg)) = self.variable_locations.remove(var_name) {
            self.register_contents.remove(&reg);
            self.in_use.insert(reg, false);
        }
    }

    /// Get the registers that contain variables and are in use, in
    /// `rg0`..`rgf` order. These need to be saved before function calls.
    ///
    /// The order matches the pushes emitted by
    /// [`save_caller_saved`](Self::save_caller_saved), so the result can be
    /// handed to [`restore_caller_saved`](Self::restore_caller_saved).
    pub fn get_caller_saved_registers(&self) -> Vec<String> {
        self.occupied_registers()
            .into_iter()
            .filter(|reg| self.is_in_use(reg))
            .collect()
    }

    /// Save caller-saved registers before a function call.
    ///
    /// Returns one `push` per register reported by
    /// [`get_caller_saved_registers`](Self::get_caller_saved_registers), in
    /// the same order. For simplicity every in-use variable register is
    /// saved; no liveness analysis is done.
    pub fn save_caller_saved(&mut self) -> Vec<String> {
        self.get_caller_saved_registers()
            .into_iter()
            .map(|reg| format!("\tpush {}", reg))
            .collect()
    }

    /// Restore caller-saved registers after a function call.
    ///
    /// `saved_regs` must list the registers in the order they were pushed;
    /// the pops are emitted in reverse (LIFO) order.
    pub fn restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec<String> {
        saved_regs
            .iter()
            .rev()
            .map(|reg| format!("\tpop {}", reg))
            .collect()
    }
}
|
||||
|
||||
/// Unit tests for [`RegisterAllocator`]: temporary reuse, variable-to-register
/// stability, and spilling once all 16 registers are occupied.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg1, code1) = allocator.alloc_temp().unwrap();
        assert!(code1.is_empty()); // No spill needed
        assert_eq!(reg1, "rg0");

        let (reg2, code2) = allocator.alloc_temp().unwrap();
        assert!(code2.is_empty());
        assert_eq!(reg2, "rg1");

        allocator.free_temp(&reg1);

        let (reg3, code3) = allocator.alloc_temp().unwrap();
        assert!(code3.is_empty());
        assert_eq!(reg3, "rg0"); // Reuses freed register
    }

    #[test]
    fn test_variable_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg, "rg0");

        // Requesting same variable again should return same register
        let (reg2, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg2, "rg0");
    }

    #[test]
    fn test_stack_allocation() {
        let mut allocator = RegisterAllocator::new();

        // Allocate all 16 registers
        for i in 0..16 {
            allocator.alloc_var(&format!("var{}", i)).unwrap();
        }

        // Next allocation must spill the first variable register (rg0) into
        // the first stack slot and hand that register out again.
        let (reg, code) = allocator.alloc_var("var16").unwrap();
        assert_eq!(reg, "rg0");
        assert_eq!(code, vec!["\tstw rg0, bpr, -4"]);
    }
}
|
||||
@@ -0,0 +1 @@
|
||||
disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"]
|
||||
@@ -40,7 +40,7 @@ pub enum InstructionType {
|
||||
Immediate,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
|
||||
#[non_exhaustive]
|
||||
pub enum Register {
|
||||
// general purpose registers
|
||||
@@ -69,7 +69,9 @@ pub enum Register {
|
||||
Idr,
|
||||
Mmr,
|
||||
Zero,
|
||||
NoReg,
|
||||
|
||||
#[default]
|
||||
Null, // Invalid - Triggers a fault if accessed
|
||||
|
||||
// system registers - can't be written to by instructions.
|
||||
Mar,
|
||||
@@ -104,12 +106,6 @@ impl Register {
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Register {
|
||||
fn default() -> Self {
|
||||
Self::NoReg
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for Register {
|
||||
type Error = RegisterParseError;
|
||||
|
||||
@@ -144,7 +140,7 @@ impl TryFrom<u8> for Register {
|
||||
0x14 => Self::Idr,
|
||||
0x15 => Self::Mmr,
|
||||
0x16 => Self::Zero,
|
||||
0x17 => Self::NoReg,
|
||||
0x17 => Self::Null,
|
||||
0x18 => Self::Mar,
|
||||
0x19 => Self::Mdr,
|
||||
0x1A => Self::Sts,
|
||||
@@ -183,7 +179,7 @@ impl TryFrom<&str> for Register {
|
||||
"idr" => Ok(Self::Idr),
|
||||
"mmr" => Ok(Self::Mmr),
|
||||
"zero" => Ok(Self::Zero),
|
||||
"null" => Ok(Self::NoReg),
|
||||
"null" => Ok(Self::Null),
|
||||
"pcx" => Ok(Self::Pcx),
|
||||
_ => Err(RegisterParseError::InvalidName(value.to_string())),
|
||||
}
|
||||
@@ -216,7 +212,7 @@ impl std::fmt::Display for Register {
|
||||
Self::Idr => write!(f, "idr"),
|
||||
Self::Mmr => write!(f, "mmr"),
|
||||
Self::Zero => write!(f, "zero"),
|
||||
Self::NoReg => write!(f, "noreg"),
|
||||
Self::Null => write!(f, "null"),
|
||||
Self::Mar => write!(f, "mar"),
|
||||
Self::Mdr => write!(f, "mdr"),
|
||||
Self::Sts => write!(f, "sts"),
|
||||
|
||||
@@ -8,9 +8,9 @@ pub trait Encode {
|
||||
/// Encodes a zero argument instruction.
|
||||
fn encode_no_args(opcode: u8) -> u32 {
|
||||
let opcode = u32::from(opcode);
|
||||
let sr1 = Register::NoReg as u32;
|
||||
let sr2 = Register::NoReg as u32;
|
||||
let dr = Register::NoReg as u32;
|
||||
let sr1 = Register::Null as u32;
|
||||
let sr2 = Register::Null as u32;
|
||||
let dr = Register::Null as u32;
|
||||
let shamt = 0;
|
||||
|
||||
(opcode << 26) | (sr1 << 21) | (sr2 << 16) | (dr << 11) | (shamt << 6)
|
||||
|
||||
@@ -2,7 +2,7 @@ use crate::prelude::*;
|
||||
|
||||
#[test]
|
||||
fn test_encode_nop() {
|
||||
let no_reg = Register::NoReg as u32;
|
||||
let no_reg = Register::Null as u32;
|
||||
let no_op = u32::from(Instruction::Nop.opcode());
|
||||
|
||||
let expected = (no_op << 26) | (no_reg << 21) | (no_reg << 16) | (no_reg << 11);
|
||||
@@ -15,7 +15,7 @@ fn test_encode_nop() {
|
||||
fn test_encode_mov() {
|
||||
let rg0 = Register::Rg0 as u32;
|
||||
let rg1 = Register::Rg1 as u32;
|
||||
let no_reg = Register::NoReg as u32;
|
||||
let no_reg = Register::Null as u32;
|
||||
|
||||
let instruction = Instruction::Mov(RTypeArgs::new(
|
||||
Some(Register::Rg0),
|
||||
@@ -53,7 +53,7 @@ fn test_encode_load_byte() {
|
||||
#[test]
|
||||
fn test_encode_shift_left_shamt() {
|
||||
let rg0 = Register::Rg0 as u32;
|
||||
let no_reg = Register::NoReg as u32;
|
||||
let no_reg = Register::Null as u32;
|
||||
|
||||
let shift_amount = 5;
|
||||
|
||||
@@ -80,7 +80,7 @@ fn test_encode_shift_left_shamt() {
|
||||
fn test_encode_shift_left_reg() {
|
||||
let rg0 = Register::Rg0 as u32;
|
||||
let rg1 = Register::Rg1 as u32;
|
||||
let no_reg = Register::NoReg as u32;
|
||||
let no_reg = Register::Null as u32;
|
||||
|
||||
let instruction = Instruction::ShiftLeft(RTypeArgs::new(
|
||||
Some(Register::Rg0),
|
||||
|
||||
@@ -7,3 +7,4 @@ authors.workspace = true
|
||||
[dependencies]
|
||||
chrono = "0.4.43"
|
||||
common = { path = "../common" }
|
||||
uuid = { version = "1.20.0", features = ["v4"] }
|
||||
|
||||
@@ -0,0 +1,955 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::AtomicU32;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use chrono::{DateTime, Local};
|
||||
|
||||
use super::registers::RegisterAllocator;
|
||||
use crate::backend::dsa::instruction::{InsBlock as IB, Instruction as I, Label};
|
||||
use crate::backend::dsa::registers::Register;
|
||||
|
||||
use crate::model::{
|
||||
AssignmentOperator, BinaryOperator, Call, CompilerError, ConstExpr, Declaration,
|
||||
Dependency, Expression, Number, Program, Statement, TypeId, UnaryOperator, Variable,
|
||||
};
|
||||
|
||||
/// Translates a parsed [`Program`] into DSA assembly text.
///
/// Typical use is `CodeGenerator::new(ast).generate()`.
pub struct CodeGenerator {
    /// The program being compiled; its declarations are walked by `generate`.
    ast: Program,
    /// Include directives keyed by import name (filled by `include`).
    imports: HashMap<String, I>,
    /// Data directives for global variables, keyed by variable name
    /// (filled by `generate_global`).
    globals: HashMap<String, I>,
    /// One generated instruction block per function declaration, in
    /// declaration order.
    functions: Vec<IB>,
    /// Names of the top-level variables, functions and dependencies,
    /// collected by `generate` before any code is emitted.
    symbols: Vec<String>,
    /// Register allocator; reset at the start of every function.
    allocator: RegisterAllocator,
}
|
||||
|
||||
impl CodeGenerator {
|
||||
pub fn new(ast: Program) -> Self {
|
||||
CodeGenerator {
|
||||
ast,
|
||||
imports: HashMap::new(),
|
||||
globals: HashMap::new(),
|
||||
functions: Vec::new(),
|
||||
symbols: Vec::new(),
|
||||
allocator: RegisterAllocator::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn include(&mut self, name: impl Into<String>, path: impl Into<String>) {
|
||||
let name = name.into();
|
||||
self.imports.insert(name.clone(), I::include(name, path));
|
||||
}
|
||||
|
||||
fn is_global(&self, name: &str) -> bool {
|
||||
// Check if this variable is in the globals list
|
||||
self.globals.contains_key(name)
|
||||
}
|
||||
|
||||
pub fn generate(&mut self) -> Result<String, CompilerError> {
|
||||
// always include the print library for debugging!
|
||||
self.include("print", "./lib/io/print.dsa");
|
||||
|
||||
for block in self.ast.clone().declarations {
|
||||
match block {
|
||||
Declaration::Variable {
|
||||
var: Variable { name, .. },
|
||||
..
|
||||
} => self.symbols.push(name),
|
||||
Declaration::Function { name, .. } => self.symbols.push(name),
|
||||
Declaration::Dependency(Dependency { name, .. }) => {
|
||||
self.symbols.push(name)
|
||||
}
|
||||
Declaration::Struct { .. } => {} /* we can't do any code generation for
|
||||
* a struct yet. we may need to later
|
||||
* once these become class-like
|
||||
* objects with implementations */
|
||||
}
|
||||
}
|
||||
|
||||
for block in self.ast.clone().declarations {
|
||||
self.generate_block(block.clone())?;
|
||||
}
|
||||
|
||||
let assembly = self.generate_layout()?;
|
||||
Ok(assembly
|
||||
.iter()
|
||||
.map(|i| i.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n"))
|
||||
}
|
||||
|
||||
fn generate_layout(&mut self) -> Result<IB, CompilerError> {
|
||||
let datetime: DateTime<Local> = SystemTime::now().into();
|
||||
|
||||
let mut block = IB::new();
|
||||
|
||||
block.extend(vec![
|
||||
I::global_comment(format!(
|
||||
"GENERATED BY DSC COMPILER
|
||||
Generated at {}",
|
||||
datetime.format("%Y-%m-%d %H:%M:%S")
|
||||
)),
|
||||
I::Newline,
|
||||
I::global_comment("Imports"),
|
||||
]);
|
||||
|
||||
block.extend(self.imports.values().cloned().collect::<Vec<_>>());
|
||||
|
||||
block.extend(vec![
|
||||
I::Newline,
|
||||
I::global_comment("Globals & Reserved Memory"),
|
||||
]);
|
||||
|
||||
block.extend(self.globals.values().cloned().collect::<Vec<_>>());
|
||||
|
||||
block.extend(vec![
|
||||
I::Newline,
|
||||
I::global_comment("Entry Point"),
|
||||
I::db_word("stack", 0x10000),
|
||||
I::db_string("message", "Process Exited with code:"),
|
||||
// init function for stack setup.
|
||||
I::label("_init"),
|
||||
I::ldw_label("stack", Register::Bpr),
|
||||
I::mov(Register::Bpr, Register::Spr),
|
||||
I::push(Register::Zero),
|
||||
I::call("main"),
|
||||
I::call("print::print_newline"),
|
||||
I::lwi_label("message", Register::Rg0),
|
||||
I::push(Register::Rg0),
|
||||
I::call("print::print"),
|
||||
I::pop(Register::Zero),
|
||||
I::call("print::print_hex_word"),
|
||||
I::pop(Register::Zero),
|
||||
I::Hlt,
|
||||
I::Newline,
|
||||
// default return block boilerplate
|
||||
I::global_comment("Return"),
|
||||
I::label("_ret"),
|
||||
I::mov(Register::Bpr, Register::Spr),
|
||||
I::pop(Register::Bpr),
|
||||
I::Return,
|
||||
]);
|
||||
|
||||
for function in self.functions.iter() {
|
||||
block.extend(function.iter().cloned());
|
||||
}
|
||||
|
||||
Ok(block)
|
||||
}
|
||||
|
||||
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
|
||||
let init = init.unwrap_or(ConstExpr::Number(0));
|
||||
match init {
|
||||
ConstExpr::Number(value) => {
|
||||
self.globals
|
||||
.insert(name.to_string(), I::db_word(name, value as u32));
|
||||
}
|
||||
ConstExpr::String(str) => {
|
||||
self.globals
|
||||
.insert(name.to_string(), I::db_string(name, str));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_block(&mut self, block: Declaration) -> Result<(), CompilerError> {
|
||||
match block {
|
||||
Declaration::Variable { var, init, .. } => {
|
||||
self.generate_global(&var.name, init)
|
||||
}
|
||||
Declaration::Function {
|
||||
name,
|
||||
params,
|
||||
body,
|
||||
return_type,
|
||||
} => {
|
||||
let func = self.generate_function(&name, ¶ms, &body, return_type);
|
||||
self.functions.push(func);
|
||||
}
|
||||
Declaration::Dependency(Dependency { name, path }) => {
|
||||
self.include(name, path);
|
||||
}
|
||||
Declaration::Struct { .. } => {} /* can't do any codegen for these yet,
|
||||
* they're just types. */
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Example: Generate code for a function
|
||||
fn generate_function(
|
||||
&mut self,
|
||||
name: &str,
|
||||
params: &[Variable],
|
||||
body: &[Statement],
|
||||
return_type: TypeId,
|
||||
) -> IB {
|
||||
let mut code = IB::new();
|
||||
|
||||
// Reset allocator for new function
|
||||
self.allocator.reset();
|
||||
|
||||
let fmtparams = params
|
||||
.iter()
|
||||
.map(|p| format!("{}: {}", p.name, p.type_id))
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ");
|
||||
|
||||
code.extend(vec![
|
||||
I::global_comment(format!("fn {name}({fmtparams}) -> {return_type}")),
|
||||
I::label(name),
|
||||
I::push(Register::Bpr),
|
||||
I::mov(Register::Spr, Register::Bpr),
|
||||
]);
|
||||
|
||||
// Allocate parameters to registers or stack locations
|
||||
for (i, param) in params.iter().enumerate() {
|
||||
let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8
|
||||
// Track that this parameter is at a stack location
|
||||
let (reg, load_code) = self.allocator.alloc_var(¶m.name).unwrap();
|
||||
code.append(load_code);
|
||||
code.push(I::ldw_reg_offset(Register::Bpr, reg, offset));
|
||||
}
|
||||
|
||||
// Generate code for function body
|
||||
for stmt in body {
|
||||
let stmt_code = self.generate_statement(stmt, &mut code).unwrap();
|
||||
code.append(stmt_code);
|
||||
}
|
||||
|
||||
// automatically return at function end
|
||||
if let Some(x) = code.iter().last()
|
||||
&& let I::Jmp { target: Label(val) } = x
|
||||
&& val == "_ret"
|
||||
{
|
||||
} else {
|
||||
code.push(I::jmp("_ret"));
|
||||
}
|
||||
|
||||
code.insert(0, I::Newline);
|
||||
|
||||
code
|
||||
}
|
||||
|
||||
// Example: Generate code for a statement
|
||||
fn generate_statement(
|
||||
&mut self,
|
||||
stmt: &Statement,
|
||||
func_body: &mut IB,
|
||||
) -> Result<IB, CompilerError> {
|
||||
let mut code = IB::new();
|
||||
|
||||
match stmt {
|
||||
Statement::Declaration { var, value } => {
|
||||
if let Some(expr) = value {
|
||||
// Evaluate expression
|
||||
let (result_reg, expr_code) =
|
||||
self.generate_expression(expr, true, func_body)?;
|
||||
code.append(expr_code);
|
||||
|
||||
// Store result in variable
|
||||
let store_code = self.allocator.store_var(&var.name, &result_reg);
|
||||
code.append(store_code);
|
||||
|
||||
// Free temporary register
|
||||
self.allocator.free_temp(result_reg);
|
||||
} else {
|
||||
// Just declaring variable without initialization
|
||||
self.allocator.alloc_var(&var.name)?;
|
||||
}
|
||||
}
|
||||
|
||||
Statement::Break => unimplemented!("need scope tracking first!"),
|
||||
Statement::Continue => unimplemented!("need scope tracking first!"),
|
||||
Statement::Defer(_func) => unimplemented!("we need scope tracking first!"),
|
||||
|
||||
Statement::PtrWrite { ptr, value } => {
|
||||
let (result_reg, expr_code) =
|
||||
self.generate_expression(value, true, func_body)?;
|
||||
code.append(expr_code);
|
||||
|
||||
let (ptr_reg, ptr_code) =
|
||||
self.generate_expression(ptr, true, func_body)?;
|
||||
code.append(ptr_code);
|
||||
|
||||
code.push(I::stw_reg(result_reg, ptr_reg));
|
||||
|
||||
self.allocator.free_temp(result_reg);
|
||||
self.allocator.free_temp(ptr_reg);
|
||||
}
|
||||
|
||||
Statement::Assign {
|
||||
varname,
|
||||
value,
|
||||
operator,
|
||||
} => {
|
||||
// Evaluate expression
|
||||
let (result_reg, expr_code) =
|
||||
self.generate_expression(value, true, func_body)?;
|
||||
code.append(expr_code);
|
||||
|
||||
if *operator == AssignmentOperator::Assign {
|
||||
// Check if this is a global variable
|
||||
if self.is_global(varname) {
|
||||
// Store to global label
|
||||
code.push(I::stw_label(result_reg, varname.clone()))
|
||||
} else {
|
||||
// Store result in local variable
|
||||
let store_code = self.allocator.store_var(varname, &result_reg);
|
||||
code.append(store_code);
|
||||
}
|
||||
|
||||
// Free temporary register
|
||||
self.allocator.free_temp(result_reg);
|
||||
|
||||
return Ok(code);
|
||||
}
|
||||
|
||||
// for more complex assignment cases we need an intermediate register.
|
||||
let (temp_reg, temp_code) = self.allocator.alloc_temp()?;
|
||||
code.append(temp_code);
|
||||
|
||||
// fetch the value of the variable
|
||||
let var_reg = if self.is_global(varname) {
|
||||
let instruction = I::ldw_label(varname.clone(), temp_reg);
|
||||
code.push(instruction);
|
||||
temp_reg
|
||||
} else {
|
||||
let (rg, block) = self.allocator.load_var(varname)?;
|
||||
code.append(block);
|
||||
rg
|
||||
};
|
||||
|
||||
let assign_code = match operator {
|
||||
AssignmentOperator::Assign => {
|
||||
unreachable!("assignment was already checked earlier.")
|
||||
}
|
||||
AssignmentOperator::AddAssign => {
|
||||
I::add(var_reg, result_reg, temp_reg)
|
||||
}
|
||||
AssignmentOperator::SubAssign => {
|
||||
I::sub(var_reg, result_reg, temp_reg)
|
||||
}
|
||||
AssignmentOperator::MulAssign => {
|
||||
return Err(CompilerError::Unimplemented(
|
||||
"TODO: implement multiplication for assignment".to_string(),
|
||||
));
|
||||
}
|
||||
AssignmentOperator::DivAssign => {
|
||||
return Err(CompilerError::Unimplemented(
|
||||
"TODO: write proper div function for DSA".to_string(),
|
||||
));
|
||||
}
|
||||
AssignmentOperator::ModAssign => {
|
||||
return Err(CompilerError::Unimplemented(
|
||||
"TODO: write proper mod function for DSA".to_string(),
|
||||
));
|
||||
}
|
||||
AssignmentOperator::AndAssign => {
|
||||
I::and(var_reg, result_reg, temp_reg)
|
||||
}
|
||||
AssignmentOperator::OrAssign => I::or(var_reg, result_reg, temp_reg),
|
||||
AssignmentOperator::XorAssign => {
|
||||
I::xor(var_reg, result_reg, temp_reg)
|
||||
}
|
||||
AssignmentOperator::LeftShiftAssign => {
|
||||
// this is only useful if we optimise out the register allocation
|
||||
// inside value.
|
||||
// if let Expression::Number { value, .. } = *value {
|
||||
// I::shl(var_reg, value, temp_reg)
|
||||
// }
|
||||
I::shl(var_reg, result_reg, 0, temp_reg)
|
||||
}
|
||||
AssignmentOperator::RightShiftAssign => {
|
||||
// this is only useful if we optimise out the register allocation
|
||||
// if let Expression::Number { value, .. } = *value {
|
||||
// I::shr(var_reg, value, temp_reg)
|
||||
// }
|
||||
I::shr(var_reg, result_reg, 0, temp_reg)
|
||||
}
|
||||
};
|
||||
code.push(assign_code);
|
||||
|
||||
// Check if this is a global variable
|
||||
if self.is_global(varname) {
|
||||
// Store to global label
|
||||
code.push(I::stw_label(temp_reg, varname.clone()))
|
||||
} else {
|
||||
// Store result in local variable
|
||||
let store_code = self.allocator.store_var(varname, &temp_reg);
|
||||
code.append(store_code);
|
||||
}
|
||||
|
||||
self.allocator.free_temp(result_reg);
|
||||
self.allocator.free_temp(temp_reg);
|
||||
}
|
||||
|
||||
Statement::Return(expr) => {
|
||||
if let Some(e) = expr {
|
||||
let (result_reg, expr_code) =
|
||||
self.generate_expression(e, true, func_body)?;
|
||||
code.append(expr_code);
|
||||
code.push(I::stw_reg_offset(result_reg, Register::Bpr, 8));
|
||||
code.push(I::jmp("_ret"));
|
||||
self.allocator.free_temp(result_reg);
|
||||
}
|
||||
}
|
||||
|
||||
Statement::If {
|
||||
condition,
|
||||
then_stmt,
|
||||
else_stmt,
|
||||
} => {
|
||||
// Generate condition
|
||||
let (cond_reg, cond_code) =
|
||||
self.generate_expression(condition, true, func_body)?;
|
||||
code.append(cond_code);
|
||||
|
||||
// Compare with zero
|
||||
code.push(I::cmp(cond_reg, Register::Zero));
|
||||
self.allocator.free_temp(cond_reg);
|
||||
|
||||
// Generate unique labels
|
||||
let then_label = format!("_then_{}", self.get_unique_label());
|
||||
let else_label = format!("_else_{}", self.get_unique_label());
|
||||
let end_label = format!("_end_{}", self.get_unique_label());
|
||||
|
||||
// Jump to else if condition is false (equal to zero)
|
||||
code.push(I::jeq(else_label.clone()));
|
||||
|
||||
// Then block
|
||||
code.push(I::label(then_label));
|
||||
for s in then_stmt {
|
||||
code.append(self.generate_statement(s, func_body)?);
|
||||
}
|
||||
|
||||
if then_stmt.is_empty() {
|
||||
code.push(I::Nop);
|
||||
}
|
||||
|
||||
code.push(I::jmp(end_label.clone()));
|
||||
|
||||
// Else block
|
||||
code.push(I::label(else_label));
|
||||
for s in else_stmt {
|
||||
code.append(self.generate_statement(s, func_body)?);
|
||||
}
|
||||
|
||||
if else_stmt.is_empty() {
|
||||
code.push(I::Nop);
|
||||
}
|
||||
|
||||
code.push(I::label(end_label));
|
||||
}
|
||||
|
||||
Statement::While { condition, body } => {
|
||||
let loop_start = format!("_while_start_{}", self.get_unique_label());
|
||||
let loop_end = format!("_while_end_{}", self.get_unique_label());
|
||||
|
||||
code.push(I::label(&loop_start));
|
||||
|
||||
// Generate condition
|
||||
let (cond_reg, cond_code) =
|
||||
self.generate_expression(condition, true, func_body)?;
|
||||
code.append(cond_code);
|
||||
|
||||
code.push(I::cmp(cond_reg, Register::Zero));
|
||||
self.allocator.free_temp(cond_reg);
|
||||
|
||||
code.push(I::jeq(loop_end.clone()));
|
||||
|
||||
// Loop body
|
||||
for s in body {
|
||||
code.append(self.generate_statement(s, func_body)?);
|
||||
}
|
||||
|
||||
code.push(I::jmp(loop_start));
|
||||
code.push(I::label(loop_end));
|
||||
}
|
||||
|
||||
Statement::Loop(body) => {
|
||||
let loop_start = format!("_loop_start_{}", self.get_unique_label());
|
||||
code.push(I::label(&loop_start));
|
||||
|
||||
for s in body {
|
||||
code.append(self.generate_statement(s, func_body)?);
|
||||
}
|
||||
|
||||
code.push(I::jmp(loop_start));
|
||||
}
|
||||
|
||||
Statement::Expression { expr } => {
|
||||
let (result_reg, expr_code) =
|
||||
self.generate_expression(expr, false, func_body)?;
|
||||
code.append(expr_code);
|
||||
self.allocator.free_temp(result_reg);
|
||||
}
|
||||
|
||||
Statement::Block(statements) => {
|
||||
for s in statements {
|
||||
code.append(self.generate_statement(s, func_body)?);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(code)
|
||||
}
|
||||
|
||||
// Example: Generate code for an expression
|
||||
// Returns (register containing result, assembly code)
|
||||
fn generate_expression(
|
||||
&mut self,
|
||||
expr: &Expression,
|
||||
use_result: bool,
|
||||
func_body: &mut IB,
|
||||
) -> Result<(Register, IB), CompilerError> {
|
||||
let mut code = IB::new();
|
||||
|
||||
match expr {
|
||||
Expression::Empty => Ok((Register::Null, code)),
|
||||
|
||||
Expression::Number(n) => match n {
|
||||
Number::Signed(value, _) => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.append(alloc_code);
|
||||
|
||||
// Load immediate value
|
||||
code.push(I::lwi(*value as u32, reg));
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
Number::Unsigned(value, _) => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.append(alloc_code);
|
||||
|
||||
// Load immediate value
|
||||
code.push(I::lwi(*value as u32, reg));
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
},
|
||||
|
||||
Expression::CharLiteral(value) => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.append(alloc_code);
|
||||
|
||||
// Load immediate value
|
||||
code.push(I::comment(format!("char literal '{value}'")));
|
||||
code.push(I::lwi(*value as u32, reg));
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::StringLiteral(value) => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.append(alloc_code);
|
||||
|
||||
// write string into memory
|
||||
let uuid = self.get_unique_label();
|
||||
|
||||
func_body.insert(0, I::db_string(format!("str_{uuid}"), value));
|
||||
|
||||
// Load pointer to string
|
||||
code.push(I::lwi_label(format!("str_{uuid}"), reg));
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::ArrayLiteral { elements, type_id } => todo!(),
|
||||
Expression::StructLiteral {
|
||||
name,
|
||||
fields,
|
||||
type_id,
|
||||
} => todo!(),
|
||||
|
||||
Expression::Variable { name, .. } => {
|
||||
if self.is_global(&name.name) {
|
||||
// Allocate a temporary register for the global
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.append(alloc_code);
|
||||
|
||||
// Load from global label
|
||||
code.push(I::ldw_label(name.name.clone(), reg));
|
||||
|
||||
Ok((reg, code))
|
||||
} else {
|
||||
// Local variable - use existing allocator logic
|
||||
let (reg, load_code) = self.allocator.load_var(&name.name)?;
|
||||
code.append(load_code);
|
||||
Ok((reg, code))
|
||||
}
|
||||
}
|
||||
|
||||
Expression::Binary {
|
||||
op, left, right, ..
|
||||
} => {
|
||||
// Evaluate left operand
|
||||
let (left_reg, left_code) =
|
||||
self.generate_expression(left, true, func_body)?;
|
||||
code.append(left_code);
|
||||
|
||||
// Evaluate right operand
|
||||
let (right_reg, right_code) =
|
||||
self.generate_expression(right, true, func_body)?;
|
||||
code.append(right_code);
|
||||
|
||||
// Allocate result register
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.append(result_alloc);
|
||||
|
||||
// Generate operation
|
||||
match op {
|
||||
BinaryOperator::Add => {
|
||||
code.push(I::add(left_reg, right_reg, result_reg));
|
||||
}
|
||||
BinaryOperator::Sub => {
|
||||
code.push(I::sub(left_reg, right_reg, result_reg));
|
||||
}
|
||||
BinaryOperator::Mul => {
|
||||
self.include("maths", "./lib/maths/core.dsa");
|
||||
// Call multiply function
|
||||
code.push(I::push(right_reg));
|
||||
code.push(I::push(left_reg));
|
||||
code.push(I::call("maths::multiply"));
|
||||
code.push(I::pop(result_reg));
|
||||
code.push(I::pop(Register::Zero));
|
||||
}
|
||||
BinaryOperator::Div => {
|
||||
return Err(CompilerError::Unimplemented(
|
||||
"TODO: write proper div function for DSA".to_string(),
|
||||
));
|
||||
// self.include("maths", "./lib/maths/core.dsa");
|
||||
// // Call divide function
|
||||
// code.push(format!("\tpush {}", right_reg));
|
||||
// code.push(format!("\tpush {}", left_reg));
|
||||
// code.push("\tcall maths::divide".to_string());
|
||||
// code.push(format!("\tpop {}", result_reg));
|
||||
// code.push("\tpop zero".to_string());
|
||||
}
|
||||
BinaryOperator::Mod => {
|
||||
return Err(CompilerError::Unimplemented(
|
||||
"TODO: write proper mod function for DSA".to_string(),
|
||||
));
|
||||
// self.include("maths", "./lib/maths/core.dsa");
|
||||
// // Call modulo function
|
||||
// code.push(format!("\tpush {}", right_reg));
|
||||
// code.push(format!("\tpush {}", left_reg));
|
||||
// code.push("\tcall maths::modulo".to_string());
|
||||
// code.push(format!("\tpop {}", result_reg));
|
||||
// code.push("\tpop zero".to_string());
|
||||
}
|
||||
BinaryOperator::BitwiseAnd => {
|
||||
code.push(I::and(left_reg, right_reg, result_reg));
|
||||
}
|
||||
BinaryOperator::BitwiseOr => {
|
||||
code.push(I::or(left_reg, right_reg, result_reg));
|
||||
}
|
||||
BinaryOperator::BitwiseXor => {
|
||||
code.push(I::xor(left_reg, right_reg, result_reg));
|
||||
}
|
||||
BinaryOperator::LogicalAnd => {
|
||||
return Err(CompilerError::Unimplemented(
|
||||
"assembler/ISA does not yet support logical and!".to_string(),
|
||||
));
|
||||
}
|
||||
BinaryOperator::LogicalOr => {
|
||||
return Err(CompilerError::Unimplemented(
|
||||
"assembler/ISA does not yet support logical or!".to_string(),
|
||||
));
|
||||
}
|
||||
BinaryOperator::LeftShift => {
|
||||
code.push(I::shl(left_reg, right_reg, 0, result_reg));
|
||||
}
|
||||
BinaryOperator::RightShift => {
|
||||
code.push(I::shr(left_reg, right_reg, 0, result_reg));
|
||||
}
|
||||
// Comparison operators - return 1 (true) or 0 (false)
|
||||
BinaryOperator::Equal => {
|
||||
code.push(I::cmp(left_reg, right_reg));
|
||||
code.push(I::lwi(1, result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(I::jeq(end_label.clone()));
|
||||
code.push(I::lwi(0, result_reg));
|
||||
code.push(I::label(end_label));
|
||||
}
|
||||
BinaryOperator::NotEqual => {
|
||||
code.push(I::cmp(left_reg, right_reg));
|
||||
code.push(I::lwi(1, result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(I::Jne {
|
||||
target: Label(end_label.clone()),
|
||||
});
|
||||
code.push(I::lwi(0, result_reg));
|
||||
code.push(I::label(&end_label));
|
||||
}
|
||||
BinaryOperator::LessThan => {
|
||||
code.push(I::cmp(left_reg, right_reg));
|
||||
code.push(I::lwi(1, result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(I::Jlt {
|
||||
target: Label(end_label.clone()),
|
||||
});
|
||||
code.push(I::lwi(0, result_reg));
|
||||
code.push(I::label(&end_label));
|
||||
}
|
||||
BinaryOperator::LessOrEqual => {
|
||||
code.push(I::cmp(left_reg, right_reg));
|
||||
code.push(I::lwi(1, result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(I::Jle {
|
||||
target: Label(end_label.clone()),
|
||||
});
|
||||
code.push(I::lwi(0, result_reg));
|
||||
code.push(I::label(&end_label));
|
||||
}
|
||||
BinaryOperator::GreaterThan => {
|
||||
code.push(I::cmp(left_reg, right_reg));
|
||||
code.push(I::lwi(1, result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(I::Jgt {
|
||||
target: Label(end_label.clone()),
|
||||
});
|
||||
code.push(I::lwi(0, result_reg));
|
||||
code.push(I::label(&end_label));
|
||||
}
|
||||
BinaryOperator::GreaterOrEqual => {
|
||||
code.push(I::cmp(left_reg, right_reg));
|
||||
code.push(I::lwi(1, result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(I::Jge {
|
||||
target: Label(end_label.clone()),
|
||||
});
|
||||
code.push(I::lwi(0, result_reg));
|
||||
code.push(I::label(&end_label));
|
||||
} // _ => unimplemented!(),
|
||||
}
|
||||
|
||||
// Free operand registers (allocator will protect variables)
|
||||
self.allocator.free_temp(left_reg);
|
||||
self.allocator.free_temp(right_reg);
|
||||
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::UnaryPostfix { op, operand, .. } => {
|
||||
let (operand_reg, operand_code) =
|
||||
self.generate_expression(operand, true, func_body)?;
|
||||
code.append(operand_code);
|
||||
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.append(result_alloc);
|
||||
|
||||
match op {
|
||||
UnaryOperator::Increment => {
|
||||
// postfix increment - return old value
|
||||
code.push(I::mov(operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::Decrement => {
|
||||
// postfix decrement - return old value
|
||||
code.push(I::mov(operand_reg, result_reg));
|
||||
}
|
||||
_ => {
|
||||
return Err(CompilerError::Generic(format!(
|
||||
"{op} is prefix only!"
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
self.allocator.free_temp(operand_reg);
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Unary { op, operand, .. } => {
|
||||
let (operand_reg, operand_code) =
|
||||
self.generate_expression(operand, true, func_body)?;
|
||||
code.append(operand_code);
|
||||
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.append(result_alloc);
|
||||
|
||||
match op {
|
||||
UnaryOperator::Minus => {
|
||||
// Negate: result = 0 - operand
|
||||
code.push(I::sub(Register::Zero, operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::Plus => {
|
||||
// Just move
|
||||
code.push(I::mov(operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::Dereference => {
|
||||
code.push(I::ldw_reg(operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::AddressOf => {
|
||||
// ensure the referenced variable is on the stack and return its
|
||||
// address.
|
||||
let (offset, alloc_code) =
|
||||
self.allocator.free_register(&operand_reg)?;
|
||||
code.push(alloc_code);
|
||||
code.push(I::iadd_dest(
|
||||
Register::Spr,
|
||||
offset - self.allocator.get_stack_offset(),
|
||||
result_reg,
|
||||
));
|
||||
}
|
||||
UnaryOperator::SizeOf => {
|
||||
if let Ok(id) = operand.type_id() {
|
||||
let size = id.size();
|
||||
code.push(I::lwi(size as u32, result_reg));
|
||||
}
|
||||
}
|
||||
UnaryOperator::Increment => {
|
||||
// prefix increment
|
||||
code.push(I::mov(operand_reg, result_reg));
|
||||
code.push(I::iadd_dest(operand_reg, 1, result_reg));
|
||||
}
|
||||
UnaryOperator::Decrement => {
|
||||
// prefix decrement
|
||||
code.push(I::mov(operand_reg, result_reg));
|
||||
code.push(I::iadd_dest(operand_reg, -1, result_reg));
|
||||
}
|
||||
UnaryOperator::BitwiseNot => {
|
||||
code.push(I::not(operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::LogicalNot => {
|
||||
return Err(CompilerError::Unimplemented(
|
||||
"Assembler/ISA does not yet support logical not".to_string(),
|
||||
));
|
||||
}
|
||||
_ => {
|
||||
return Err(CompilerError::Generic(format!(
|
||||
"{op} is postfix only!"
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
self.allocator.free_temp(operand_reg);
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Call {
|
||||
func: Call { name, args },
|
||||
..
|
||||
} => {
|
||||
// first evaluate all the args we're going to need
|
||||
let mut arg_regs = Vec::new();
|
||||
for arg in args.iter().rev() {
|
||||
let (arg_reg, arg_code) =
|
||||
self.generate_expression(arg, true, func_body)?;
|
||||
code.append(arg_code);
|
||||
arg_regs.push(arg_reg);
|
||||
}
|
||||
|
||||
// Save caller-saved registers and track which ones we saved
|
||||
let saved_regs = self.allocator.get_caller_saved_registers();
|
||||
for reg in &saved_regs {
|
||||
// spill variables to stack
|
||||
code.push(self.allocator.free_register(reg).unwrap().1);
|
||||
}
|
||||
|
||||
// Evaluate and push arguments in reverse order
|
||||
for (i, arg_reg) in arg_regs.iter().enumerate() {
|
||||
code.push(I::comment(format!("push arg {}", args.len() - 1 - i)));
|
||||
code.push(I::push(*arg_reg));
|
||||
}
|
||||
|
||||
if self.symbols.contains(&name.name) {
|
||||
// Call local function
|
||||
code.push(I::call(name.to_string()));
|
||||
} else if let Some(ns) = name.namespace.clone()
|
||||
&& self.imports.contains_key(&ns)
|
||||
{
|
||||
code.push(I::call(name.to_string()));
|
||||
} else {
|
||||
return Err(CompilerError::Undefined(name.clone()));
|
||||
}
|
||||
|
||||
let result_reg: Register;
|
||||
|
||||
if use_result {
|
||||
let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
result_reg = temp_result_reg;
|
||||
|
||||
code.append(result_alloc);
|
||||
code.push(I::pop(result_reg));
|
||||
|
||||
// Clean up arguments
|
||||
if args.len() > 1 {
|
||||
for _ in 0..(args.len() - 1) {
|
||||
code.push(I::pop(Register::Zero));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result_reg = Register::Zero;
|
||||
|
||||
// Clean up arguments
|
||||
if args.len() > 0 {
|
||||
for _ in 0..(args.len()) {
|
||||
code.push(I::pop(Register::Zero));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Free argument registers
|
||||
for reg in arg_regs {
|
||||
self.allocator.free_temp(reg);
|
||||
}
|
||||
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::IndexAccess {
|
||||
expr,
|
||||
index,
|
||||
type_id,
|
||||
} => {
|
||||
let (expr_reg, expr_alloc) =
|
||||
self.generate_expression(expr, true, func_body)?;
|
||||
code.append(expr_alloc);
|
||||
|
||||
let (index_reg, index_alloc) =
|
||||
self.generate_expression(index, true, func_body)?;
|
||||
code.append(index_alloc);
|
||||
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.append(result_alloc);
|
||||
|
||||
// add the expr pointer to the index to get the final address.
|
||||
code.push(I::add(expr_reg, index_reg, result_reg));
|
||||
// load the value at the address.
|
||||
code.push(I::ldw_reg(result_reg, result_reg));
|
||||
|
||||
self.allocator.free_temp(expr_reg);
|
||||
self.allocator.free_temp(index_reg);
|
||||
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
Expression::MemberAccess {
|
||||
expr,
|
||||
field_name,
|
||||
type_id,
|
||||
} => Err(CompilerError::Unimplemented(
|
||||
"Structs are not yet implemented!".to_string(),
|
||||
)),
|
||||
|
||||
Expression::TypeCast {
|
||||
expr,
|
||||
target_type,
|
||||
type_id,
|
||||
} => {
|
||||
let (expr_reg, expr_code) =
|
||||
self.generate_expression(expr, true, func_body)?;
|
||||
|
||||
// not sure if we actually need to do anything here.
|
||||
// for now we just return the previous expression.
|
||||
Ok((expr_reg, expr_code))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper for generating unique labels
|
||||
fn get_unique_label(&mut self) -> String {
|
||||
// You'd implement a counter here
|
||||
static COUNTER: AtomicU32 = AtomicU32::new(0);
|
||||
|
||||
let val = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
|
||||
(val + 1).to_string()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,797 @@
|
||||
use std::fmt;
|
||||
|
||||
use crate::backend::dsa::registers::Register;
|
||||
|
||||
pub struct InsBlock {
|
||||
instructions: Vec<Instruction>,
|
||||
}
|
||||
|
||||
impl InsBlock {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
instructions: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, index: usize, instr: Instruction) {
|
||||
self.instructions.insert(index, instr);
|
||||
}
|
||||
|
||||
pub fn push(&mut self, instr: Instruction) {
|
||||
self.instructions.push(instr);
|
||||
}
|
||||
|
||||
pub fn append(&mut self, mut other: Self) {
|
||||
self.instructions.append(&mut other.instructions);
|
||||
}
|
||||
|
||||
pub fn extend(&mut self, instrs: impl IntoIterator<Item = Instruction>) {
|
||||
self.instructions.extend(instrs);
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.instructions.is_empty()
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.instructions.len()
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = &Instruction> {
|
||||
self.instructions.iter()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Instruction>> for InsBlock {
|
||||
fn from(instructions: Vec<Instruction>) -> Self {
|
||||
Self { instructions }
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a block containing exactly one instruction.
impl From<Instruction> for InsBlock {
    fn from(instr: Instruction) -> Self {
        let instructions = vec![instr];
        Self { instructions }
    }
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Instruction {
|
||||
// Labels and comments
|
||||
Label(Label),
|
||||
Comment {
|
||||
text: String,
|
||||
top_level: bool,
|
||||
},
|
||||
Newline,
|
||||
|
||||
// Data Directives
|
||||
Db {
|
||||
label: String,
|
||||
data: Vec<u8>,
|
||||
},
|
||||
Dh {
|
||||
label: String,
|
||||
data: Vec<u16>,
|
||||
},
|
||||
Dw {
|
||||
label: String,
|
||||
data: Vec<u32>,
|
||||
},
|
||||
DString {
|
||||
// alias for db.
|
||||
label: String,
|
||||
data: String,
|
||||
},
|
||||
|
||||
Resx {
|
||||
label: String,
|
||||
size: u32,
|
||||
},
|
||||
|
||||
// Include
|
||||
Include {
|
||||
name: String,
|
||||
path: String,
|
||||
},
|
||||
|
||||
// Data movement
|
||||
Mov {
|
||||
src: Register,
|
||||
dest: Register,
|
||||
},
|
||||
Movs {
|
||||
src: Register,
|
||||
dest: Register,
|
||||
},
|
||||
|
||||
// Memory operations
|
||||
Ldb {
|
||||
src: MemOperand,
|
||||
dest: Register,
|
||||
},
|
||||
Ldh {
|
||||
src: MemOperand,
|
||||
dest: Register,
|
||||
},
|
||||
Ldw {
|
||||
src: MemOperand,
|
||||
dest: Register,
|
||||
},
|
||||
Stb {
|
||||
src: Register,
|
||||
dest: MemOperand,
|
||||
},
|
||||
Sth {
|
||||
src: Register,
|
||||
dest: MemOperand,
|
||||
},
|
||||
Stw {
|
||||
src: Register,
|
||||
dest: MemOperand,
|
||||
},
|
||||
|
||||
// Immediate loads
|
||||
Lli {
|
||||
imm: Imm,
|
||||
dest: Register,
|
||||
},
|
||||
Lui {
|
||||
imm: Imm,
|
||||
dest: Register,
|
||||
},
|
||||
Lwi {
|
||||
imm: Imm,
|
||||
dest: Register,
|
||||
},
|
||||
LwiLabel {
|
||||
label: String,
|
||||
dest: Register,
|
||||
},
|
||||
|
||||
// Arithmetic
|
||||
Add {
|
||||
src1: Register,
|
||||
src2: Register,
|
||||
dest: Register,
|
||||
},
|
||||
Sub {
|
||||
src1: Register,
|
||||
src2: Register,
|
||||
dest: Register,
|
||||
},
|
||||
IAdd {
|
||||
src: Register,
|
||||
imm: Imm,
|
||||
dest: Option<Register>,
|
||||
},
|
||||
ISub {
|
||||
src: Register,
|
||||
imm: Imm,
|
||||
dest: Option<Register>,
|
||||
},
|
||||
Inc {
|
||||
reg: Register,
|
||||
},
|
||||
Dec {
|
||||
reg: Register,
|
||||
},
|
||||
|
||||
// Bitwise
|
||||
And {
|
||||
src1: Register,
|
||||
src2: Register,
|
||||
dest: Register,
|
||||
},
|
||||
Or {
|
||||
src1: Register,
|
||||
src2: Register,
|
||||
dest: Register,
|
||||
},
|
||||
Xor {
|
||||
src1: Register,
|
||||
src2: Register,
|
||||
dest: Register,
|
||||
},
|
||||
Not {
|
||||
src: Register,
|
||||
dest: Register,
|
||||
},
|
||||
Nand {
|
||||
src1: Register,
|
||||
src2: Register,
|
||||
dest: Register,
|
||||
},
|
||||
Nor {
|
||||
src1: Register,
|
||||
src2: Register,
|
||||
dest: Register,
|
||||
},
|
||||
Xnor {
|
||||
src1: Register,
|
||||
src2: Register,
|
||||
dest: Register,
|
||||
},
|
||||
|
||||
// Shifts
|
||||
Shl {
|
||||
src1: Register,
|
||||
r_shamt: Register,
|
||||
i_shamt: u16,
|
||||
dest: Register,
|
||||
},
|
||||
Shr {
|
||||
src1: Register,
|
||||
r_shamt: Register,
|
||||
i_shamt: u16,
|
||||
dest: Register,
|
||||
},
|
||||
|
||||
// Comparison
|
||||
Cmp {
|
||||
reg1: Register,
|
||||
reg2: Register,
|
||||
},
|
||||
|
||||
// Jumps
|
||||
Jmp {
|
||||
target: Label,
|
||||
},
|
||||
Jeq {
|
||||
target: Label,
|
||||
},
|
||||
Jne {
|
||||
target: Label,
|
||||
},
|
||||
Jgt {
|
||||
target: Label,
|
||||
},
|
||||
Jge {
|
||||
target: Label,
|
||||
},
|
||||
Jlt {
|
||||
target: Label,
|
||||
},
|
||||
Jle {
|
||||
target: Label,
|
||||
},
|
||||
|
||||
// Stack
|
||||
Push {
|
||||
reg: Register,
|
||||
},
|
||||
Pop {
|
||||
reg: Register,
|
||||
},
|
||||
|
||||
// Function calls
|
||||
Call {
|
||||
target: String,
|
||||
}, // namespace::function
|
||||
Return,
|
||||
|
||||
// System
|
||||
Hlt,
|
||||
Nop,
|
||||
Int {
|
||||
code: u8,
|
||||
},
|
||||
}
|
||||
|
||||
/// Payload of a data directive, tagged by element kind.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DataDirective {
    /// A sequence of bytes.
    U8(Vec<u8>),
    /// A sequence of 16-bit values.
    U16(Vec<u16>),
    /// A sequence of 32-bit values.
    U32(Vec<u32>),
    /// A string.
    String(String),
    /// A single character.
    Char(char),
}
|
||||
|
||||
impl fmt::Display for Instruction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Self::Label(l) => write!(f, "{}:", l),
|
||||
Self::Newline => write!(f, ""), /* empty string as newlines are inserted */
|
||||
// automatically.
|
||||
Self::Comment { text, top_level } => write!(
|
||||
f,
|
||||
"{}",
|
||||
text.lines()
|
||||
.map(|line| format!(
|
||||
"{}// {}",
|
||||
if *top_level { "" } else { " " },
|
||||
line.trim(),
|
||||
))
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n")
|
||||
),
|
||||
|
||||
Self::Include { name, path } => write!(f, "include {name}: \"{}\"", path),
|
||||
|
||||
Self::Db { label, data } => write!(
|
||||
f,
|
||||
"db {}: {}",
|
||||
label,
|
||||
data.iter()
|
||||
.map(|&b| format!("{:#04X}", b))
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ")
|
||||
),
|
||||
Self::Dh { label, data } => write!(
|
||||
f,
|
||||
"dh {}: {}",
|
||||
label,
|
||||
data.iter()
|
||||
.map(|&b| format!("{:#06X}", b))
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ")
|
||||
),
|
||||
Self::Dw { label, data } => write!(
|
||||
f,
|
||||
"dw {}: {}",
|
||||
label,
|
||||
data.iter()
|
||||
.map(|&b| format!("{:#08X}", b))
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ")
|
||||
),
|
||||
Self::DString { label, data } => write!(f, "db {}: \"{}\"", label, data),
|
||||
|
||||
Self::Resx { label, size } => write!(f, "resx {}: {}", label, size),
|
||||
|
||||
Self::Mov { src, dest } => write!(f, " mov {}, {}", src, dest),
|
||||
Self::Movs { src, dest } => write!(f, " movs {}, {}", src, dest),
|
||||
|
||||
Self::Ldb { src: addr, dest } => {
|
||||
let (reg, offset) = reg_and_offset(addr);
|
||||
write!(f, " ldb {}, {}, {}", reg, dest, offset)
|
||||
}
|
||||
Self::Ldh { src: addr, dest } => {
|
||||
let (reg, offset) = reg_and_offset(addr);
|
||||
write!(f, " ldh {}, {}, {}", reg, dest, offset)
|
||||
}
|
||||
Self::Ldw { src, dest } => {
|
||||
let (reg, offset) = reg_and_offset(src);
|
||||
write!(f, " ldw {}, {}, {}", reg, dest, offset)
|
||||
}
|
||||
// Self::Ldbs { addr, dest } => {
|
||||
// write!(f, " ldbs {}, {}", format_mem_operand(addr), dest)
|
||||
// }
|
||||
// Self::Ldhs { addr, dest } => {
|
||||
// write!(f, " ldhs {}, {}", format_mem_operand(addr), dest)
|
||||
// }
|
||||
// Self::Ldws { addr, dest } => {
|
||||
// write!(f, " ldws {}, {}", format_mem_operand(addr), dest)
|
||||
// }
|
||||
Self::Stb { src, dest: addr } => {
|
||||
let (reg, offset) = reg_and_offset(addr);
|
||||
write!(f, " stb {}, {}, {}", src, reg, offset)
|
||||
}
|
||||
Self::Sth { src, dest: addr } => {
|
||||
let (reg, offset) = reg_and_offset(addr);
|
||||
write!(f, " sth {}, {}, {}", src, reg, offset)
|
||||
}
|
||||
Self::Stw { src, dest: addr } => {
|
||||
let (reg, offset) = reg_and_offset(addr);
|
||||
write!(f, " stw {}, {}, {}", src, reg, offset)
|
||||
}
|
||||
|
||||
Self::Lli { imm, dest } => write!(f, " lli {}, {}", imm, dest),
|
||||
Self::Lui { imm, dest } => write!(f, " lui {}, {}", imm, dest),
|
||||
Self::Lwi { imm, dest } => write!(f, " lwi {}, {}", imm, dest),
|
||||
Self::LwiLabel { label, dest } => write!(f, " lwi {}, {}", label, dest),
|
||||
|
||||
// arithmetic
|
||||
Self::Add { src1, src2, dest } => {
|
||||
write!(f, " add {}, {}, {}", src1, src2, dest)
|
||||
}
|
||||
Self::Sub { src1, src2, dest } => {
|
||||
write!(f, " sub {}, {}, {}", src1, src2, dest)
|
||||
}
|
||||
Self::And { src1, src2, dest } => {
|
||||
write!(f, " and {}, {}, {}", src1, src2, dest)
|
||||
}
|
||||
Self::Or { src1, src2, dest } => {
|
||||
write!(f, " or {}, {}, {}", src1, src2, dest)
|
||||
}
|
||||
Self::Nand { src1, src2, dest } => {
|
||||
write!(f, " nand {}, {}, {}", src1, src2, dest)
|
||||
}
|
||||
Self::Xor { src1, src2, dest } => {
|
||||
write!(f, " xor {}, {}, {}", src1, src2, dest)
|
||||
}
|
||||
Self::Nor { src1, src2, dest } => {
|
||||
write!(f, " nor {}, {}, {}", src1, src2, dest)
|
||||
}
|
||||
Self::Not { src, dest } => {
|
||||
write!(f, " not {} {}", src, dest)
|
||||
}
|
||||
Self::Xnor { src1, src2, dest } => {
|
||||
write!(f, " xnor {}, {}, {}", src1, src2, dest)
|
||||
}
|
||||
Self::IAdd { src, imm, dest } => {
|
||||
if let Some(d) = dest {
|
||||
write!(f, " addi {}, {}, {}", src, imm, d)
|
||||
} else {
|
||||
write!(f, " addi {}, {}", src, imm)
|
||||
}
|
||||
}
|
||||
Self::ISub { src, imm, dest } => {
|
||||
if let Some(d) = dest {
|
||||
write!(f, " subi {}, {}, {}", src, imm, d)
|
||||
} else {
|
||||
write!(f, " subi {}, {}", src, imm)
|
||||
}
|
||||
}
|
||||
|
||||
// shift instructions
|
||||
Self::Shl {
|
||||
src1,
|
||||
r_shamt,
|
||||
i_shamt,
|
||||
dest,
|
||||
} => {
|
||||
write!(f, " shl {}, {}, {}, {}", src1, r_shamt, i_shamt, dest)
|
||||
}
|
||||
Self::Shr {
|
||||
src1,
|
||||
r_shamt,
|
||||
i_shamt,
|
||||
dest,
|
||||
} => {
|
||||
write!(f, " shl {}, {}, {}, {}", src1, r_shamt, i_shamt, dest)
|
||||
}
|
||||
|
||||
// increment instructions
|
||||
Self::Inc { reg } => write!(f, " inc {}", reg),
|
||||
Self::Dec { reg } => write!(f, " dec {}", reg),
|
||||
|
||||
Self::Cmp { reg1, reg2 } => write!(f, " cmp {}, {}", reg1, reg2),
|
||||
|
||||
// jump instructions
|
||||
Self::Jmp { target } => write!(f, " jmp {}", target),
|
||||
Self::Jeq { target } => write!(f, " jeq {}", target),
|
||||
Self::Jne { target } => write!(f, " jne {}", target),
|
||||
Self::Jgt { target } => write!(f, " jgt {}", target),
|
||||
Self::Jge { target } => write!(f, " jge {}", target),
|
||||
Self::Jlt { target } => write!(f, " jlt {}", target),
|
||||
Self::Jle { target } => write!(f, " jle {}", target),
|
||||
|
||||
// stack pseudoinstructions
|
||||
Self::Push { reg } => write!(f, " push {}", reg),
|
||||
Self::Pop { reg } => write!(f, " pop {}", reg),
|
||||
|
||||
// call & return pseudoinstructions
|
||||
Self::Call { target } => write!(f, " call {}", target),
|
||||
Self::Return => write!(f, " return"),
|
||||
|
||||
// misc instructions
|
||||
Self::Int { code } => write!(f, " int {}", code),
|
||||
Self::Hlt => write!(f, " hlt"),
|
||||
Self::Nop => write!(f, " nop"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Instruction {
|
||||
// data directives
|
||||
pub fn db_string(label: impl Into<String>, data: impl Into<String>) -> Self {
|
||||
Self::DString {
|
||||
label: label.into(),
|
||||
data: data.into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn db_word(label: impl Into<String>, data: u32) -> Self {
|
||||
Self::Dw {
|
||||
label: label.into(),
|
||||
data: vec![data],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn db_bytes(label: impl Into<String>, data: &[u8]) -> Self {
|
||||
Self::Db {
|
||||
label: label.into(),
|
||||
data: data.to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
// Movement
|
||||
pub fn mov<R1, R2>(src: R1, dest: R2) -> Self
|
||||
where
|
||||
R1: Into<Register>,
|
||||
R2: Into<Register>,
|
||||
{
|
||||
Self::Mov {
|
||||
src: src.into(),
|
||||
dest: dest.into(),
|
||||
}
|
||||
}
|
||||
|
||||
// Memory loads
|
||||
pub fn ldw_reg<R>(base: R, dest: Register) -> Self
|
||||
where
|
||||
R: Into<Register>,
|
||||
{
|
||||
Self::Ldw {
|
||||
src: MemOperand::RegIndirect(base.into()),
|
||||
dest,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ldw_reg_offset<R>(base: R, dest: Register, offset: i32) -> Self
|
||||
where
|
||||
R: Into<Register>,
|
||||
{
|
||||
Self::Ldw {
|
||||
src: MemOperand::RegOffset(base.into(), offset),
|
||||
dest,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ldw_label(label: impl Into<Label>, dest: Register) -> Self {
|
||||
Self::Ldw {
|
||||
src: MemOperand::Label(label.into()),
|
||||
dest,
|
||||
}
|
||||
}
|
||||
|
||||
// Memory stores
|
||||
pub fn stw_reg<R>(src: Register, base: R) -> Self
|
||||
where
|
||||
R: Into<Register>,
|
||||
{
|
||||
Self::Stw {
|
||||
src,
|
||||
dest: MemOperand::RegIndirect(base.into()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stw_reg_offset<R>(src: Register, base: R, offset: i32) -> Self
|
||||
where
|
||||
R: Into<Register>,
|
||||
{
|
||||
Self::Stw {
|
||||
src,
|
||||
dest: MemOperand::RegOffset(base.into(), offset),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stw_label(src: Register, label: impl Into<Label>) -> Self {
|
||||
Self::Stw {
|
||||
src,
|
||||
dest: MemOperand::Label(label.into()),
|
||||
}
|
||||
}
|
||||
|
||||
// Arithmetic
|
||||
pub fn add(src1: Register, src2: Register, dest: Register) -> Self {
|
||||
Self::Add { src1, src2, dest }
|
||||
}
|
||||
|
||||
pub fn sub(src1: Register, src2: Register, dest: Register) -> Self {
|
||||
Self::Sub { src1, src2, dest }
|
||||
}
|
||||
|
||||
pub fn and(src1: Register, src2: Register, dest: Register) -> Self {
|
||||
Self::And { src1, src2, dest }
|
||||
}
|
||||
|
||||
pub fn or(src1: Register, src2: Register, dest: Register) -> Self {
|
||||
Self::Or { src1, src2, dest }
|
||||
}
|
||||
|
||||
pub fn xor(src1: Register, src2: Register, dest: Register) -> Self {
|
||||
Self::Xor { src1, src2, dest }
|
||||
}
|
||||
|
||||
pub fn not(src: Register, dest: Register) -> Self {
|
||||
Self::Not { src, dest }
|
||||
}
|
||||
|
||||
pub fn shl(src1: Register, r_shamt: Register, i_shamt: u16, dest: Register) -> Self {
|
||||
Self::Shl {
|
||||
src1,
|
||||
r_shamt,
|
||||
i_shamt,
|
||||
dest,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn shr(src1: Register, r_shamt: Register, i_shamt: u16, dest: Register) -> Self {
|
||||
Self::Shr {
|
||||
src1,
|
||||
r_shamt,
|
||||
i_shamt,
|
||||
dest,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iadd(src: Register, value: i64) -> Self {
|
||||
let imm = Imm(value.unsigned_abs() as u32);
|
||||
|
||||
if value < 0 {
|
||||
Self::ISub {
|
||||
src,
|
||||
imm,
|
||||
dest: None,
|
||||
}
|
||||
} else {
|
||||
Self::IAdd {
|
||||
src,
|
||||
imm,
|
||||
dest: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iadd_dest(src: Register, value: i32, dest: Register) -> Self {
|
||||
let imm = Imm(value.unsigned_abs());
|
||||
|
||||
if value < 0 {
|
||||
Self::ISub {
|
||||
src,
|
||||
imm,
|
||||
dest: Some(dest),
|
||||
}
|
||||
} else {
|
||||
Self::IAdd {
|
||||
src,
|
||||
imm,
|
||||
dest: Some(dest),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn inc(reg: Register) -> Self {
|
||||
Self::Inc { reg }
|
||||
}
|
||||
|
||||
pub fn dec(reg: Register) -> Self {
|
||||
Self::Dec { reg }
|
||||
}
|
||||
|
||||
// Immediate loads
|
||||
pub fn lwi(value: u32, dest: Register) -> Self {
|
||||
if value > 0xFFFF {
|
||||
Self::Lwi {
|
||||
imm: Imm(value),
|
||||
dest,
|
||||
}
|
||||
} else {
|
||||
Self::Lli {
|
||||
imm: Imm(value),
|
||||
dest,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn lwi_label(label: impl Into<String>, dest: Register) -> Self {
|
||||
Self::LwiLabel {
|
||||
label: label.into(),
|
||||
dest,
|
||||
}
|
||||
}
|
||||
|
||||
// Control flow
|
||||
pub fn label(name: impl Into<String>) -> Self {
|
||||
Self::Label(Label(name.into()))
|
||||
}
|
||||
|
||||
pub fn jmp(target: impl Into<Label>) -> Self {
|
||||
Self::Jmp {
|
||||
target: target.into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn jeq(target: impl Into<Label>) -> Self {
|
||||
Self::Jeq {
|
||||
target: target.into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn cmp(reg1: Register, reg2: Register) -> Self {
|
||||
Self::Cmp { reg1, reg2 }
|
||||
}
|
||||
|
||||
// Stack
|
||||
pub fn push(reg: Register) -> Self {
|
||||
Self::Push { reg }
|
||||
}
|
||||
|
||||
pub fn pop(reg: Register) -> Self {
|
||||
Self::Pop { reg }
|
||||
}
|
||||
|
||||
// Functions
|
||||
pub fn call(target: impl Into<String>) -> Self {
|
||||
Self::Call {
|
||||
target: target.into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn int(code: u8) -> Self {
|
||||
Self::Int { code }
|
||||
}
|
||||
|
||||
pub fn ret() -> Self {
|
||||
Self::Return
|
||||
}
|
||||
|
||||
// Utilities
|
||||
pub fn comment(text: impl Into<String>) -> Self {
|
||||
Self::Comment {
|
||||
text: text.into(),
|
||||
top_level: false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn global_comment(text: impl Into<String>) -> Self {
|
||||
Self::Comment {
|
||||
text: text.into(),
|
||||
top_level: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn include(name: impl Into<String>, path: impl Into<String>) -> Self {
|
||||
Self::Include {
|
||||
name: name.into(),
|
||||
path: path.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience trait for Label conversion
|
||||
impl From<String> for Label {
|
||||
fn from(s: String) -> Self {
|
||||
Label(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&str> for Label {
|
||||
fn from(s: &str) -> Self {
|
||||
Label(s.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
fn reg_and_offset(op: &MemOperand) -> (String, i32) {
|
||||
match op {
|
||||
MemOperand::RegIndirect(reg) => (reg.to_string(), 0),
|
||||
MemOperand::RegOffset(reg, offset) => (reg.to_string(), *offset),
|
||||
MemOperand::Label(label) => (label.to_string(), 0),
|
||||
MemOperand::LabelOffset(label, offset) => (label.to_string(), *offset),
|
||||
}
|
||||
}
|
||||
|
||||
/// Memory operand for loads/stores
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum MemOperand {
|
||||
/// Register indirect: [reg]
|
||||
RegIndirect(Register),
|
||||
/// Register with offset: [reg + offset]
|
||||
RegOffset(Register, i32),
|
||||
/// Label: [label]
|
||||
Label(Label),
|
||||
/// Label with offset: [label + offset]
|
||||
LabelOffset(Label, i32),
|
||||
}
|
||||
|
||||
/// Immediate value (16-bit or 32-bit)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Imm(pub u32);

impl fmt::Display for Imm {
    /// Formats the immediate as a decimal number.
    ///
    /// Delegates to the inner `u32` so that width, fill and alignment flags
    /// supplied by the caller (e.g. `{:>8}`) are honoured rather than dropped.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
|
||||
|
||||
/// Label reference
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Label(pub String);

impl fmt::Display for Label {
    /// Writes the label name.
    ///
    /// Delegates to the inner `String` so that width, fill and alignment flags
    /// supplied by the caller (e.g. `{:<12}`) are honoured rather than dropped.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
use crate::model::{CompilerError, Program};
|
||||
|
||||
mod codegen;
|
||||
mod instruction;
|
||||
mod registers;
|
||||
mod scope;
|
||||
mod variable;
|
||||
|
||||
pub fn generate_code(ast: &Program) -> Result<String, CompilerError> {
|
||||
let mut codegen = codegen::CodeGenerator::new(ast.clone());
|
||||
codegen.generate()
|
||||
}
|
||||
@@ -0,0 +1,560 @@
|
||||
use std::{collections::HashMap, fmt};
|
||||
|
||||
use crate::{
|
||||
backend::dsa::instruction::{InsBlock, Instruction},
|
||||
model::CompilerError,
|
||||
};
|
||||
|
||||
/// Register allocator for DSA assembly generation
|
||||
/// Manages general-purpose registers (rg0-rgf) and handles stack spilling
|
||||
pub struct RegisterAllocator {
|
||||
/// Available general-purpose registers
|
||||
/// Maps variable names to their current location (register or stack offset)
|
||||
variable_locations: HashMap<String, Location>,
|
||||
|
||||
/// Maps registers to the variables they currently hold
|
||||
register_contents: HashMap<Register, String>,
|
||||
|
||||
/// Current stack offset for local variables (relative to bpr)
|
||||
/// Starts at -4 (going downward from base pointer)
|
||||
stack_offset: i32,
|
||||
|
||||
/// Track which registers are currently in use
|
||||
in_use: Vec<(Register, bool)>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Location {
|
||||
register: Option<Register>,
|
||||
stack: Option<i32>,
|
||||
}
|
||||
|
||||
impl Location {
|
||||
pub fn stack(offset: i32) -> Self {
|
||||
Location {
|
||||
register: None,
|
||||
stack: Some(offset),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register(register: Register) -> Self {
|
||||
Location {
|
||||
register: Some(register),
|
||||
stack: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RegisterAllocator {
|
||||
pub fn new() -> Self {
|
||||
// Initialize with available GP registers (rg0-rgf = 16 registers)
|
||||
let in_use = vec![
|
||||
Register::Rg0,
|
||||
Register::Rg1,
|
||||
Register::Rg2,
|
||||
Register::Rg3,
|
||||
Register::Rg4,
|
||||
Register::Rg5,
|
||||
Register::Rg6,
|
||||
Register::Rg7,
|
||||
Register::Rg8,
|
||||
Register::Rg9,
|
||||
Register::Rga,
|
||||
Register::Rgb,
|
||||
Register::Rgc,
|
||||
Register::Rgd,
|
||||
Register::Rge,
|
||||
Register::Rgf,
|
||||
]
|
||||
.iter()
|
||||
.map(|®| (reg, false))
|
||||
.collect();
|
||||
|
||||
RegisterAllocator {
|
||||
// available_registers: registers,
|
||||
variable_locations: HashMap::new(),
|
||||
register_contents: HashMap::new(),
|
||||
stack_offset: -4, // Start at -4 (first local below saved bpr)
|
||||
in_use,
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocate a temporary register for expression evaluation
|
||||
/// Returns the register name and optionally assembly code to save it
|
||||
pub fn alloc_temp(&mut self) -> Result<(Register, InsBlock), CompilerError> {
|
||||
// Try to find an unused register
|
||||
|
||||
// println!("finding! {:#?}", self.in_use);
|
||||
|
||||
if let Some(reg) = self.find_free_register() {
|
||||
self.in_use[reg as usize].1 = true;
|
||||
return Ok((reg, InsBlock::new()));
|
||||
}
|
||||
|
||||
// All registers in use - need to spill one
|
||||
// Choose the first register with a variable we can spill
|
||||
// Find a register to spill
|
||||
|
||||
// let reg_to_spill = self
|
||||
// .available_registers
|
||||
// .iter()
|
||||
// .find(|reg| self.register_contents.contains_key(*reg))
|
||||
// .cloned();
|
||||
|
||||
// if let Some(reg) = reg_to_spill {
|
||||
// // Spill this variable to stack
|
||||
// let spill_code = self.spill_register(®)?;
|
||||
// code.extend(spill_code);
|
||||
|
||||
// self.in_use.insert(reg.clone(), true);
|
||||
// return Ok((reg, code));
|
||||
// }
|
||||
|
||||
todo!("an efficient stack spilling algorithm. needs scope awareness.");
|
||||
|
||||
Err(CompilerError::Generic(
|
||||
"All registers are used up yet there are no variables to spill to the stack"
|
||||
.to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
// fn set_in_use(&mut self, reg: Register, in_use: bool) {
|
||||
// self.in_use[reg as usize].1 = in_use;
|
||||
// }
|
||||
|
||||
/// Free a temporary register after use
|
||||
/// NOTE: This will NOT free registers that contain variables!
|
||||
/// Variables persist throughout their scope and must not be freed
|
||||
pub fn free_temp(&mut self, reg: Register) {
|
||||
// Check if this register contains a variable
|
||||
if self.register_contents.contains_key(®) {
|
||||
// This register holds a variable - don't free it!
|
||||
// Variables are only freed when they go out of scope via free_var()
|
||||
return;
|
||||
}
|
||||
|
||||
// This is a true temporary - safe to free
|
||||
if !matches!(reg, Register::Zero | Register::Null) {
|
||||
self.in_use[reg as usize].1 = false;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn free_var(&mut self, var: &str) {
|
||||
// Check if this variable is in a register
|
||||
if let Some(location) = self.variable_locations.get(var).cloned() {
|
||||
if let Some(reg) = location.register
|
||||
&& !matches!(reg, Register::Zero | Register::Null)
|
||||
{
|
||||
self.register_contents.remove(®);
|
||||
self.in_use[reg as usize].1 = false;
|
||||
}
|
||||
|
||||
self.variable_locations.remove(var);
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocate a register for a named variable
|
||||
/// Returns the register and any necessary assembly code
|
||||
pub fn alloc_var(
|
||||
&mut self,
|
||||
var_name: &str,
|
||||
) -> Result<(Register, InsBlock), CompilerError> {
|
||||
if let Some(mut location) = self.variable_locations.get(var_name).cloned() {
|
||||
// if the var is in a register we can use it already.
|
||||
if let Some(reg) = location.register {
|
||||
return Ok((reg, InsBlock::new()));
|
||||
}
|
||||
|
||||
// if the variable is on the stack only, we need to get it in a register.
|
||||
if let Some(offset) = location.stack {
|
||||
// Variable was pushed, need to calculate actual position and update its
|
||||
// location.
|
||||
let (reg, mut code) = self.alloc_temp()?;
|
||||
|
||||
// acknowledge var is now in a reg as well.
|
||||
location.register = Some(reg);
|
||||
|
||||
// Load from bpr + offset (offset is negative)
|
||||
// code.push(format!("\tsubi bpr {} {}", -(offset + 4), reg));
|
||||
|
||||
code.push(Instruction::ldw_reg_offset(
|
||||
Register::Spr,
|
||||
reg,
|
||||
offset - self.stack_offset,
|
||||
));
|
||||
|
||||
// Update location to register
|
||||
self.variable_locations
|
||||
.insert(var_name.to_string(), location);
|
||||
self.register_contents.insert(reg, var_name.to_string());
|
||||
|
||||
return Ok((reg, code));
|
||||
}
|
||||
}
|
||||
|
||||
// Variable doesn't have a location yet, allocate a new register
|
||||
let (reg, code) = self.alloc_temp()?;
|
||||
self.variable_locations
|
||||
.insert(var_name.to_string(), Location::register(reg));
|
||||
self.register_contents.insert(reg, var_name.to_string());
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
/// Get the current location of a variable
|
||||
pub fn _get_var_location(&self, var_name: &str) -> Option<&Location> {
|
||||
self.variable_locations.get(var_name)
|
||||
}
|
||||
|
||||
/// Load a variable into a register (allocating if necessary)
|
||||
/// Returns the register and assembly code to load it
|
||||
pub fn load_var(
|
||||
&mut self,
|
||||
var_name: &str,
|
||||
) -> Result<(Register, InsBlock), CompilerError> {
|
||||
self.alloc_var(var_name)
|
||||
}
|
||||
|
||||
/// Store a value from a register into a variable
|
||||
/// Updates tracking and returns any necessary assembly code
|
||||
pub fn store_var(&mut self, var_name: &str, source_reg: &Register) -> InsBlock {
|
||||
let mut block = InsBlock::new();
|
||||
|
||||
// Check if variable already has a location
|
||||
if let Some(location) = self.variable_locations.get(var_name) {
|
||||
// if the variable exists in a register we write to that.
|
||||
match location.register {
|
||||
Some(reg) if reg == *source_reg => {
|
||||
block.push(Instruction::mov(*source_reg, reg));
|
||||
return block;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
// if the variable exists on the stack but not a register we write here.
|
||||
if let Some(offset) = location.stack {
|
||||
block.push(Instruction::stw_reg_offset(
|
||||
*source_reg,
|
||||
Register::Spr,
|
||||
offset - self.stack_offset,
|
||||
));
|
||||
return block;
|
||||
}
|
||||
}
|
||||
|
||||
// Variable doesn't exist yet, we can just use the same reg.
|
||||
// if we can avoid a move, absolutely do that.
|
||||
|
||||
// if this is true then there's no permanent variable here so it's safe to use.
|
||||
if !self.register_contents.contains_key(source_reg) {
|
||||
self.variable_locations
|
||||
.insert(var_name.to_string(), Location::register(*source_reg));
|
||||
self.register_contents
|
||||
.insert(*source_reg, var_name.to_string());
|
||||
self.in_use[*source_reg as usize].1 = true;
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
// if current register isn't free, (eg is another variable) we assign somewhere
|
||||
// else.
|
||||
if let Some(free_reg) = self.find_free_register() {
|
||||
self.variable_locations
|
||||
.insert(var_name.to_string(), Location::register(free_reg));
|
||||
self.register_contents
|
||||
.insert(free_reg, var_name.to_string());
|
||||
self.in_use[free_reg as usize].1 = true;
|
||||
|
||||
block.push(Instruction::mov(*source_reg, free_reg));
|
||||
return block;
|
||||
}
|
||||
|
||||
// No free registers - allocate on stack
|
||||
// code.push(format!("\tstw {}, bpr, {}", source_reg, self.stack_offset));
|
||||
// self.variable_locations
|
||||
// .insert(var_name.to_string(), Location::Stack(self.stack_offset));
|
||||
// self.stack_offset -= 4; // Move to next stack slot
|
||||
//
|
||||
todo!("an efficient stack spilling algorithm. needs scope awareness.");
|
||||
}
|
||||
|
||||
/// spill a register to the stack (WITHOUT FREEING)
|
||||
/// DO NOT USE this if it's for a pointer!!!!
|
||||
pub fn _spill_register(&mut self, reg: &Register) -> Result<InsBlock, CompilerError> {
|
||||
let mut code = InsBlock::new();
|
||||
|
||||
// check if the variable is declared.
|
||||
if let Some(var_name) = self.register_contents.get(reg).cloned()
|
||||
&& let Some(location) = self.variable_locations.get_mut(&var_name)
|
||||
{
|
||||
// check if var is on the stack
|
||||
if let Some(offset) = location.stack {
|
||||
code.push(Instruction::stw_reg_offset(
|
||||
*reg,
|
||||
Register::Spr,
|
||||
offset - self.stack_offset,
|
||||
));
|
||||
return Ok(code);
|
||||
}
|
||||
|
||||
// Track that we pushed one word
|
||||
self.stack_offset -= 4;
|
||||
|
||||
// if the variable is not on the stack:
|
||||
// push register to stack (spr decrements automatically)
|
||||
let offset = self.stack_offset;
|
||||
code.push(Instruction::push(*reg));
|
||||
|
||||
// Update variable location - it's now at current spr
|
||||
// Note: We track offset from bpr for consistency
|
||||
location.stack = Some(offset);
|
||||
|
||||
Ok(code)
|
||||
} else {
|
||||
Err(CompilerError::Generic(format!(
|
||||
"Register {} does not contain a variable to spill!",
|
||||
reg
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
/// free a register by spilling it to the stack.
|
||||
/// Returns assembly code to perform the spill
|
||||
pub fn free_register(
|
||||
&mut self,
|
||||
reg: &Register,
|
||||
) -> Result<(i32, Instruction), CompilerError> {
|
||||
// check if the variable is declared.
|
||||
if let Some(var_name) = self.register_contents.get(reg).cloned()
|
||||
&& let Some(location) = self.variable_locations.get_mut(&var_name)
|
||||
{
|
||||
// check if var name is on the stack
|
||||
if let Some(offset) = location.stack {
|
||||
// store current register value in stack location
|
||||
let code = Instruction::stw_reg_offset(
|
||||
*reg,
|
||||
Register::Spr,
|
||||
offset - self.stack_offset,
|
||||
);
|
||||
|
||||
// free the register.
|
||||
location.register = None;
|
||||
self.register_contents.remove(reg);
|
||||
return Ok((offset, code));
|
||||
}
|
||||
|
||||
// Track that we pushed one word
|
||||
self.stack_offset -= 4;
|
||||
|
||||
let offset = self.stack_offset;
|
||||
let code = Instruction::push(*reg);
|
||||
|
||||
// Update variable location
|
||||
// Note: We track offset from bpr for consistency
|
||||
location.stack = Some(offset);
|
||||
location.register = None;
|
||||
self.register_contents.remove(reg);
|
||||
|
||||
Ok((offset, code))
|
||||
} else {
|
||||
Err(CompilerError::Generic(format!(
|
||||
"Register {} does not contain a variable to spill!",
|
||||
reg
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Find a free register (not currently in use)
|
||||
fn find_free_register(&self) -> Option<Register> {
|
||||
self.in_use
|
||||
.iter()
|
||||
.filter(|(_, in_use)| !*in_use)
|
||||
.map(|(reg, _)| *reg)
|
||||
.next()
|
||||
}
|
||||
|
||||
/// Spill all registers to stack (useful before function calls)
|
||||
pub fn _spill_all(&mut self) -> InsBlock {
|
||||
let mut code = InsBlock::new();
|
||||
|
||||
let regs_to_spill: Vec<Register> =
|
||||
self.register_contents.keys().cloned().collect();
|
||||
|
||||
for reg in regs_to_spill {
|
||||
if let Ok(spill_code) = self.free_register(®) {
|
||||
code.push(spill_code.1);
|
||||
}
|
||||
}
|
||||
|
||||
code
|
||||
}
|
||||
|
||||
/// Get the total stack offset
|
||||
pub fn get_stack_offset(&self) -> i32 {
|
||||
self.stack_offset
|
||||
}
|
||||
|
||||
/// Get the total stack space needed for local variables
|
||||
pub fn _get_stack_size(&self) -> i32 {
|
||||
-self.stack_offset // Convert negative offset to positive size
|
||||
}
|
||||
|
||||
/// Reset allocator for a new function
|
||||
pub fn reset(&mut self) {
|
||||
self.variable_locations.clear();
|
||||
self.register_contents.clear();
|
||||
self.stack_offset = -4;
|
||||
self.in_use = vec![
|
||||
Register::Rg0,
|
||||
Register::Rg1,
|
||||
Register::Rg2,
|
||||
Register::Rg3,
|
||||
Register::Rg4,
|
||||
Register::Rg5,
|
||||
Register::Rg6,
|
||||
Register::Rg7,
|
||||
Register::Rg8,
|
||||
Register::Rg9,
|
||||
Register::Rga,
|
||||
Register::Rgb,
|
||||
Register::Rgc,
|
||||
Register::Rgd,
|
||||
Register::Rge,
|
||||
Register::Rgf,
|
||||
]
|
||||
.iter()
|
||||
.map(|®| (reg, false))
|
||||
.collect();
|
||||
}
|
||||
|
||||
/// Get list of registers that contain variables and are in use
|
||||
/// These need to be saved before function calls
|
||||
pub fn get_caller_saved_registers(&self) -> Vec<Register> {
|
||||
self.register_contents
|
||||
.iter()
|
||||
.filter(|(reg, _)| {
|
||||
self.in_use
|
||||
.get(**reg as usize)
|
||||
.unwrap_or(&(Register::Null, false))
|
||||
.1
|
||||
})
|
||||
.map(|(reg, _)| *reg)
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// A DSA machine register.
///
/// The sixteen general-purpose registers carry explicit discriminants 0-15,
/// which the allocators use as indices into their per-register tables. The
/// remaining variants continue from 16.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Register {
    // general purpose
    Rg0 = 0,
    Rg1 = 1,
    Rg2 = 2,
    Rg3 = 3,
    Rg4 = 4,
    Rg5 = 5,
    Rg6 = 6,
    Rg7 = 7,
    Rg8 = 8,
    Rg9 = 9,
    Rga = 10,
    Rgb = 11,
    Rgc = 12,
    Rgd = 13,
    Rge = 14,
    Rgf = 15,

    // special
    Bpr,
    Spr,
    Ret,
    Acc,

    // read only
    Pcx,
    Zero,

    // null
    Null,
}

impl Register {
    /// Returns the sixteen general-purpose registers in discriminant order.
    pub fn get_gp() -> [Register; 16] {
        [
            Register::Rg0,
            Register::Rg1,
            Register::Rg2,
            Register::Rg3,
            Register::Rg4,
            Register::Rg5,
            Register::Rg6,
            Register::Rg7,
            Register::Rg8,
            Register::Rg9,
            Register::Rga,
            Register::Rgb,
            Register::Rgc,
            Register::Rgd,
            Register::Rge,
            Register::Rgf,
        ]
    }

    /// Returns `true` for the general-purpose registers `rg0`-`rgf`.
    pub fn is_gp(&self) -> bool {
        (*self as u8) < 16
    }

    /// Returns the general-purpose register whose discriminant is `idx`.
    ///
    /// The lookup goes through `get_gp()` so the two cannot drift apart.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is greater than 15.
    pub fn from_index(idx: usize) -> Register {
        match Self::get_gp().get(idx) {
            Some(&reg) => reg,
            None => unreachable!("this function shouldn't ever be called with idx>15"),
        }
    }
}
|
||||
|
||||
impl fmt::Display for Register {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Rg0 => write!(f, "rg0"),
|
||||
Self::Rg1 => write!(f, "rg1"),
|
||||
Self::Rg2 => write!(f, "rg2"),
|
||||
Self::Rg3 => write!(f, "rg3"),
|
||||
Self::Rg4 => write!(f, "rg4"),
|
||||
Self::Rg5 => write!(f, "rg5"),
|
||||
Self::Rg6 => write!(f, "rg6"),
|
||||
Self::Rg7 => write!(f, "rg7"),
|
||||
Self::Rg8 => write!(f, "rg8"),
|
||||
Self::Rg9 => write!(f, "rg9"),
|
||||
Self::Rga => write!(f, "rga"),
|
||||
Self::Rgb => write!(f, "rgb"),
|
||||
Self::Rgc => write!(f, "rgc"),
|
||||
Self::Rgd => write!(f, "rgd"),
|
||||
Self::Rge => write!(f, "rge"),
|
||||
Self::Rgf => write!(f, "rgf"),
|
||||
|
||||
Self::Acc => write!(f, "acc"),
|
||||
Self::Ret => write!(f, "ret"),
|
||||
Self::Bpr => write!(f, "bpr"),
|
||||
Self::Spr => write!(f, "spr"),
|
||||
|
||||
Self::Zero => write!(f, "zero"),
|
||||
Self::Pcx => write!(f, "pcx"),
|
||||
|
||||
Self::Null => write!(f, "null"),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,287 @@
|
||||
use std::{cell::RefCell, collections::HashMap, ops::Deref, rc::Rc};
|
||||
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{
|
||||
backend::dsa::{
|
||||
instruction::{InsBlock, Instruction},
|
||||
registers::{Register, RegisterAllocator},
|
||||
variable::Variable,
|
||||
},
|
||||
model::CompilerError,
|
||||
};
|
||||
|
||||
pub struct Allocator {
|
||||
stack_offset: i32,
|
||||
in_use: [(Register, bool); 16],
|
||||
}
|
||||
|
||||
pub struct TempReg(Register);
|
||||
pub struct AssignedReg(Register);
|
||||
pub struct StackSlot(i32);
|
||||
|
||||
impl Deref for TempReg {
|
||||
type Target = Register;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for AssignedReg {
|
||||
type Target = Register;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for StackSlot {
|
||||
type Target = i32;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Allocator {
|
||||
pub fn new() -> Self {
|
||||
let mut in_use = [(Register::Null, false); 16];
|
||||
in_use.copy_from_slice(&Register::get_gp().map(|r| (r, false))[0..16]);
|
||||
|
||||
Self {
|
||||
stack_offset: 0,
|
||||
in_use,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_stack_offset(&self) -> i32 {
|
||||
self.stack_offset
|
||||
}
|
||||
|
||||
/// Tears down `scope`: rewinds the stack offset to the value recorded when
/// the scope was entered, empties its variable table and releases every
/// register those variables held.
pub fn destroy_scope(&mut self, scope: &mut Scope) {
    self.stack_offset = scope.entry_stack_offset;

    let released = scope.variables.drain().filter_map(|(_, var)| var.register);
    for assigned in released {
        self.free_assigned(&assigned);
    }
}
|
||||
|
||||
// what we need:
|
||||
|
||||
// - create var in register from temporary register. free temp and use it.
|
||||
//
|
||||
// - create var on stack from struct/array literal. return stack offset to write to.
|
||||
//
|
||||
// - spill var from register to stack. return stack offset to write to.
|
||||
//
|
||||
// - read/write var from stack+offset into register to use while preserving the stack
|
||||
// slot.
|
||||
//
|
||||
// - read / write bytes from the stack+offset in a larger variable into a register.
|
||||
|
||||
pub fn read_var(&mut self, var: &mut Variable) -> Result<InsBlock, CompilerError> {
|
||||
if let Some(slot) = &mut var.stack_slot {
|
||||
if var.register.is_none() {
|
||||
var.register = Some(self.allocate_var()?);
|
||||
}
|
||||
|
||||
if let Some(reg) = &var.register {
|
||||
return Ok(InsBlock::from(Instruction::ldw_reg_offset(
|
||||
**reg,
|
||||
Register::Spr,
|
||||
**slot - self.stack_offset,
|
||||
)));
|
||||
}
|
||||
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
Err(CompilerError::Generic(format!(
|
||||
"Tried to write var {} to stack but var was not assigned a reg and/or stack slot",
|
||||
var.name
|
||||
)))
|
||||
}
|
||||
|
||||
pub fn write_var(&mut self, var: &mut Variable) -> Result<InsBlock, CompilerError> {
|
||||
if let Some(slot) = &var.stack_slot {
|
||||
if let Some(reg) = &var.register {
|
||||
return Ok(InsBlock::from(Instruction::stw_reg_offset(
|
||||
**reg,
|
||||
Register::Spr,
|
||||
**slot - self.stack_offset,
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
Err(CompilerError::Generic(format!(
|
||||
"Tried to write var {} to stack but var was not assigned a reg and/or stack slot",
|
||||
var.name
|
||||
)))
|
||||
}
|
||||
|
||||
pub fn spill_var(&mut self, var: &mut Variable) -> Result<InsBlock, CompilerError> {
|
||||
if let Some(slot) = &var.stack_slot {
|
||||
let block = self.write_var(var)?;
|
||||
if let Some(reg) = &var.register {
|
||||
self.free_assigned(reg);
|
||||
var.register = None;
|
||||
}
|
||||
|
||||
return Ok(block);
|
||||
}
|
||||
|
||||
// var doesn't have a stack slot so we need to create one
|
||||
if let Some(reg) = &var.register {
|
||||
let slot = self.allocate_stack_slot(var.size);
|
||||
let block = InsBlock::from(Instruction::push(**reg));
|
||||
|
||||
self.free_assigned(reg);
|
||||
var.register = None;
|
||||
var.stack_slot = Some(slot);
|
||||
return Ok(block);
|
||||
}
|
||||
|
||||
return Err(CompilerError::Generic(
|
||||
"spill_var called on a variable without a register".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
pub fn allocate_stack_slot(&mut self, size: usize) -> StackSlot {
|
||||
self.stack_offset -= size as i32;
|
||||
let offset = self.stack_offset;
|
||||
StackSlot(offset)
|
||||
}
|
||||
|
||||
pub fn allocate_var(&mut self) -> Result<AssignedReg, CompilerError> {
|
||||
if let Some(reg) = self.find_free_register() {
|
||||
Ok(AssignedReg(reg))
|
||||
} else {
|
||||
Err(CompilerError::Generic(
|
||||
"No free registers available".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn allocate_temp(&mut self) -> Result<TempReg, CompilerError> {
|
||||
// allocates a temporary register
|
||||
if let Some(reg) = self.find_free_register() {
|
||||
Ok(TempReg(reg))
|
||||
} else {
|
||||
todo!("an efficient stack spilling algorithm. needs scope awareness.");
|
||||
}
|
||||
}
|
||||
|
||||
pub fn free_temp(&mut self, temp: &TempReg) {
|
||||
// frees a temporary register.
|
||||
self.in_use[**temp as usize].1 = false;
|
||||
}
|
||||
|
||||
fn free_assigned(&mut self, reg: &AssignedReg) {
|
||||
// frees a register.
|
||||
self.in_use[**reg as usize].1 = false;
|
||||
}
|
||||
|
||||
// if we have register(s) free, return the first one.
|
||||
fn find_free_register(&mut self) -> Option<Register> {
|
||||
self.in_use.iter_mut().find_map(|(reg, used)| {
|
||||
if !*used {
|
||||
*used = true;
|
||||
Some(*reg)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FunctionContext {
|
||||
name: String,
|
||||
allocator: RefCell<Allocator>,
|
||||
}
|
||||
|
||||
impl FunctionContext {
|
||||
pub fn new(name: String) -> Self {
|
||||
Self {
|
||||
name,
|
||||
allocator: RefCell::new(Allocator::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_stack_offset(&self) -> i32 {
|
||||
self.allocator.borrow().get_stack_offset()
|
||||
}
|
||||
}
|
||||
|
||||
/// scope object
|
||||
pub struct Scope<'a> {
|
||||
/// outer scope, for a function this will be the global scope.
|
||||
parent: Option<&'a mut Scope<'a>>,
|
||||
|
||||
context: Rc<FunctionContext>,
|
||||
|
||||
/// is the scope a function body or just a loop?
|
||||
/// depending on the type, ending a scope will have different behaviour
|
||||
r#type: ScopeType,
|
||||
|
||||
/// variables
|
||||
variables: HashMap<Uuid, Variable>,
|
||||
|
||||
entry_stack_offset: i32,
|
||||
}
|
||||
|
||||
impl<'a> Scope<'a> {
|
||||
pub fn new(parent: &'a mut Scope<'a>, r#type: ScopeType) -> Scope<'a> {
|
||||
Self {
|
||||
entry_stack_offset: parent.context.get_stack_offset(),
|
||||
context: Rc::clone(&parent.context),
|
||||
parent: Some(parent),
|
||||
r#type,
|
||||
variables: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn close(&mut self) -> Result<(), CompilerError> {
|
||||
// closing a scope means we need to drop all variables in scope and free
|
||||
// registers.
|
||||
for (name, var) in self.variables.iter() {
|
||||
todo!()
|
||||
// if let Some(reg) = var.allocated_register {}
|
||||
|
||||
// if let Some(offset) = var.bpr_offset {
|
||||
// self.stack_offset -= offset;
|
||||
// }
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn alloc_temp_reg(&mut self) -> Result<(Register, InsBlock), CompilerError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub fn alloc_var_reg(&mut self) -> Result<(Register, InsBlock), CompilerError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub fn alloc_var_stack(&mut self) -> Result<(Register, InsBlock), CompilerError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub fn free_var_stack(&mut self) -> Result<(Register, InsBlock), CompilerError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub fn free_temp_reg(&mut self) -> Result<(Register, InsBlock), CompilerError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
/// The kind of construct a scope belongs to.
#[derive(PartialEq, Copy, Clone, Debug)]
pub enum ScopeType {
    /// Body of a function.
    Function,
    /// Body of an `if` block.
    IfBlock,
    /// Body of a loop.
    LoopBlock,
}
|
||||
@@ -0,0 +1,93 @@
|
||||
use std::{collections::HashMap, hash::Hash, rc::Rc};
|
||||
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{
|
||||
backend::dsa::{
|
||||
instruction::InsBlock,
|
||||
registers::Register,
|
||||
scope::{AssignedReg, FunctionContext, Scope, StackSlot},
|
||||
},
|
||||
model::{CompilerError, TypeId},
|
||||
};
|
||||
|
||||
pub struct Variable {
|
||||
pub name: String,
|
||||
pub uuid: Uuid,
|
||||
|
||||
/// the type of the variable.
|
||||
r#type: TypeId,
|
||||
|
||||
/// size taken up in bytes.
|
||||
/// if size > 4, value must be stored on the stack.
|
||||
pub size: usize,
|
||||
|
||||
pub stack_slot: Option<StackSlot>,
|
||||
pub register: Option<AssignedReg>,
|
||||
}
|
||||
|
||||
impl Variable {
|
||||
pub fn new_uninit(name: String, r#type: TypeId) -> Self {
|
||||
Self {
|
||||
name,
|
||||
uuid: Uuid::new_v4(),
|
||||
size: r#type.size(),
|
||||
r#type,
|
||||
stack_slot: None,
|
||||
register: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
name: String,
|
||||
r#type: TypeId,
|
||||
scope: &'_ mut Scope,
|
||||
) -> Result<Self, CompilerError> {
|
||||
let mut var = Self::new_uninit(name, r#type);
|
||||
var.alloc_default(scope);
|
||||
|
||||
Ok(var)
|
||||
}
|
||||
|
||||
fn alloc_default(&mut self, scope: &'_ mut Scope) {
|
||||
if self.size > 4 {
|
||||
self.alloc_stack(scope).unwrap();
|
||||
} else {
|
||||
self.alloc_register(scope).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn alloc_register(
|
||||
&mut self,
|
||||
scope: &'_ mut Scope,
|
||||
) -> Result<Register, CompilerError> {
|
||||
if self.size > 4 {
|
||||
return Err(CompilerError::Generic(format!(
|
||||
"Type {} cannot be allocated a register as it has a size of {} bytes",
|
||||
self.r#type, self.size
|
||||
)));
|
||||
}
|
||||
|
||||
todo!("integrate with register alloc logic")
|
||||
|
||||
// self.allocated_register = Some(...)
|
||||
}
|
||||
|
||||
/// Assigns a stack slot to the variable. Not implemented yet (`todo!`).
fn alloc_stack(&mut self, scope: &'_ mut Scope) -> Result<usize, CompilerError> {
    todo!("integrate with stack alloc logic")
}
|
||||
|
||||
pub fn load(&mut self, scope: &'_ mut Scope) -> Result<Register, CompilerError> {
|
||||
todo!("load var from stack to reg (if possible)")
|
||||
}
|
||||
|
||||
pub fn drop(&mut self, scope: &'_ mut Scope) -> Result<(), CompilerError> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn spill(&mut self, scope: &'_ mut Scope) -> Result<(), CompilerError> {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
use crate::model::{CompilerError, Program};
|
||||
|
||||
mod dsa;
|
||||
|
||||
pub fn compiler_backend(ext: &str, ast: &Program) -> Result<String, CompilerError> {
|
||||
match ext {
|
||||
"dsa" => Ok(dsa::generate_code(ast)?),
|
||||
_ => Err(CompilerError::Generic(format!(
|
||||
"File type {} not supported",
|
||||
ext
|
||||
))),
|
||||
}
|
||||
}
|
||||
@@ -1,756 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::hash::Hash;
|
||||
use std::sync::LazyLock;
|
||||
use std::sync::atomic::AtomicU32;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use chrono::{DateTime, Local};
|
||||
|
||||
use crate::registers::{Location, RegisterAllocator};
|
||||
use crate::{block, cmd, comment, dsa};
|
||||
|
||||
use crate::parser::{
|
||||
BinaryOperator, CompilerError, ConstExpr, Declaration, Dependency, Expression,
|
||||
Program, Statement, UnaryOperator, Variable,
|
||||
};
|
||||
|
||||
pub struct CodeGenerator {
|
||||
ast: Program,
|
||||
imports: HashMap<String, String>,
|
||||
globals: Vec<String>,
|
||||
functions: Vec<String>,
|
||||
symbols: Vec<String>,
|
||||
allocator: RegisterAllocator,
|
||||
}
|
||||
|
||||
/// Lazily built table of built-in method names.
/// Every entry is currently commented out, so the map is empty.
static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    let entries = [
        // ("print", "print::print"),
        // ("println", "print::println"),
        // ("printnum", "print::print_num"),
        // ("print_space", "print::print_whitespace"),
        // ("print_newline", "print::print_newline"),
        // ("print_char", "print::print_byte"),
        // ("print_word", "print::print_word"),
        // ("print_hex", "print::print_hex_word"),
    ];

    HashMap::from(entries)
});
|
||||
|
||||
/// Renders one assembler include directive: `include <name>: "<path>"`.
fn import(name: &str, path: &str) -> String {
    format!("include {name}: \"{path}\"")
}
|
||||
|
||||
impl CodeGenerator {
|
||||
const RET: &'static str = "\tjmp _ret";
|
||||
|
||||
pub fn new(ast: Program) -> Self {
|
||||
CodeGenerator {
|
||||
ast,
|
||||
imports: HashMap::new(),
|
||||
globals: Vec::new(),
|
||||
functions: Vec::new(),
|
||||
symbols: Vec::new(),
|
||||
allocator: RegisterAllocator::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn include(&mut self, name: &str, path: &str) {
|
||||
self.imports.insert(name.to_string(), path.to_string());
|
||||
}
|
||||
|
||||
fn is_global(&self, name: &str) -> bool {
|
||||
// Check if this variable is in the globals list
|
||||
self.globals
|
||||
.iter()
|
||||
.any(|g| g.contains(&format!("dw {}:", name)))
|
||||
}
|
||||
|
||||
pub fn generate(&mut self) -> Result<String, CompilerError> {
|
||||
// always include the print library for debugging!
|
||||
self.include("print", "./lib/io/print.dsa");
|
||||
|
||||
for block in self.ast.clone().declarations {
|
||||
match block {
|
||||
Declaration::Variable {
|
||||
var: Variable { name, .. },
|
||||
..
|
||||
} => self.symbols.push(name),
|
||||
Declaration::Function { name, .. } => self.symbols.push(name),
|
||||
Declaration::Dependency(Dependency { name, .. }) => {
|
||||
self.symbols.push(name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for block in self.ast.clone().declarations {
|
||||
self.generate_block(block.clone())?;
|
||||
}
|
||||
|
||||
self.generate_layout()
|
||||
}
|
||||
|
||||
fn generate_layout(&mut self) -> Result<String, CompilerError> {
|
||||
let datetime: DateTime<Local> = SystemTime::now().into();
|
||||
Ok(dsa![
|
||||
"",
|
||||
comment!("GENERATED BY DSC COMPILER"),
|
||||
comment!(format!(
|
||||
"Generated at {}",
|
||||
datetime.format("%Y-%m-%d %H:%M:%S")
|
||||
)),
|
||||
"",
|
||||
// imports
|
||||
comment!("Imports"),
|
||||
self.imports
|
||||
.iter()
|
||||
.map(|(k, v)| import(k, v))
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n"),
|
||||
"",
|
||||
// reserved memory
|
||||
comment!("Globals & Reserved Memory"),
|
||||
self.globals.join("\n"),
|
||||
"",
|
||||
// entry point
|
||||
comment!("Entry Point"),
|
||||
"dw stack: 0x10000",
|
||||
"db message: \"Process Exited with code:\"",
|
||||
block! [ "_init"
|
||||
dsa![ldw stack, bpr],
|
||||
dsa![mov bpr, spr],
|
||||
dsa![push zero],
|
||||
dsa![call main],
|
||||
dsa![call print::print_newline],
|
||||
dsa![lwi message, rg0],
|
||||
dsa![push rg0],
|
||||
dsa![call print::print],
|
||||
dsa![pop zero],
|
||||
dsa![call print::print_hex_word],
|
||||
dsa![pop zero],
|
||||
dsa![hlt]
|
||||
],
|
||||
"",
|
||||
comment!("Return"),
|
||||
block! [ "_ret"
|
||||
dsa![mov bpr, spr],
|
||||
dsa![pop bpr],
|
||||
dsa![return]
|
||||
],
|
||||
comment!("Compiled Code Starts..."),
|
||||
// block! [ "main"
|
||||
// dsa![push bpr],
|
||||
// dsa![mov spr, bpr],
|
||||
// dsa![lwi 67, rg1],
|
||||
// dsa![stw rg1, spr, 8],
|
||||
// dsa![mov bpr, spr],
|
||||
// dsa![pop bpr],
|
||||
// dsa![return]
|
||||
// ],
|
||||
self.functions.join("\n"),
|
||||
])
|
||||
}
|
||||
|
||||
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
|
||||
self.globals.push(format!(
|
||||
"dw {}: {}",
|
||||
name,
|
||||
init.unwrap_or(ConstExpr::Number(0))
|
||||
))
|
||||
}
|
||||
|
||||
fn generate_block(&mut self, block: Declaration) -> Result<(), CompilerError> {
|
||||
match block {
|
||||
Declaration::Variable { var, init, .. } => {
|
||||
self.generate_global(&var.name, init)
|
||||
}
|
||||
Declaration::Function {
|
||||
name,
|
||||
return_type,
|
||||
params,
|
||||
body,
|
||||
} => {
|
||||
let func = self.generate_function(&name, ¶ms, &body).join("\n");
|
||||
|
||||
self.functions.push(format!("{func}\n"));
|
||||
}
|
||||
Declaration::Dependency(Dependency { name, path }) => {
|
||||
self.imports.insert(name, path);
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Example: Generate code for a function
|
||||
fn generate_function(
|
||||
&mut self,
|
||||
name: &str,
|
||||
params: &[Variable],
|
||||
body: &[Statement],
|
||||
) -> Vec<String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
// Reset allocator for new function
|
||||
self.allocator.reset();
|
||||
|
||||
// Function prologue
|
||||
code.push(format!("{}:", name));
|
||||
code.push("\tpush bpr".to_string());
|
||||
code.push("\tmov spr, bpr".to_string());
|
||||
code.push(String::new());
|
||||
|
||||
// Allocate parameters to registers or stack locations
|
||||
for (i, param) in params.iter().enumerate() {
|
||||
let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8
|
||||
// Track that this parameter is at a stack location
|
||||
let (reg, load_code) = self.allocator.alloc_var(¶m.name).unwrap();
|
||||
code.extend(load_code);
|
||||
code.push(format!("\tldw bpr, {}, {}", reg, offset));
|
||||
}
|
||||
|
||||
// Generate code for function body
|
||||
for stmt in body {
|
||||
let stmt_code = self.generate_statement(stmt).unwrap();
|
||||
code.extend(stmt_code);
|
||||
}
|
||||
|
||||
// automatically return at function end
|
||||
if let Some(x) = code.last()
|
||||
&& x == Self::RET
|
||||
{
|
||||
} else {
|
||||
code.push(Self::RET.to_string());
|
||||
}
|
||||
|
||||
code
|
||||
}
|
||||
|
||||
// Example: Generate code for a statement
|
||||
fn generate_statement(
|
||||
&mut self,
|
||||
stmt: &Statement,
|
||||
) -> Result<Vec<String>, CompilerError> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
match stmt {
|
||||
Statement::Declaration { var, value } => {
|
||||
if let Some(expr) = value {
|
||||
// Evaluate expression
|
||||
let (result_reg, expr_code) = self.generate_expression(expr, true)?;
|
||||
code.extend(expr_code);
|
||||
|
||||
// Store result in variable
|
||||
let store_code = self.allocator.store_var(&var.name, &result_reg);
|
||||
code.extend(store_code);
|
||||
|
||||
// Free temporary register
|
||||
self.allocator.free_temp(&result_reg);
|
||||
} else {
|
||||
// Just declaring variable without initialization
|
||||
self.allocator.alloc_var(&var.name)?;
|
||||
}
|
||||
}
|
||||
|
||||
Statement::Break => unimplemented!(),
|
||||
Statement::Continue => unimplemented!(),
|
||||
|
||||
Statement::PtrWrite { ptr, value } => {
|
||||
let (result_reg, expr_code) = self.generate_expression(value, true)?;
|
||||
code.extend(expr_code);
|
||||
|
||||
let (ptr_reg, ptr_code) = self.generate_expression(ptr, true)?;
|
||||
code.extend(ptr_code);
|
||||
|
||||
code.push(format!("\tstw {}, {}", result_reg, ptr_reg));
|
||||
|
||||
self.allocator.free_temp(&result_reg);
|
||||
self.allocator.free_temp(&ptr_reg);
|
||||
}
|
||||
|
||||
Statement::Assign { varname, value } => {
|
||||
// Evaluate expression
|
||||
let (result_reg, expr_code) = self.generate_expression(value, true)?;
|
||||
code.extend(expr_code);
|
||||
|
||||
// Check if this is a global variable
|
||||
if self.is_global(varname) {
|
||||
// Store to global label
|
||||
code.push(format!("\tstw {}, {}", result_reg, varname));
|
||||
} else {
|
||||
// Store result in local variable
|
||||
let store_code = self.allocator.store_var(varname, &result_reg);
|
||||
code.extend(store_code);
|
||||
}
|
||||
|
||||
// Free temporary register
|
||||
self.allocator.free_temp(&result_reg);
|
||||
}
|
||||
|
||||
Statement::Return(expr) => {
|
||||
if let Some(e) = expr {
|
||||
let (result_reg, expr_code) = self.generate_expression(e, true)?;
|
||||
code.extend(expr_code);
|
||||
code.push(format!("\tstw {}, bpr, 8", result_reg));
|
||||
code.push(format!("\tjmp _ret"));
|
||||
self.allocator.free_temp(&result_reg);
|
||||
}
|
||||
}
|
||||
|
||||
Statement::If {
|
||||
condition,
|
||||
then_stmt,
|
||||
else_stmt,
|
||||
} => {
|
||||
// Generate condition
|
||||
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
|
||||
code.extend(cond_code);
|
||||
|
||||
// Compare with zero
|
||||
code.push(format!("\tcmp {}, zero", cond_reg));
|
||||
self.allocator.free_temp(&cond_reg);
|
||||
|
||||
// Generate unique labels
|
||||
let then_label = format!("_then_{}", self.get_unique_label());
|
||||
let else_label = format!("_else_{}", self.get_unique_label());
|
||||
let end_label = format!("_end_{}", self.get_unique_label());
|
||||
|
||||
// Jump to else if condition is false (equal to zero)
|
||||
code.push(format!("\tjeq {}", else_label));
|
||||
|
||||
// Then block
|
||||
code.push(format!("{}:", then_label));
|
||||
for s in then_stmt {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
if then_stmt.len() == 0 {
|
||||
code.push("\tnop".to_string());
|
||||
}
|
||||
|
||||
code.push(format!("\tjmp {}", end_label));
|
||||
|
||||
// Else block
|
||||
code.push(format!("{}:", else_label));
|
||||
for s in else_stmt {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
if else_stmt.len() == 0 {
|
||||
code.push("\tnop".to_string());
|
||||
}
|
||||
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
|
||||
Statement::While { condition, body } => {
|
||||
let loop_start = format!("_while_start_{}", self.get_unique_label());
|
||||
let loop_end = format!("_while_end_{}", self.get_unique_label());
|
||||
|
||||
code.push(format!("{}:", loop_start));
|
||||
|
||||
// Generate condition
|
||||
let (cond_reg, cond_code) = self.generate_expression(condition, true)?;
|
||||
code.extend(cond_code);
|
||||
|
||||
code.push(format!("\tcmp {}, zero", cond_reg));
|
||||
self.allocator.free_temp(&cond_reg);
|
||||
|
||||
code.push(format!("\tjeq {}", loop_end));
|
||||
|
||||
// Loop body
|
||||
for s in body {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
code.push(format!("\tjmp {}", loop_start));
|
||||
code.push(format!("{}:", loop_end));
|
||||
}
|
||||
|
||||
Statement::Loop(body) => {
|
||||
let loop_start = format!("_loop_start_{}", self.get_unique_label());
|
||||
|
||||
code.push(format!("{}:", loop_start));
|
||||
|
||||
for s in body {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
|
||||
code.push(format!("\tjmp {}", loop_start));
|
||||
}
|
||||
|
||||
Statement::Expression { expr } => {
|
||||
let (result_reg, expr_code) = self.generate_expression(expr, false)?;
|
||||
code.extend(expr_code);
|
||||
self.allocator.free_temp(&result_reg);
|
||||
}
|
||||
|
||||
Statement::Block(statements) => {
|
||||
for s in statements {
|
||||
code.extend(self.generate_statement(s)?);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(code)
|
||||
}
|
||||
|
||||
// Example: Generate code for an expression
|
||||
// Returns (register containing result, assembly code)
|
||||
fn generate_expression(
|
||||
&mut self,
|
||||
expr: &Expression,
|
||||
use_result: bool,
|
||||
) -> Result<(String, Vec<String>), CompilerError> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
// optimisation to prevent generating dead code!
|
||||
if expr.is_pure() && !use_result {
|
||||
return Ok((String::new(), code));
|
||||
}
|
||||
|
||||
match expr {
|
||||
Expression::StringLiteral(value) => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.extend(alloc_code);
|
||||
|
||||
// write string into memory
|
||||
let uuid = self.get_unique_label();
|
||||
code.push(format!("\tdb str_{uuid}: \"{value}\""));
|
||||
|
||||
// Load pointer to string
|
||||
code.push(format!("\tlwi str_{uuid}, {reg}"));
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::CharLiteral(value) => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.extend(alloc_code);
|
||||
|
||||
// Load immediate value
|
||||
code.push(format!("\tlli {}, {} // '{value}'", *value as u8, reg));
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::Number(value) => {
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.extend(alloc_code);
|
||||
|
||||
// Load immediate value
|
||||
code.push(format!("\tlli {}, {}", value & 0xFFFF, reg));
|
||||
if *value > 0xFFFF || *value < 0 {
|
||||
code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg));
|
||||
}
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
Expression::Variable { name, .. } => {
|
||||
if self.is_global(&name.name) {
|
||||
// Allocate a temporary register for the global
|
||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
||||
code.extend(alloc_code);
|
||||
|
||||
// Load from global label
|
||||
code.push(format!("\tldw {}, {}", name.name, reg));
|
||||
|
||||
Ok((reg, code))
|
||||
} else {
|
||||
// Local variable - use existing allocator logic
|
||||
let (reg, load_code) = self.allocator.load_var(&name.name)?;
|
||||
code.extend(load_code);
|
||||
Ok((reg, code))
|
||||
}
|
||||
}
|
||||
|
||||
Expression::Binary { op, left, right } => {
|
||||
// Evaluate left operand
|
||||
let (left_reg, left_code) = self.generate_expression(left, true)?;
|
||||
code.extend(left_code);
|
||||
|
||||
// Evaluate right operand
|
||||
let (right_reg, right_code) = self.generate_expression(right, true)?;
|
||||
code.extend(right_code);
|
||||
|
||||
// Allocate result register
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.extend(result_alloc);
|
||||
|
||||
// Generate operation
|
||||
match op {
|
||||
BinaryOperator::Add => {
|
||||
code.push(format!(
|
||||
"\tadd {}, {}, {}",
|
||||
left_reg, right_reg, result_reg
|
||||
));
|
||||
}
|
||||
BinaryOperator::Sub => {
|
||||
code.push(format!(
|
||||
"\tsub {}, {}, {}",
|
||||
left_reg, right_reg, result_reg
|
||||
));
|
||||
}
|
||||
BinaryOperator::Mul => {
|
||||
self.include("maths", "./lib/maths/core.dsa");
|
||||
// Call multiply function
|
||||
code.push(format!("\tpush {}", right_reg));
|
||||
code.push(format!("\tpush {}", left_reg));
|
||||
code.push("\tcall maths::multiply".to_string());
|
||||
code.push(format!("\tpop {}", result_reg));
|
||||
code.push("\tpop zero".to_string());
|
||||
}
|
||||
// Comparison operators - return 1 (true) or 0 (false)
|
||||
BinaryOperator::Eq => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Ne => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Lt => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Le => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Gt => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
BinaryOperator::Ge => {
|
||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
||||
code.push(format!("\tlli 0, {}", result_reg));
|
||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
||||
code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1
|
||||
code.push(format!("\tlli 1, {}", result_reg));
|
||||
code.push(format!("{}:", end_label));
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
|
||||
// Free operand registers (allocator will protect variables)
|
||||
self.allocator.free_temp(&left_reg);
|
||||
self.allocator.free_temp(&right_reg);
|
||||
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Call { name, args } => {
|
||||
// first evaluate all the args we're going to need
|
||||
let mut arg_regs = Vec::new();
|
||||
for arg in args.iter().rev() {
|
||||
let (arg_reg, arg_code) = self.generate_expression(arg, true)?;
|
||||
code.extend(arg_code);
|
||||
arg_regs.push(arg_reg);
|
||||
}
|
||||
|
||||
// Save caller-saved registers and track which ones we saved
|
||||
// old method, inefficient.
|
||||
// let saved_regs = self.allocator.get_caller_saved_registers();
|
||||
// for reg in &saved_regs {
|
||||
// code.push(format!("\tpush {}", reg));
|
||||
// }
|
||||
|
||||
// Save caller-saved registers and track which ones we saved
|
||||
let saved_regs = self.allocator.get_caller_saved_registers();
|
||||
for reg in &saved_regs {
|
||||
// spill variables to stack
|
||||
code.extend(self.allocator.spill_register(reg).unwrap());
|
||||
}
|
||||
|
||||
// Evaluate and push arguments in reverse order
|
||||
for (i, arg_reg) in arg_regs.iter().enumerate() {
|
||||
code.push(format!(
|
||||
"\tpush {} // push arg {}",
|
||||
arg_reg,
|
||||
args.len() - 1 - i
|
||||
));
|
||||
}
|
||||
|
||||
// if GLOBAL_METHODS.contains_key(name.name.as_str()) {
|
||||
// code.push(format!("\tcall {}",
|
||||
// GLOBAL_METHODS[name.name.as_str()])); } else
|
||||
if self.symbols.contains(&name.name) {
|
||||
// Call local function
|
||||
code.push(format!("\tcall {}", name));
|
||||
} else if let Some(ns) = name.namespace.clone()
|
||||
&& self.imports.contains_key(&ns)
|
||||
{
|
||||
code.push(format!("\tcall {}", name));
|
||||
} else {
|
||||
return Err(CompilerError::Undefined(name.clone()));
|
||||
}
|
||||
|
||||
let result_reg: String;
|
||||
|
||||
if use_result {
|
||||
let (temp_result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
result_reg = temp_result_reg;
|
||||
|
||||
code.extend(result_alloc);
|
||||
code.push(format!("\tpop {}", result_reg));
|
||||
|
||||
// Clean up arguments
|
||||
if args.len() > 1 {
|
||||
for _ in 0..(args.len() - 1) {
|
||||
code.push("\tpop zero".to_string());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result_reg = "zero".to_string();
|
||||
|
||||
// Clean up arguments
|
||||
if args.len() > 0 {
|
||||
for _ in 0..(args.len()) {
|
||||
code.push("\tpop zero".to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restore caller-saved registers in reverse order (LIFO)
|
||||
// for reg in saved_regs.iter().rev() {
|
||||
// code.push(format!("\tpop {}", reg));
|
||||
// }
|
||||
|
||||
// Free argument registers
|
||||
for reg in arg_regs {
|
||||
self.allocator.free_temp(®);
|
||||
}
|
||||
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Unary { op, operand } => {
|
||||
let (operand_reg, operand_code) =
|
||||
self.generate_expression(operand, true)?;
|
||||
code.extend(operand_code);
|
||||
|
||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
||||
code.extend(result_alloc);
|
||||
|
||||
match op {
|
||||
UnaryOperator::Minus => {
|
||||
// Negate: result = 0 - operand
|
||||
code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::Plus => {
|
||||
// Just move
|
||||
code.push(format!("\tmov {}, {}", operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::Dereference => {
|
||||
code.push(format!("\tldw {}, {}", operand_reg, result_reg));
|
||||
}
|
||||
UnaryOperator::Reference => {
|
||||
code.extend(self.allocator.spill_register(&operand_reg)?);
|
||||
code.push(format!(
|
||||
"\tsubi bpr {} {}",
|
||||
-(4 + self.allocator.get_stack_offset()),
|
||||
result_reg
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
self.allocator.free_temp(&operand_reg);
|
||||
Ok((result_reg, code))
|
||||
}
|
||||
|
||||
Expression::Empty => Ok(("zero".to_string(), code)),
|
||||
}
|
||||
}
|
||||
|
||||
// Helper for generating unique labels
|
||||
fn get_unique_label(&mut self) -> String {
|
||||
// You'd implement a counter here
|
||||
static COUNTER: AtomicU32 = AtomicU32::new(0);
|
||||
|
||||
let val = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
|
||||
(val + 1).to_string()
|
||||
}
|
||||
}
|
||||
|
||||
/// Joins any number of arguments into one `String`, one argument per line.
///
/// Every argument must implement `Display`. Each one is followed by a
/// newline, including the last; a trailing comma is accepted.
#[macro_export]
macro_rules! dsa {
    ($($line:expr),* $(,)?) => {{
        use std::fmt::Write;
        // All arguments are appended to this single buffer.
        let mut out = ::std::string::String::new();
        $(
            writeln!(out, "{}", $line).expect("write to String failed");
        )*
        out
    }};
}
|
||||
|
||||
// ──────────────────────── cmd! ────────────────────────
/// Turns its raw token stream into a `String` terminated by a newline.
///
/// For example, `cmd!(push zero)` yields `"push zero\n"`.
#[macro_export]
macro_rules! cmd {
    ($($tokens:tt)*) => {
        // `$tokens` is a repeated capture, so it has to be expanded inside
        // `$( … )*`; using it bare made every expansion fail to compile.
        ::std::string::String::from(concat!(stringify!($($tokens)*), "\n"))
    };
}
|
||||
|
||||
// ──────────────────────── block! ────────────────────────
|
||||
// Usage:
|
||||
//
|
||||
// let asm = block![ "name"
|
||||
// dsa![mov rg0, rg1],
|
||||
// dsa![add rg1, rg1]
|
||||
// ];
|
||||
//
|
||||
// `asm` is a `&'static str` containing:
|
||||
//
|
||||
// name:
|
||||
// mov rg0, rg1
|
||||
// add rg1, rg1
|
||||
//
|
||||
/// Builds a labelled assembly block as a `&'static str`.
///
/// The first token is the label (a string literal). It is followed by
/// comma-separated `dsa![…]` entries, which are matched purely as syntax:
/// the tokens inside each one are stringified, prefixed with a tab and
/// terminated by a newline.
#[macro_export]
macro_rules! block {
    ($name:literal $(dsa![$($instr:tt)*]),* ) => {
        // Everything is joined at compile time into a single literal.
        concat!(
            $name, ":\n",
            $("\t", stringify!($($instr)*), "\n"),*
        )
    };
}
|
||||
|
||||
/// Formats any `Display` value as an assembly line comment: `// <text>`.
#[macro_export]
macro_rules! comment {
    ($text:expr) => {
        ::std::format!("// {}", $text)
    };
}
|
||||
@@ -44,6 +44,7 @@ pub enum TokenType {
|
||||
Eof,
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub enum Type {
|
||||
Int32,
|
||||
Int16,
|
||||
@@ -0,0 +1,25 @@
|
||||
use common::logging::log;
|
||||
|
||||
use crate::model::{CompilerError, Program};
|
||||
use parser::Parser;
|
||||
|
||||
pub mod lexer;
|
||||
pub mod parser;
|
||||
|
||||
pub fn generate_ast(input: &str) -> Result<Program, CompilerError> {
|
||||
log("Tokenising Input...");
|
||||
|
||||
let mut lexer = lexer::Lexer::new(&input);
|
||||
let tokens = lexer.tokenize().map_err(|e| CompilerError::Generic(e))?;
|
||||
// println!("{tokens:?}");
|
||||
|
||||
log(&format!("Parsing {} Tokens...", tokens.len()));
|
||||
|
||||
let mut parser = Parser::new(tokens);
|
||||
let ast = match parser.parse() {
|
||||
Ok(ast) => ast,
|
||||
Err(e) => return Err(CompilerError::Generic(e)),
|
||||
};
|
||||
|
||||
Ok(ast)
|
||||
}
|
||||
@@ -2,167 +2,12 @@
|
||||
// AST Node Types
|
||||
// ============================================================================
|
||||
|
||||
use std::fmt;
|
||||
use crate::model::{
|
||||
BinaryOperator, Block, ConstExpr, Declaration, Dependency, Expression, Name, Program,
|
||||
Statement, TypeId, UnaryOperator, Variable,
|
||||
};
|
||||
|
||||
use crate::lexer::{Token, TokenType};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Program {
|
||||
pub declarations: Vec<Declaration>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Declaration {
|
||||
Function {
|
||||
name: String,
|
||||
return_type: Type,
|
||||
params: Vec<Parameter>,
|
||||
body: Block,
|
||||
},
|
||||
Variable {
|
||||
name: String,
|
||||
init: Option<ConstExpr>,
|
||||
},
|
||||
Import {
|
||||
name: String,
|
||||
path: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Parameter {
|
||||
pub name: String,
|
||||
pub param_type: Type,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Type {
|
||||
Int,
|
||||
Long,
|
||||
Float,
|
||||
Double,
|
||||
Char,
|
||||
Void,
|
||||
Ptr(Box<Type>),
|
||||
Array(Box<Type>, usize),
|
||||
Struct(String),
|
||||
}
|
||||
|
||||
pub type Block = Vec<Statement>;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Statement {
|
||||
Block(Block),
|
||||
Assign {
|
||||
// left side
|
||||
name: String,
|
||||
declare_type: Option<Type>,
|
||||
|
||||
// right side
|
||||
value: Option<Box<Expression>>,
|
||||
},
|
||||
Expression {
|
||||
expr: Expression,
|
||||
},
|
||||
If {
|
||||
condition: Expression,
|
||||
then_stmt: Block,
|
||||
else_stmt: Block,
|
||||
},
|
||||
While {
|
||||
condition: Expression,
|
||||
body: Vec<Statement>,
|
||||
},
|
||||
Return {
|
||||
expr: Option<Expression>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ConstExpr {
|
||||
Number(i32),
|
||||
String(String),
|
||||
}
|
||||
|
||||
impl fmt::Display for ConstExpr {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
ConstExpr::Number(n) => write!(f, "{}", n),
|
||||
ConstExpr::String(s) => write!(f, "\"{}\"", s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Expression {
|
||||
Empty,
|
||||
Binary {
|
||||
op: BinaryOperator,
|
||||
left: Box<Expression>,
|
||||
right: Box<Expression>,
|
||||
},
|
||||
Unary {
|
||||
op: UnaryOperator,
|
||||
operand: Box<Expression>,
|
||||
},
|
||||
Variable {
|
||||
name: String,
|
||||
expr_type: Option<Type>,
|
||||
},
|
||||
Number {
|
||||
value: i32,
|
||||
},
|
||||
Call {
|
||||
name: String,
|
||||
args: Vec<Expression>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum BinaryOperator {
|
||||
Add,
|
||||
Sub,
|
||||
Mul,
|
||||
Div,
|
||||
Eq,
|
||||
Ne,
|
||||
Lt,
|
||||
Gt,
|
||||
Le,
|
||||
Ge,
|
||||
}
|
||||
|
||||
impl fmt::Display for BinaryOperator {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
BinaryOperator::Add => write!(f, "+"),
|
||||
BinaryOperator::Sub => write!(f, "-"),
|
||||
BinaryOperator::Mul => write!(f, "*"),
|
||||
BinaryOperator::Div => write!(f, "/"),
|
||||
BinaryOperator::Eq => write!(f, "=="),
|
||||
BinaryOperator::Ne => write!(f, "!="),
|
||||
BinaryOperator::Lt => write!(f, "<"),
|
||||
BinaryOperator::Gt => write!(f, ">"),
|
||||
BinaryOperator::Le => write!(f, "<="),
|
||||
BinaryOperator::Ge => write!(f, ">="),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum UnaryOperator {
|
||||
Plus,
|
||||
Minus,
|
||||
}
|
||||
|
||||
impl fmt::Display for UnaryOperator {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
UnaryOperator::Plus => write!(f, "+"),
|
||||
UnaryOperator::Minus => write!(f, "-"),
|
||||
}
|
||||
}
|
||||
}
|
||||
use super::lexer::{Token, TokenType};
|
||||
|
||||
// ============================================================================
|
||||
// Parser
|
||||
@@ -252,7 +97,7 @@ impl Parser {
|
||||
.ok_or(String::from("Expected string literal"))?;
|
||||
|
||||
self.advance();
|
||||
return Ok(Declaration::Import { name, path });
|
||||
return Ok(Declaration::Dependency(Dependency { name, path }));
|
||||
}
|
||||
|
||||
self.expect(TokenType::Int)?;
|
||||
@@ -267,16 +112,16 @@ impl Parser {
|
||||
TokenType::LParen => {
|
||||
// Function declaration
|
||||
self.advance();
|
||||
let mut params = Vec::<Parameter>::new();
|
||||
let mut params = Vec::<Variable>::new();
|
||||
|
||||
if !matches!(self.current().token_type, TokenType::RParen) {
|
||||
self.expect(TokenType::Int)?;
|
||||
|
||||
match &self.current().token_type {
|
||||
TokenType::Identifier(s) => {
|
||||
params.push(Parameter {
|
||||
params.push(Variable {
|
||||
name: s.clone(),
|
||||
param_type: Type::Int,
|
||||
type_id: TypeId::U32,
|
||||
});
|
||||
self.advance();
|
||||
}
|
||||
@@ -289,9 +134,9 @@ impl Parser {
|
||||
|
||||
match &self.current().token_type {
|
||||
TokenType::Identifier(s) => {
|
||||
params.push(Parameter {
|
||||
params.push(Variable {
|
||||
name: s.clone(),
|
||||
param_type: Type::Int,
|
||||
type_id: TypeId::U32,
|
||||
});
|
||||
self.advance();
|
||||
}
|
||||
@@ -307,7 +152,7 @@ impl Parser {
|
||||
name,
|
||||
params,
|
||||
body,
|
||||
return_type: Type::Int,
|
||||
return_type: TypeId::U32,
|
||||
})
|
||||
}
|
||||
_ => {
|
||||
@@ -327,7 +172,14 @@ impl Parser {
|
||||
};
|
||||
|
||||
self.expect(TokenType::Semicolon)?;
|
||||
Ok(Declaration::Variable { name, init })
|
||||
Ok(Declaration::Variable {
|
||||
var: Variable {
|
||||
name,
|
||||
type_id: TypeId::U32,
|
||||
},
|
||||
init,
|
||||
is_const: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -369,9 +221,8 @@ impl Parser {
|
||||
|
||||
self.expect(TokenType::Semicolon)?;
|
||||
Ok(Statement::Assign {
|
||||
name,
|
||||
value: Some(Box::new(expr)),
|
||||
declare_type: None,
|
||||
varname: name,
|
||||
value: expr,
|
||||
})
|
||||
}
|
||||
// var expression
|
||||
@@ -379,7 +230,10 @@ impl Parser {
|
||||
self.expect(TokenType::Semicolon)?;
|
||||
Ok(Statement::Expression {
|
||||
expr: Expression::Variable {
|
||||
name,
|
||||
name: Name {
|
||||
name,
|
||||
namespace: None,
|
||||
},
|
||||
expr_type: None,
|
||||
},
|
||||
})
|
||||
@@ -406,15 +260,13 @@ impl Parser {
|
||||
// Convert to assignment expression statement
|
||||
let expr = if let Some(init_expr) = init {
|
||||
Statement::Assign {
|
||||
name,
|
||||
value: Some(Box::new(init_expr)),
|
||||
declare_type: Some(Type::Int),
|
||||
varname: name,
|
||||
value: init_expr,
|
||||
}
|
||||
} else {
|
||||
Statement::Assign {
|
||||
name,
|
||||
value: None,
|
||||
declare_type: Some(Type::Int),
|
||||
varname: name,
|
||||
value: Expression::Empty,
|
||||
}
|
||||
};
|
||||
|
||||
@@ -474,7 +326,7 @@ impl Parser {
|
||||
};
|
||||
|
||||
self.expect(TokenType::Semicolon)?;
|
||||
Ok(Statement::Return { expr })
|
||||
Ok(Statement::Return(expr))
|
||||
}
|
||||
|
||||
fn parse_expression(&mut self) -> Result<Expression, String> {
|
||||
@@ -499,6 +351,7 @@ impl Parser {
|
||||
op,
|
||||
left: Box::new(expr),
|
||||
right,
|
||||
type_id: None,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -519,6 +372,7 @@ impl Parser {
|
||||
op,
|
||||
left: Box::new(expr),
|
||||
right,
|
||||
type_id: None,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -539,6 +393,7 @@ impl Parser {
|
||||
op,
|
||||
left: Box::new(expr),
|
||||
right,
|
||||
type_id: None,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -555,7 +410,11 @@ impl Parser {
|
||||
if let Some(op) = op {
|
||||
self.advance();
|
||||
let operand = Box::new(self.parse_unary()?);
|
||||
return Ok(Expression::Unary { op, operand });
|
||||
return Ok(Expression::Unary {
|
||||
op,
|
||||
operand,
|
||||
type_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
self.parse_primary()
|
||||
@@ -566,7 +425,10 @@ impl Parser {
|
||||
TokenType::Number(n) => {
|
||||
let value = *n;
|
||||
self.advance();
|
||||
Ok(Expression::Number { value })
|
||||
Ok(Expression::Number {
|
||||
value: value as isize,
|
||||
type_id: None,
|
||||
})
|
||||
}
|
||||
TokenType::Identifier(name) => {
|
||||
let name = name.clone();
|
||||
@@ -587,10 +449,20 @@ impl Parser {
|
||||
}
|
||||
|
||||
self.expect(TokenType::RParen)?;
|
||||
Ok(Expression::Call { name, args })
|
||||
Ok(Expression::Call {
|
||||
name: Name {
|
||||
name,
|
||||
namespace: None,
|
||||
},
|
||||
args,
|
||||
type_id: None,
|
||||
})
|
||||
} else {
|
||||
Ok(Expression::Variable {
|
||||
name,
|
||||
name: Name {
|
||||
name,
|
||||
namespace: None,
|
||||
},
|
||||
expr_type: None,
|
||||
})
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,38 @@
|
||||
use common::logging::log;
|
||||
|
||||
use crate::model::{CompilerError, Program};
|
||||
use parser::{ParseResult, Parser};
|
||||
// use semantic_analyser::Analyser;
|
||||
|
||||
pub mod lexer;
|
||||
pub mod parser;
|
||||
// pub mod semantic_analyser;
|
||||
|
||||
pub fn generate_ast(input: &str) -> Result<Program, CompilerError> {
|
||||
log("Tokenising Input...");
|
||||
|
||||
let lexer = lexer::Lexer::new(&input);
|
||||
let tokens = lexer.collect::<Vec<_>>();
|
||||
println!("{tokens:#?}");
|
||||
|
||||
log(&format!("Parsing {} Tokens...", tokens.len()));
|
||||
|
||||
let mut parser = Parser::new(tokens);
|
||||
let ast = match parser.parse() {
|
||||
ParseResult::Accept(ast) => ast,
|
||||
ParseResult::Reject(e) => return Err(e),
|
||||
ParseResult::Deny => {
|
||||
return Err(CompilerError::Generic("Parser used ::Deny".to_string()));
|
||||
}
|
||||
};
|
||||
// println!("{ast:#?}");
|
||||
|
||||
log("Analyzing AST...");
|
||||
log("Checking Type Information...");
|
||||
|
||||
// let mut analyser = Analyser::new();
|
||||
// analyser.analyse(ast.clone()).unwrap();
|
||||
|
||||
log("Type Checking Complete...");
|
||||
Ok(ast)
|
||||
}
|
||||
@@ -0,0 +1,987 @@
|
||||
use super::lexer::Token;
|
||||
use crate::model::{
|
||||
AssignmentOperator, BinaryOperator, Block, Call, CompilerError, ConstExpr,
|
||||
Declaration, Dependency, Expression, Number, Program, Statement, TypeId,
|
||||
UnaryOperator, Variable,
|
||||
};
|
||||
use crate::{expect_tt, expect_value};
|
||||
use std::ops::{ControlFlow, FromResidual, Try};
|
||||
|
||||
/// Three-state outcome of a parsing step.
///
/// The type implements `Try`, so `?` unwraps [`ParseResult::Accept`] and propagates the two
/// failure states to the caller.
#[derive(Debug, Clone)]
pub enum ParseResult<T, E> {
    /// The step succeeded and produced a value.
    Accept(T),
    /// The step failed without carrying an error payload.
    Deny,
    /// The step failed with an error.
    Reject(E),
}
|
||||
|
||||
pub struct Parser {
|
||||
tokens: Vec<Token>,
|
||||
idx: usize,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new(tokens: Vec<Token>) -> Self {
|
||||
Self { tokens, idx: 0 }
|
||||
}
|
||||
|
||||
pub fn parse(&mut self) -> ParseResult<Program, CompilerError> {
|
||||
let mut declarations = Vec::new();
|
||||
|
||||
while let ParseResult::Accept(_) = self.peek_next() {
|
||||
declarations.push(self.parse_declaration()?);
|
||||
}
|
||||
|
||||
ParseResult::Accept(Program { declarations })
|
||||
}
|
||||
|
||||
fn parse_declaration(&mut self) -> ParseResult<Declaration, CompilerError> {
|
||||
if expect_tt!(self.peek_next()?, Fn).accepted() {
|
||||
return self.parse_func();
|
||||
}
|
||||
|
||||
if expect_tt!(self.peek_next()?, Struct).accepted() {
|
||||
return self.parse_struct();
|
||||
}
|
||||
|
||||
if expect_tt!(self.peek_next()?, Include).accepted() {
|
||||
// expect include keyword
|
||||
let _ = self.next();
|
||||
|
||||
// expect namespace identifier
|
||||
let name = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
// expect colon
|
||||
let _ = expect_tt!(self.next()?, Colon)?;
|
||||
|
||||
// expect string literal (module path)
|
||||
let path = expect_value!(self.next()?, String)?;
|
||||
|
||||
// expect semicolon
|
||||
let _ = expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
return ParseResult::Accept(Declaration::Dependency(Dependency {
|
||||
name: name.name,
|
||||
path,
|
||||
}));
|
||||
}
|
||||
|
||||
if expect_tt!(self.peek_next()?, Const, Static).accepted() {
|
||||
let is_const = match self.next()? {
|
||||
Token::Const => true,
|
||||
Token::Static => false,
|
||||
_ => {
|
||||
return ParseResult::Reject(CompilerError::Generic(String::from(
|
||||
"This can't happen!",
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
let var = self.parse_var_decl()?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, Assign)?;
|
||||
|
||||
let value = self.next()?;
|
||||
let init = match value {
|
||||
Token::String(x) => Some(ConstExpr::String(x)),
|
||||
Token::SignedInt(x, _) => Some(ConstExpr::Number(x)),
|
||||
Token::UnsignedInt(x, _) => Some(ConstExpr::Number(x as i32)),
|
||||
_ => {
|
||||
return ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||
value.tt().to_string(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let _ = expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
return ParseResult::Accept(Declaration::Variable {
|
||||
var,
|
||||
init,
|
||||
is_const,
|
||||
});
|
||||
}
|
||||
|
||||
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
|
||||
}
|
||||
|
||||
fn parse_struct(&mut self) -> ParseResult<Declaration, CompilerError> {
|
||||
let _ = expect_tt!(self.next()?, Struct)?;
|
||||
let name = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, LeftBrace)?;
|
||||
|
||||
let mut fields = Vec::new();
|
||||
while expect_tt!(self.peek_next()?, Identifier).accepted() {
|
||||
let arg = self.parse_var_decl()?;
|
||||
fields.push(arg);
|
||||
|
||||
if expect_tt!(self.peek_next()?, Comma).accepted() {
|
||||
self.next()?;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let _ = expect_tt!(self.next()?, RightBrace)?;
|
||||
ParseResult::Accept(Declaration::Struct { name, fields })
|
||||
}
|
||||
|
||||
fn parse_func(&mut self) -> ParseResult<Declaration, CompilerError> {
|
||||
// expect function keyword
|
||||
let _ = expect_tt!(self.next()?, Fn);
|
||||
// expect function name
|
||||
let name = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
// expect left paren
|
||||
let _ = expect_tt!(self.next()?, LeftParen)?;
|
||||
|
||||
let mut params = Vec::new();
|
||||
while expect_tt!(self.peek_next()?, Identifier).accepted() {
|
||||
let arg = self.parse_var_decl()?;
|
||||
params.push(arg);
|
||||
|
||||
if expect_tt!(self.peek_next()?, Comma).accepted() {
|
||||
self.next()?;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// expect right paren
|
||||
let _ = expect_tt!(self.next()?, RightParen)?;
|
||||
|
||||
// see if we can parse the return type!
|
||||
let mut return_type = TypeId::Void;
|
||||
if expect_tt!(self.peek_next()?, RightArrow).accepted() {
|
||||
let _ = self.next();
|
||||
return_type = self.parse_type()?;
|
||||
}
|
||||
|
||||
// expect vald block
|
||||
if expect_tt!(self.peek_next()?, LeftBrace).accepted() {
|
||||
ParseResult::Accept(Declaration::Function {
|
||||
name: name.name,
|
||||
params,
|
||||
return_type,
|
||||
body: self.parse_block()?,
|
||||
})
|
||||
} else {
|
||||
ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||
self.peek_next()?.tt().to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_block(&mut self) -> ParseResult<Block, CompilerError> {
|
||||
// expect left brace
|
||||
let _ = expect_tt!(self.next()?, LeftBrace)?;
|
||||
|
||||
let mut block = Vec::new();
|
||||
while !expect_tt!(self.peek_next()?, RightBrace).accepted() {
|
||||
block.push(self.parse_statement()?);
|
||||
}
|
||||
|
||||
// expect right brace
|
||||
let _ = expect_tt!(self.next()?, RightBrace)?;
|
||||
|
||||
ParseResult::Accept(block)
|
||||
}
|
||||
|
||||
fn parse_statement(&mut self) -> ParseResult<Statement, CompilerError> {
|
||||
// handle if statements
|
||||
if expect_tt!(self.peek_next()?, If).accepted() {
|
||||
self.next()?;
|
||||
|
||||
let condition = self.parse_expression()?;
|
||||
|
||||
let then_stmt = self.parse_block()?;
|
||||
|
||||
if !expect_tt!(self.peek_next()?, Else).accepted() {
|
||||
return ParseResult::Accept(Statement::If {
|
||||
condition,
|
||||
then_stmt,
|
||||
else_stmt: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
let _ = expect_tt!(self.next()?, Else)?;
|
||||
|
||||
let else_stmt = self.parse_block()?;
|
||||
|
||||
return ParseResult::Accept(Statement::If {
|
||||
condition,
|
||||
then_stmt,
|
||||
else_stmt,
|
||||
});
|
||||
}
|
||||
|
||||
// handle while loops
|
||||
if expect_tt!(self.peek_next()?, While).accepted() {
|
||||
self.next()?;
|
||||
|
||||
// expect valid expression
|
||||
let expr = self.parse_expression()?;
|
||||
|
||||
// expect valid block after
|
||||
let block = self.parse_block()?;
|
||||
|
||||
// return result
|
||||
return ParseResult::Accept(Statement::While {
|
||||
condition: expr,
|
||||
body: block,
|
||||
});
|
||||
}
|
||||
|
||||
// handle indefinite loops
|
||||
if expect_tt!(self.peek_next()?, Loop).accepted() {
|
||||
self.next()?;
|
||||
|
||||
// parse the inner block
|
||||
return ParseResult::Accept(Statement::Loop(self.parse_block()?));
|
||||
}
|
||||
|
||||
if expect_tt!(self.peek_next()?, Return).accepted() {
|
||||
self.next()?;
|
||||
|
||||
// handle case where nothing is returned
|
||||
if expect_tt!(self.peek_next()?, Semicolon).accepted() {
|
||||
return ParseResult::Accept(Statement::Return(None));
|
||||
}
|
||||
|
||||
let expr = self.parse_expression()?;
|
||||
expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
return ParseResult::Accept(Statement::Return(Some(expr)));
|
||||
}
|
||||
|
||||
if expect_tt!(self.peek_next()?, Break).accepted() {
|
||||
self.next()?;
|
||||
|
||||
// expect semicolon
|
||||
expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
// return result
|
||||
return ParseResult::Accept(Statement::Break);
|
||||
}
|
||||
|
||||
if expect_tt!(self.peek_next()?, Continue).accepted() {
|
||||
self.next()?;
|
||||
|
||||
// expect semicolon
|
||||
expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
// return result
|
||||
return ParseResult::Accept(Statement::Continue);
|
||||
}
|
||||
|
||||
// handle writes to pointers!
|
||||
if expect_tt!(self.peek_next()?, Star).accepted() {
|
||||
self.next()?;
|
||||
|
||||
let left = if expect_tt!(self.peek_next()?, Identifier).accepted() {
|
||||
let identifier = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
Expression::Variable {
|
||||
name: identifier,
|
||||
expr_type: None,
|
||||
}
|
||||
} else if expect_tt!(self.peek_next()?, LeftParen).accepted() {
|
||||
self.next()?;
|
||||
|
||||
let expr = self.parse_expression()?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, RightParen).accepted();
|
||||
|
||||
expr
|
||||
} else {
|
||||
return ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||
self.peek_next()?.tt().to_string(),
|
||||
));
|
||||
};
|
||||
|
||||
let _ = expect_tt!(self.next()?, Assign)?;
|
||||
|
||||
let right = self.parse_expression()?;
|
||||
|
||||
// expect semicolon
|
||||
expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
// return result
|
||||
return ParseResult::Accept(Statement::PtrWrite {
|
||||
ptr: left,
|
||||
value: right,
|
||||
});
|
||||
}
|
||||
|
||||
// handle let statements (declarations)
|
||||
if expect_tt!(self.peek_next()?, Let).accepted() {
|
||||
self.next();
|
||||
|
||||
// expect variable name and type.
|
||||
let name = self.parse_var_decl()?;
|
||||
|
||||
// handle uninitialised variable case
|
||||
if expect_tt!(self.peek_next()?, Semicolon).accepted() {
|
||||
self.next();
|
||||
return ParseResult::Accept(Statement::Declaration {
|
||||
var: name,
|
||||
value: None,
|
||||
});
|
||||
}
|
||||
|
||||
// handle initialised case
|
||||
// expect equals
|
||||
let _ = expect_tt!(self.next()?, Assign)?;
|
||||
|
||||
// expect a valid expression
|
||||
let expr = self.parse_expression()?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, Semicolon);
|
||||
|
||||
// return statement
|
||||
return ParseResult::Accept(Statement::Declaration {
|
||||
var: name,
|
||||
value: Some(expr),
|
||||
});
|
||||
}
|
||||
|
||||
// handle an in-place function call
|
||||
if let ParseResult::Accept(name) = expect_value!(self.peek_next()?, Identifier)
|
||||
&& let ParseResult::Accept(operator) = expect_tt!(
|
||||
self.peek(1)?,
|
||||
Assign,
|
||||
PlusEqual,
|
||||
MinusEqual,
|
||||
StarEqual,
|
||||
SlashEqual,
|
||||
PercentEqual,
|
||||
AndEqual,
|
||||
OrEqual,
|
||||
XorEqual,
|
||||
ShlEqual,
|
||||
ShrEqual
|
||||
)
|
||||
{
|
||||
// consume name token
|
||||
self.next()?;
|
||||
|
||||
// pattern match to find operator
|
||||
let operator = match operator {
|
||||
Token::Assign => AssignmentOperator::Assign,
|
||||
Token::PlusEqual => AssignmentOperator::AddAssign,
|
||||
Token::MinusEqual => AssignmentOperator::SubAssign,
|
||||
Token::StarEqual => AssignmentOperator::MulAssign,
|
||||
Token::SlashEqual => AssignmentOperator::DivAssign,
|
||||
Token::PercentEqual => AssignmentOperator::ModAssign,
|
||||
Token::AndEqual => AssignmentOperator::AndAssign,
|
||||
Token::OrEqual => AssignmentOperator::OrAssign,
|
||||
Token::XorEqual => AssignmentOperator::XorAssign,
|
||||
Token::ShlEqual => AssignmentOperator::LeftShiftAssign,
|
||||
Token::ShrEqual => AssignmentOperator::RightShiftAssign,
|
||||
_ => {
|
||||
return ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||
self.peek_next()?.tt().to_string(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
// consume operator token
|
||||
self.next()?;
|
||||
|
||||
let value = self.parse_expression()?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, Semicolon);
|
||||
|
||||
return ParseResult::Accept(Statement::Assign {
|
||||
varname: name.name,
|
||||
operator,
|
||||
value,
|
||||
});
|
||||
}
|
||||
|
||||
// parse an expression and a semicolon
|
||||
let expr = self.parse_expression()?;
|
||||
let _ = expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
ParseResult::Accept(Statement::Expression { expr })
|
||||
}
|
||||
|
||||
fn parse_expression(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
self.parse_logical_or()
|
||||
}
|
||||
|
||||
fn parse_logical_or(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let left = self.parse_logical_and()?;
|
||||
|
||||
let op = match self.peek_next()? {
|
||||
Token::LogicalOr => BinaryOperator::LogicalOr,
|
||||
_ => return ParseResult::Accept(left),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Binary {
|
||||
op,
|
||||
left: Box::new(left),
|
||||
right: Box::new(self.parse_logical_or()?),
|
||||
type_id: Some(TypeId::U32),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_logical_and(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let left = self.parse_bitwise_or()?;
|
||||
|
||||
let op = match self.peek_next()? {
|
||||
Token::LogicalAnd => BinaryOperator::LogicalAnd,
|
||||
_ => return ParseResult::Accept(left),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Binary {
|
||||
op,
|
||||
left: Box::new(left),
|
||||
right: Box::new(self.parse_logical_and()?),
|
||||
type_id: Some(TypeId::U32),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_bitwise_or(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let left = self.parse_bitwise_xor()?;
|
||||
|
||||
let op = match self.peek_next()? {
|
||||
Token::Pipe => BinaryOperator::BitwiseOr,
|
||||
_ => return ParseResult::Accept(left),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Binary {
|
||||
op,
|
||||
left: Box::new(left),
|
||||
right: Box::new(self.parse_bitwise_or()?),
|
||||
type_id: Some(TypeId::U32),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_bitwise_xor(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let left = self.parse_bitwise_and()?;
|
||||
|
||||
let op = match self.peek_next()? {
|
||||
Token::Caret => BinaryOperator::BitwiseXor,
|
||||
_ => return ParseResult::Accept(left),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Binary {
|
||||
op,
|
||||
left: Box::new(left),
|
||||
right: Box::new(self.parse_bitwise_xor()?),
|
||||
type_id: Some(TypeId::U32),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_bitwise_and(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let left = self.parse_comparison()?;
|
||||
|
||||
let op = match self.peek_next()? {
|
||||
Token::Ampersand => BinaryOperator::BitwiseAnd,
|
||||
_ => return ParseResult::Accept(left),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Binary {
|
||||
op,
|
||||
left: Box::new(left),
|
||||
right: Box::new(self.parse_bitwise_and()?),
|
||||
type_id: Some(TypeId::U32),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_comparison(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let left = self.parse_shift()?;
|
||||
|
||||
let op = match self.peek_next()? {
|
||||
Token::EqualEqual => BinaryOperator::Equal,
|
||||
Token::BangEqual => BinaryOperator::NotEqual,
|
||||
Token::Less => BinaryOperator::LessThan,
|
||||
Token::Greater => BinaryOperator::GreaterThan,
|
||||
Token::LessEqual => BinaryOperator::LessOrEqual,
|
||||
Token::GreaterEqual => BinaryOperator::GreaterOrEqual,
|
||||
_ => return ParseResult::Accept(left),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Binary {
|
||||
op,
|
||||
left: Box::new(left),
|
||||
right: Box::new(self.parse_comparison()?),
|
||||
type_id: Some(TypeId::Bool),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_shift(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let left = self.parse_additive()?;
|
||||
|
||||
let op = match self.peek_next()? {
|
||||
Token::LeftShift => BinaryOperator::LeftShift,
|
||||
Token::RightShift => BinaryOperator::RightShift,
|
||||
_ => return ParseResult::Accept(left),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Binary {
|
||||
op,
|
||||
left: Box::new(left),
|
||||
right: Box::new(self.parse_shift()?),
|
||||
type_id: Some(TypeId::U32),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_additive(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let left = self.parse_multiplicative()?;
|
||||
|
||||
let op = match self.peek_next()? {
|
||||
Token::Plus => BinaryOperator::Add,
|
||||
Token::Minus => BinaryOperator::Sub,
|
||||
_ => return ParseResult::Accept(left),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Binary {
|
||||
op,
|
||||
left: Box::new(left),
|
||||
right: Box::new(self.parse_additive()?),
|
||||
type_id: Some(TypeId::U32),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_multiplicative(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let left = self.parse_unary()?;
|
||||
|
||||
let op = match self.peek_next()? {
|
||||
Token::Star => BinaryOperator::Mul,
|
||||
Token::Slash => BinaryOperator::Div,
|
||||
_ => return ParseResult::Accept(left),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Binary {
|
||||
op,
|
||||
left: Box::new(left),
|
||||
right: Box::new(self.parse_multiplicative()?),
|
||||
type_id: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_unary(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let op = match self.peek_next()? {
|
||||
// prefix inc/dec
|
||||
Token::PlusPlus => UnaryOperator::Increment,
|
||||
Token::MinusMinus => UnaryOperator::Decrement,
|
||||
|
||||
// arithmetic
|
||||
Token::Plus => UnaryOperator::Plus,
|
||||
Token::Minus => UnaryOperator::Minus,
|
||||
|
||||
// pointer
|
||||
Token::Star => UnaryOperator::Dereference,
|
||||
Token::Ampersand => UnaryOperator::AddressOf,
|
||||
|
||||
// boolean
|
||||
Token::Bang => UnaryOperator::LogicalNot,
|
||||
Token::Tilde => UnaryOperator::BitwiseNot,
|
||||
|
||||
Token::SizeOf => UnaryOperator::SizeOf,
|
||||
_ => {
|
||||
let expr = self.parse_primary()?;
|
||||
return self.parse_postfix(expr);
|
||||
}
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
let operand = Box::new(self.parse_unary()?);
|
||||
ParseResult::Accept(Expression::Unary {
|
||||
op,
|
||||
operand,
|
||||
type_id: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_postfix(
|
||||
&mut self,
|
||||
mut expr: Expression,
|
||||
) -> ParseResult<Expression, CompilerError> {
|
||||
loop {
|
||||
match self.peek_next()? {
|
||||
// Type cast: expr as Type
|
||||
Token::As => {
|
||||
self.next()?; // consume 'as'
|
||||
let target_type = self.parse_type()?;
|
||||
expr = Expression::TypeCast {
|
||||
expr: Box::new(expr),
|
||||
target_type,
|
||||
type_id: None,
|
||||
};
|
||||
}
|
||||
|
||||
// Postfix increment/decrement
|
||||
Token::PlusPlus => {
|
||||
self.next()?;
|
||||
expr = Expression::UnaryPostfix {
|
||||
op: UnaryOperator::Increment,
|
||||
operand: Box::new(expr),
|
||||
type_id: None,
|
||||
};
|
||||
}
|
||||
Token::MinusMinus => {
|
||||
self.next()?;
|
||||
expr = Expression::UnaryPostfix {
|
||||
op: UnaryOperator::Decrement,
|
||||
operand: Box::new(expr),
|
||||
type_id: None,
|
||||
};
|
||||
}
|
||||
|
||||
// Array indexing: expr[index]
|
||||
Token::LeftBracket => {
|
||||
self.next()?; // consume '['
|
||||
let index = Box::new(self.parse_expression()?);
|
||||
|
||||
let _ = expect_tt!(self.next()?, RightBracket)?;
|
||||
|
||||
expr = Expression::IndexAccess {
|
||||
expr: Box::new(expr),
|
||||
index,
|
||||
type_id: None,
|
||||
};
|
||||
}
|
||||
|
||||
// Function call: expr(args...)
|
||||
Token::LeftParen => {
|
||||
self.next()?; // consume '('
|
||||
let mut args = Vec::new();
|
||||
|
||||
if !matches!(self.peek_next()?, Token::RightParen) {
|
||||
loop {
|
||||
args.push(self.parse_expression()?);
|
||||
if !matches!(self.peek_next()?, Token::Comma) {
|
||||
break;
|
||||
}
|
||||
self.next()?; // consume comma
|
||||
}
|
||||
}
|
||||
|
||||
let _ = expect_tt!(self.next()?, RightParen)?;
|
||||
|
||||
if let Expression::Variable { name, .. } = expr {
|
||||
expr = Expression::Call {
|
||||
func: Call { name, args },
|
||||
type_id: None,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Member access: expr.member (if you support structs)
|
||||
Token::Dot => {
|
||||
self.next()?;
|
||||
let field_name = expect_value!(self.next()?, Identifier)?;
|
||||
expr = Expression::MemberAccess {
|
||||
expr: Box::new(expr),
|
||||
field_name,
|
||||
type_id: None,
|
||||
};
|
||||
}
|
||||
|
||||
// No more postfix operations
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
ParseResult::Accept(expr)
|
||||
}
|
||||
|
||||
fn parse_primary(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
match self.peek_next()? {
|
||||
Token::UnsignedInt(value, type_id) => {
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Number(Number::Unsigned(value, type_id)))
|
||||
}
|
||||
Token::SignedInt(value, type_id) => {
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Number(Number::Signed(value, type_id)))
|
||||
}
|
||||
Token::String(value) => {
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::StringLiteral(value))
|
||||
}
|
||||
Token::Char(value) => {
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::CharLiteral(value))
|
||||
}
|
||||
|
||||
Token::Identifier(name) => {
|
||||
self.next()?;
|
||||
|
||||
// if the next token isn't the beginning of a struct literal this is just
|
||||
// an identifier.
|
||||
if !expect_tt!(self.peek_next()?, LeftBrace).accepted() {
|
||||
return ParseResult::Accept(Expression::Variable {
|
||||
name,
|
||||
expr_type: None,
|
||||
});
|
||||
}
|
||||
|
||||
let _ = self.next()?;
|
||||
|
||||
let mut fields = Vec::new();
|
||||
while !expect_tt!(self.peek_next()?, RightBrace).accepted() {
|
||||
let name = expect_value!(self.next()?, Identifier)?;
|
||||
let _ = expect_tt!(self.next()?, Colon)?;
|
||||
let expr = self.parse_expression()?;
|
||||
|
||||
fields.push((name, expr));
|
||||
|
||||
if expect_tt!(self.peek_next()?, Comma).accepted() {
|
||||
self.next()?;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let _ = expect_tt!(self.next()?, RightBrace)?;
|
||||
|
||||
ParseResult::Accept(Expression::StructLiteral {
|
||||
name,
|
||||
fields,
|
||||
type_id: None,
|
||||
})
|
||||
}
|
||||
Token::LeftBracket => {
|
||||
self.next()?; // consume '['
|
||||
let mut elements = Vec::new();
|
||||
|
||||
if !matches!(self.peek_next()?, Token::RightBracket) {
|
||||
loop {
|
||||
elements.push(self.parse_expression()?);
|
||||
if !matches!(self.peek_next()?, Token::Comma) {
|
||||
break;
|
||||
}
|
||||
self.next()?; // consume comma
|
||||
}
|
||||
}
|
||||
|
||||
expect_tt!(self.next()?, RightBracket)?;
|
||||
ParseResult::Accept(Expression::ArrayLiteral {
|
||||
elements,
|
||||
type_id: None,
|
||||
})
|
||||
}
|
||||
Token::LeftParen => {
|
||||
self.next()?;
|
||||
let expr = self.parse_expression()?;
|
||||
let _ = expect_tt!(self.next()?, RightParen)?;
|
||||
ParseResult::Accept(expr)
|
||||
}
|
||||
_ => ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||
self.peek_next()?.tt().to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_var_decl(&mut self) -> ParseResult<Variable, CompilerError> {
|
||||
let name = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, Colon)?;
|
||||
|
||||
let type_id = self.parse_type()?;
|
||||
|
||||
ParseResult::Accept(Variable {
|
||||
name: name.name,
|
||||
type_id,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_type(&mut self) -> ParseResult<TypeId, CompilerError> {
|
||||
println!("yes {:?}", self.peek_next()?);
|
||||
|
||||
// parse primitive or named type
|
||||
if expect_tt!(self.peek_next()?, Identifier).accepted() {
|
||||
return self.parse_type_identifier();
|
||||
}
|
||||
|
||||
// parse array type
|
||||
if expect_tt!(self.peek_next()?, LeftBracket).accepted() {
|
||||
let _ = self.next()?;
|
||||
|
||||
let internal_type = self.parse_type()?;
|
||||
let _ = expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
let size = expect_value!(self.next()?, UnsignedInt)?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, RightBracket)?;
|
||||
|
||||
return ParseResult::Accept(TypeId::Array {
|
||||
r#type: Box::new(internal_type),
|
||||
size: size as usize,
|
||||
});
|
||||
}
|
||||
|
||||
// parse tuple type
|
||||
if expect_tt!(self.peek_next()?, LeftParen).accepted() {
|
||||
let _ = self.next()?;
|
||||
|
||||
let mut types = Vec::new();
|
||||
while !expect_tt!(self.peek_next()?, RightParen).accepted() {
|
||||
types.push(self.parse_type()?);
|
||||
if !expect_tt!(self.peek_next()?, Comma).accepted() {
|
||||
break;
|
||||
}
|
||||
let _ = self.next()?;
|
||||
}
|
||||
let _ = expect_tt!(self.next()?, RightParen)?;
|
||||
|
||||
return ParseResult::Accept(TypeId::Tuple(types));
|
||||
}
|
||||
|
||||
ParseResult::Reject(CompilerError::Generic(format!(
|
||||
"Parsing type but no valid type was detected: {:?}",
|
||||
self.peek_next()?
|
||||
)))
|
||||
}
|
||||
|
||||
fn parse_type_identifier(&mut self) -> ParseResult<TypeId, CompilerError> {
|
||||
// get the type name incl namespace
|
||||
let name = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
let type_id = match name.name.as_str() {
|
||||
"u32" => TypeId::U32,
|
||||
"u16" => TypeId::U16,
|
||||
"u8" => TypeId::U8,
|
||||
"i32" => TypeId::I32,
|
||||
"i16" => TypeId::I16,
|
||||
"i8" => TypeId::I8,
|
||||
"void" => TypeId::Void,
|
||||
"char" => TypeId::Char,
|
||||
"str" => TypeId::Ptr(Box::new(TypeId::Char)),
|
||||
_ => {
|
||||
let mut generics = Vec::new();
|
||||
if expect_tt!(self.peek_next()?, Less).accepted() {
|
||||
let _ = self.next()?;
|
||||
|
||||
// loop until we find the closing '>'
|
||||
while !expect_tt!(self.peek_next()?, Greater).accepted() {
|
||||
generics.push(self.parse_type()?);
|
||||
if !expect_tt!(self.peek_next()?, Comma).accepted() {
|
||||
break;
|
||||
}
|
||||
let _ = self.next()?;
|
||||
}
|
||||
let _ = expect_tt!(self.next()?, Greater)?;
|
||||
}
|
||||
|
||||
TypeId::UnknownCustom { name, generics }
|
||||
}
|
||||
};
|
||||
|
||||
ParseResult::Accept(type_id)
|
||||
}
|
||||
|
||||
fn next(&mut self) -> ParseResult<Token, CompilerError> {
|
||||
if self.idx >= self.tokens.len() {
|
||||
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
|
||||
} else {
|
||||
let token = self.tokens[self.idx].clone();
|
||||
self.idx += 1;
|
||||
ParseResult::Accept(token)
|
||||
}
|
||||
}
|
||||
|
||||
fn peek_next(&self) -> ParseResult<Token, CompilerError> {
|
||||
if self.idx >= self.tokens.len() {
|
||||
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
|
||||
} else {
|
||||
ParseResult::Accept(self.tokens[self.idx].clone())
|
||||
}
|
||||
}
|
||||
|
||||
fn peek(&self, offset: usize) -> ParseResult<Token, CompilerError> {
|
||||
if self.idx + offset >= self.tokens.len() {
|
||||
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
|
||||
} else {
|
||||
ParseResult::Accept(self.tokens[self.idx + offset].clone())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, E> ParseResult<T, E> {
|
||||
pub fn accepted(&self) -> bool {
|
||||
matches!(self, ParseResult::Accept(_))
|
||||
}
|
||||
}
|
||||
|
||||
/// The failure half of a `ParseResult`, used as its `Try::Residual` so that `?` can carry
/// a failure out of a function.
pub enum ParseResultResidual<T> {
    /// Failure without an error payload.
    Deny,
    /// Failure carrying an error.
    Reject(T),
}
|
||||
|
||||
impl<T, E> Try for ParseResult<T, E> {
|
||||
type Output = T;
|
||||
type Residual = ParseResultResidual<E>;
|
||||
|
||||
fn from_output(output: T) -> Self {
|
||||
ParseResult::Accept(output)
|
||||
}
|
||||
|
||||
fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
|
||||
match self {
|
||||
ParseResult::Accept(v) => ControlFlow::Continue(v),
|
||||
ParseResult::Deny => ControlFlow::Break(ParseResultResidual::Deny),
|
||||
ParseResult::Reject(e) => ControlFlow::Break(ParseResultResidual::Reject(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, E> FromResidual for ParseResult<T, E> {
|
||||
fn from_residual(residual: ParseResultResidual<E>) -> Self {
|
||||
match residual {
|
||||
ParseResultResidual::Deny => ParseResult::Deny,
|
||||
ParseResultResidual::Reject(e) => ParseResult::Reject(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that a token's type name is one of the listed variants.
///
/// `$token` must provide `clone()` and `tt()`. The token's `tt()` is rendered with
/// `to_string()` and compared against the stringified variant names. The macro evaluates
/// to `ParseResult::Accept(token)` on a match and to
/// `ParseResult::Reject(CompilerError::UnexpectedToken(..))` carrying the actual type name
/// otherwise.
///
/// `ParseResult` and `CompilerError` must be in scope at the call site.
#[macro_export]
macro_rules! expect_tt {
    ($token:expr, $($variant:ident),+) => {{
        let token = $token.clone();
        let tt = token.tt().to_string();

        match tt.as_str() {
            $(
                stringify!($variant) => ParseResult::Accept(token),
            )+
            _ => ParseResult::Reject(CompilerError::UnexpectedToken(tt)),
        }
    }};
}
|
||||
|
||||
/// Extracts the first payload field of a token when it is the given `Token` variant.
///
/// Evaluates to `ParseResult::Accept(payload)` on a match and to
/// `ParseResult::Reject(CompilerError::UnexpectedToken(..))` carrying the token's type name
/// otherwise.
///
/// `Token`, `ParseResult` and `CompilerError` must be in scope at the call site.
#[macro_export]
macro_rules! expect_value {
    ($expr:expr, $variant:ident) => {{
        let tok = $expr;
        if let Token::$variant(first, ..) = tok.clone() {
            ParseResult::Accept(first)
        } else {
            ParseResult::Reject(CompilerError::UnexpectedToken(tok.tt().to_string()))
        }
    }};
}
|
||||
@@ -0,0 +1,226 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::model::{
|
||||
BinaryOperator, // You'll need to add this to your imports
|
||||
CompilerError,
|
||||
Declaration,
|
||||
Dependency,
|
||||
Expression,
|
||||
Program,
|
||||
TypeId,
|
||||
UnaryOperator,
|
||||
};
|
||||
|
||||
pub struct Analyser {
|
||||
symbol_table: HashMap<String, Declaration>,
|
||||
}
|
||||
|
||||
const NUMERIC_TYPES: &[TypeId] = &[
|
||||
TypeId::U32,
|
||||
TypeId::I32,
|
||||
TypeId::I16,
|
||||
TypeId::U16,
|
||||
TypeId::I8,
|
||||
TypeId::U8,
|
||||
];
|
||||
|
||||
impl Analyser {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
symbol_table: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn analyse(&mut self, ast: Program) -> Result<(), CompilerError> {
|
||||
// build table of global symbols.
|
||||
for dec in ast.declarations {
|
||||
let name = match dec.clone() {
|
||||
Declaration::Function { name, .. } => name,
|
||||
Declaration::Variable { var, .. } => var.name,
|
||||
Declaration::Dependency(Dependency { name, .. }) => name,
|
||||
};
|
||||
|
||||
self.symbol_table.insert(name, dec);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn match_type(
|
||||
actual: TypeId,
|
||||
expected: Option<TypeId>,
|
||||
) -> Result<TypeId, CompilerError> {
|
||||
match expected {
|
||||
Some(id) => {
|
||||
if id != actual {
|
||||
Err(CompilerError::TypeMismatch(id, actual))
|
||||
} else {
|
||||
Ok(actual)
|
||||
}
|
||||
}
|
||||
None => Ok(actual),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_type(
|
||||
&mut self, // Changed from &self to &mut self since we modify expr
|
||||
expr: &mut Expression,
|
||||
expected_type: Option<TypeId>,
|
||||
) -> Result<TypeId, CompilerError> {
|
||||
match expr {
|
||||
// Correct IFF we're expecting a void type
|
||||
Expression::Empty => Self::match_type(TypeId::Void, expected_type),
|
||||
|
||||
// Correct IFF we're expecting a char type
|
||||
Expression::CharLiteral(_) => Self::match_type(TypeId::Char, expected_type),
|
||||
|
||||
// Correct IFF we're expecting a string slice type
|
||||
Expression::StringLiteral(_) => {
|
||||
Self::match_type(TypeId::Ptr(Box::new(TypeId::Char)), expected_type)
|
||||
}
|
||||
|
||||
Expression::Variable { name, expr_type } => {
|
||||
let actual = expr_type.clone().ok_or(CompilerError::UnknownType)?;
|
||||
Self::match_type(actual, expected_type)
|
||||
}
|
||||
|
||||
Expression::Number { value, type_id } => {
|
||||
// If we already know the TypeId
|
||||
if let Some(id) = type_id {
|
||||
return Self::match_type(id.clone(), expected_type);
|
||||
}
|
||||
|
||||
// If we're expecting a type id, check it's numeric.
|
||||
// TODO: add checks to make sure it's valid for its size eg u8 cant be
|
||||
// more than 255
|
||||
if let Some(expected) = expected_type {
|
||||
if NUMERIC_TYPES.contains(&expected) {
|
||||
*type_id = Some(expected.clone());
|
||||
return Ok(expected);
|
||||
} else {
|
||||
return Err(CompilerError::TypeMismatch(expected, TypeId::U32));
|
||||
}
|
||||
}
|
||||
|
||||
// Default to i32 if no type information is available
|
||||
*type_id = Some(TypeId::I32);
|
||||
Ok(TypeId::I32)
|
||||
}
|
||||
|
||||
Expression::Binary {
|
||||
op,
|
||||
left,
|
||||
right,
|
||||
type_id,
|
||||
} => {
|
||||
// For binary operations, both operands should have compatible types
|
||||
// and the result type depends on the operation
|
||||
let left_type = self.get_type(left, None)?;
|
||||
let right_type = self.get_type(right, Some(left_type.clone()))?;
|
||||
|
||||
// For numeric operations, result has the same type as operands
|
||||
if NUMERIC_TYPES.contains(&left_type)
|
||||
&& NUMERIC_TYPES.contains(&right_type)
|
||||
{
|
||||
*type_id = Some(left_type);
|
||||
Self::match_type(left_type, expected_type)
|
||||
} else {
|
||||
Err(CompilerError::TypeMismatch(left_type, right_type))
|
||||
}
|
||||
}
|
||||
|
||||
Expression::Unary {
|
||||
op,
|
||||
operand,
|
||||
type_id,
|
||||
} => {
|
||||
match op {
|
||||
UnaryOperator::Plus | UnaryOperator::Minus => {
|
||||
// Unary +/- require numeric operands
|
||||
let inner_type = self.get_type(operand, None)?;
|
||||
|
||||
if NUMERIC_TYPES.contains(&inner_type) {
|
||||
*type_id = Some(inner_type.clone());
|
||||
Self::match_type(inner_type, expected_type)
|
||||
} else {
|
||||
Err(CompilerError::TypeMismatch(inner_type, TypeId::I32))
|
||||
}
|
||||
}
|
||||
|
||||
UnaryOperator::Dereference => {
|
||||
// For dereference (*ptr), the operand must be a pointer
|
||||
// and the result type is what the pointer points to
|
||||
let inner_type = self.get_type(operand, None)?;
|
||||
|
||||
match inner_type {
|
||||
TypeId::Ptr(inner) => {
|
||||
let deref_type = *inner;
|
||||
*type_id = Some(deref_type.clone());
|
||||
Self::match_type(deref_type, expected_type)
|
||||
}
|
||||
_ => Err(CompilerError::Generic(format!(
|
||||
"Cannot dereference non-pointer type: {:?}",
|
||||
inner_type
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
UnaryOperator::Reference => {
|
||||
// For reference (&var), we need to determine what we're taking
|
||||
// a reference to, then wrap it in a Ptr
|
||||
// If expected_type is Ptr(T), then operand should have type T
|
||||
let expected_inner = match expected_type.clone() {
|
||||
Some(TypeId::Ptr(inner)) => Some(*inner),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let inner_type = self.get_type(operand, expected_inner)?;
|
||||
let ref_type = TypeId::Ptr(Box::new(inner_type));
|
||||
*type_id = Some(ref_type.clone());
|
||||
Self::match_type(ref_type, expected_type)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Expression::Call {
|
||||
name,
|
||||
args,
|
||||
type_id,
|
||||
} => match self.symbol_table.get(&name.name) {
|
||||
Some(Declaration::Function {
|
||||
params,
|
||||
return_type,
|
||||
..
|
||||
}) => {
|
||||
// check that we've given the right number of arguments.
|
||||
if args.len() != params.len() {
|
||||
return Err(CompilerError::Generic(format!(
|
||||
"Function {} expected {} arguments but received {}",
|
||||
name.name,
|
||||
params.len(),
|
||||
args.len()
|
||||
)));
|
||||
}
|
||||
|
||||
for (arg, param) in args.iter_mut().zip(params.iter()) {
|
||||
// check that the argument type matches the parameter type.
|
||||
let provided_type = self.get_type(arg, Some(param.type_id))?;
|
||||
if provided_type != param.type_id {
|
||||
return Err(CompilerError::TypeMismatch(
|
||||
param.type_id,
|
||||
provided_type,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
*type_id = Some(return_type.clone());
|
||||
Self::match_type(return_type.clone(), expected_type)
|
||||
}
|
||||
_ => Err(CompilerError::Generic(format!(
|
||||
"Function {} not found in symbol table",
|
||||
name.name
|
||||
))),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
use crate::model::{CompilerError, Program};
|
||||
|
||||
// mod c;
|
||||
mod dsc;
|
||||
|
||||
pub fn compiler_frontend(ext: &str, data: &str) -> Result<Program, CompilerError> {
|
||||
match ext {
|
||||
"dsc" => Ok(dsc::generate_ast(&data)?),
|
||||
// "c" => Ok(c::generate_ast(&data)?),
|
||||
_ => Err(CompilerError::Generic(format!(
|
||||
"File type {} not supported",
|
||||
ext
|
||||
))),
|
||||
}
|
||||
}
|
||||
@@ -1,627 +0,0 @@
|
||||
use std::iter::Peekable;
|
||||
use std::str::Chars;
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum Token {
|
||||
// Keywords
|
||||
Fn,
|
||||
Let,
|
||||
If,
|
||||
Else,
|
||||
Loop,
|
||||
While,
|
||||
Break,
|
||||
Return,
|
||||
Continue,
|
||||
Include,
|
||||
Static,
|
||||
Const,
|
||||
|
||||
// Identifiers and literals
|
||||
Identifier(Name),
|
||||
String(String),
|
||||
Integer(u64),
|
||||
Char(char),
|
||||
|
||||
// Symbols
|
||||
LeftParen, // (
|
||||
RightParen, // )
|
||||
LeftBrace, // {
|
||||
RightBrace, // }
|
||||
Semicolon, // ;
|
||||
Colon, // :
|
||||
Comma, // ,
|
||||
|
||||
// Operators
|
||||
Plus, // +
|
||||
Minus, // -
|
||||
Star, // *
|
||||
Amphersand, // &
|
||||
Slash, // /
|
||||
Assign, // =
|
||||
EqualEqual, // ==
|
||||
Bang, // !
|
||||
BangEqual, // !=
|
||||
Less, // <
|
||||
LessEqual, // <=
|
||||
Greater, // >
|
||||
GreaterEqual, // >=
|
||||
RightArrow, // ->
|
||||
|
||||
// Special
|
||||
Eof,
|
||||
}
|
||||
|
||||
/// An identifier, optionally qualified by a namespace.
#[derive(Debug, PartialEq, Clone)]
pub struct Name {
    /// The identifier itself.
    pub name: String,
    /// The namespace qualifier, when one was written.
    pub namespace: Option<String>,
}

use std::fmt;

impl fmt::Display for Name {
    /// Writes `namespace::name` when a namespace is present, otherwise `name`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.namespace {
            Some(ns) => write!(f, "{}::{}", ns, self.name),
            None => write!(f, "{}", self.name),
        }
    }
}
|
||||
|
||||
impl Token {
|
||||
pub fn tt(&self) -> &str {
|
||||
match self {
|
||||
Token::Const => "Const",
|
||||
Token::Static => "Static",
|
||||
Token::Include => "Include",
|
||||
Token::Fn => "Fn",
|
||||
Token::If => "If",
|
||||
Token::Let => "Let",
|
||||
Token::Else => "Else",
|
||||
Token::Loop => "Loop",
|
||||
Token::While => "While",
|
||||
Token::Break => "Break",
|
||||
Token::Return => "Return",
|
||||
Token::Continue => "Continue",
|
||||
Token::Identifier(_) => "Identifier",
|
||||
Token::String(_) => "String",
|
||||
Token::Integer(_) => "UnsignedInt",
|
||||
Token::Char(_) => "Char",
|
||||
Token::LeftParen => "LeftParen",
|
||||
Token::RightParen => "RightParen",
|
||||
Token::LeftBrace => "LeftBrace",
|
||||
Token::RightBrace => "RightBrace",
|
||||
Token::Semicolon => "Semicolon",
|
||||
Token::Colon => "Colon",
|
||||
Token::Comma => "Comma",
|
||||
Token::RightArrow => "RightArrow",
|
||||
Token::Plus => "Plus",
|
||||
Token::Minus => "Minus",
|
||||
Token::Star => "Star",
|
||||
Token::Amphersand => "Amphersand",
|
||||
Token::Slash => "Slash",
|
||||
Token::Assign => "Assign",
|
||||
Token::EqualEqual => "EqualEqual",
|
||||
Token::Bang => "Bang",
|
||||
Token::BangEqual => "BangEqual",
|
||||
Token::Less => "Less",
|
||||
Token::LessEqual => "LessEqual",
|
||||
Token::Greater => "Greater",
|
||||
Token::GreaterEqual => "GreaterEqual",
|
||||
Token::Eof => "Eof",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Character-level scanner that turns source text into `Token`s.
#[derive(Debug)]
pub struct Lexer<'a> {
    /// Characters that have not been consumed yet.
    chars: Peekable<Chars<'a>>,
    /// The character currently under the cursor; `None` at end of input.
    current: Option<char>,
    /// Line number of the cursor, starting at 1.
    line: usize,
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new(input: &'a str) -> Self {
|
||||
let mut chars = input.chars().peekable();
|
||||
let current = chars.next();
|
||||
|
||||
Lexer {
|
||||
chars,
|
||||
current,
|
||||
line: 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn advance(&mut self) -> Option<char> {
|
||||
self.current = self.chars.next();
|
||||
self.current
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<&char> {
|
||||
self.chars.peek()
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) {
|
||||
while let Some(c) = self.current {
|
||||
if !c.is_whitespace() {
|
||||
break;
|
||||
}
|
||||
if c == '\n' {
|
||||
self.line += 1;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_line_comment(&mut self) {
|
||||
// Skip the two slashes
|
||||
self.advance(); // first /
|
||||
self.advance(); // second /
|
||||
|
||||
// Skip until newline or EOF
|
||||
while let Some(c) = self.current {
|
||||
if c == '\n' {
|
||||
self.line += 1;
|
||||
self.advance();
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_block_comment(&mut self) -> Result<(), String> {
|
||||
// Skip the /*
|
||||
self.advance(); // /
|
||||
self.advance(); // *
|
||||
|
||||
let start_line = self.line;
|
||||
|
||||
// Look for */
|
||||
while let Some(c) = self.current {
|
||||
if c == '\n' {
|
||||
self.line += 1;
|
||||
}
|
||||
|
||||
if c == '*' {
|
||||
if let Some(&next) = self.peek() {
|
||||
if next == '/' {
|
||||
self.advance(); // *
|
||||
self.advance(); // /
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Err(format!(
|
||||
"Unterminated block comment starting at line {}",
|
||||
start_line
|
||||
))
|
||||
}
|
||||
|
||||
fn skip_whitespace_and_comments(&mut self) {
|
||||
loop {
|
||||
self.skip_whitespace();
|
||||
|
||||
// Check for comments
|
||||
if let Some('/') = self.current {
|
||||
if let Some(&next) = self.peek() {
|
||||
match next {
|
||||
'/' => {
|
||||
self.skip_line_comment();
|
||||
continue;
|
||||
}
|
||||
'*' => {
|
||||
if let Err(e) = self.skip_block_comment() {
|
||||
eprintln!("Lexer error: {}", e);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fn read_identifier(&mut self) -> String {
|
||||
let mut ident = String::new();
|
||||
|
||||
// Include the current character if it's valid
|
||||
if let Some(c) = self.current {
|
||||
if c.is_alphabetic() || c == '_' {
|
||||
ident.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
// Read remaining characters
|
||||
while let Some(&c) = self.peek() {
|
||||
if c.is_alphanumeric() || c == '_' {
|
||||
self.advance();
|
||||
ident.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ident
|
||||
}
|
||||
|
||||
fn keyword_or_identifier(&mut self) -> Token {
|
||||
let first_ident = self.read_identifier();
|
||||
|
||||
// Check if it's a keyword first (keywords can't have namespaces)
|
||||
let keyword = match first_ident.as_str() {
|
||||
"fn" => Some(Token::Fn),
|
||||
"if" => Some(Token::If),
|
||||
"else" => Some(Token::Else),
|
||||
"while" => Some(Token::While),
|
||||
"loop" => Some(Token::Loop),
|
||||
"break" => Some(Token::Break),
|
||||
"return" => Some(Token::Return),
|
||||
"continue" => Some(Token::Continue),
|
||||
"include" => Some(Token::Include),
|
||||
"let" => Some(Token::Let),
|
||||
"const" => Some(Token::Const),
|
||||
"static" => Some(Token::Static),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
if let Some(kw) = keyword {
|
||||
return kw;
|
||||
}
|
||||
|
||||
// Not a keyword - check for namespace separator (::)
|
||||
// We need to peek TWO characters ahead without consuming anything
|
||||
if let Some(&':') = self.peek() {
|
||||
// We see one colon, but we need to check if there's another one after it
|
||||
// We can't peek two ahead directly, so we need a different approach
|
||||
|
||||
// Save the current position by using a temporary peekable iterator
|
||||
// Actually, we can't do that easily. Instead, let's just check:
|
||||
// If we see ':', temporarily advance and check the next char
|
||||
|
||||
// Create a temporary check
|
||||
let mut temp_chars = self.chars.clone();
|
||||
let first_peek = temp_chars.next(); // This is the ':' we already saw
|
||||
let second_peek = temp_chars.peek();
|
||||
|
||||
if let Some(&':') = second_peek {
|
||||
// It's :: - consume both colons
|
||||
self.advance(); // consume first :
|
||||
self.advance(); // consume second :
|
||||
|
||||
// Read the second identifier (the actual name)
|
||||
let second_ident = self.read_identifier();
|
||||
|
||||
// Return namespaced identifier
|
||||
return Token::Identifier(Name {
|
||||
namespace: Some(first_ident),
|
||||
name: second_ident,
|
||||
});
|
||||
}
|
||||
// else: It's a single colon (type annotation) - DON'T consume it
|
||||
// Just fall through and return the identifier
|
||||
}
|
||||
|
||||
// No namespace separator - just a regular identifier
|
||||
Token::Identifier(Name {
|
||||
namespace: None,
|
||||
name: first_ident,
|
||||
})
|
||||
}
|
||||
|
||||
fn read_number(&mut self) -> Result<u64, String> {
|
||||
let current = self.current.unwrap();
|
||||
|
||||
// Check for hex (0x) or binary (0b) prefix
|
||||
if current == '0' {
|
||||
if let Some(&next_char) = self.peek() {
|
||||
match next_char {
|
||||
'x' | 'X' => {
|
||||
self.advance(); // consume '0'
|
||||
self.advance(); // consume 'x'
|
||||
return self.read_hex_number();
|
||||
}
|
||||
'b' | 'B' => {
|
||||
self.advance(); // consume '0'
|
||||
self.advance(); // consume 'b'
|
||||
return self.read_binary_number();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Read decimal number
|
||||
self.read_decimal_number()
|
||||
}
|
||||
|
||||
fn read_decimal_number(&mut self) -> Result<u64, String> {
|
||||
let mut num_str = String::new();
|
||||
|
||||
if let Some(c) = self.current {
|
||||
num_str.push(c);
|
||||
}
|
||||
|
||||
while let Some(&c) = self.peek() {
|
||||
if c.is_ascii_digit() {
|
||||
self.advance();
|
||||
num_str.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
num_str
|
||||
.parse::<u64>()
|
||||
.map_err(|_| format!("Invalid decimal number: {}", num_str))
|
||||
}
|
||||
|
||||
fn read_hex_number(&mut self) -> Result<u64, String> {
|
||||
let mut num_str = String::new();
|
||||
|
||||
// Read current character if it's a hex digit
|
||||
if let Some(c) = self.current {
|
||||
if c.is_ascii_hexdigit() {
|
||||
num_str.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(&c) = self.peek() {
|
||||
if c.is_ascii_hexdigit() {
|
||||
self.advance();
|
||||
num_str.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if num_str.is_empty() {
|
||||
return Err("Invalid hexadecimal number: no digits after 0x".to_string());
|
||||
}
|
||||
|
||||
u64::from_str_radix(&num_str, 16)
|
||||
.map_err(|_| format!("Invalid hexadecimal number: {}", num_str))
|
||||
}
|
||||
|
||||
fn read_binary_number(&mut self) -> Result<u64, String> {
|
||||
let mut num_str = String::new();
|
||||
|
||||
// Read current character if it's a binary digit
|
||||
if let Some(c) = self.current {
|
||||
if c == '0' || c == '1' {
|
||||
num_str.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
while let Some(&c) = self.peek() {
|
||||
if c == '0' || c == '1' {
|
||||
self.advance();
|
||||
num_str.push(c);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if num_str.is_empty() {
|
||||
return Err("Invalid binary number: no digits after 0b".to_string());
|
||||
}
|
||||
|
||||
u64::from_str_radix(&num_str, 2)
|
||||
.map_err(|_| format!("Invalid binary number: {}", num_str))
|
||||
}
|
||||
|
||||
fn read_string(&mut self) -> Result<String, String> {
|
||||
self.advance(); // Skip the opening quote
|
||||
let mut s = String::new();
|
||||
|
||||
while let Some(c) = self.current {
|
||||
if c == '"' {
|
||||
return Ok(s);
|
||||
}
|
||||
|
||||
// Handle escape sequences
|
||||
if c == '\\' {
|
||||
self.advance();
|
||||
if let Some(escaped) = self.current {
|
||||
let escaped_char = match escaped {
|
||||
'n' => '\n',
|
||||
't' => '\t',
|
||||
'r' => '\r',
|
||||
'\\' => '\\',
|
||||
'"' => '"',
|
||||
_ => escaped, // For now, just use the character as-is
|
||||
};
|
||||
s.push(escaped_char);
|
||||
} else {
|
||||
return Err("Unexpected end of string after escape".to_string());
|
||||
}
|
||||
} else {
|
||||
s.push(c);
|
||||
}
|
||||
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Err("Unterminated string literal".to_string())
|
||||
}
|
||||
|
||||
fn match_next(&mut self, expected: char) -> bool {
|
||||
match self.peek() {
|
||||
Some(&c) if c == expected => {
|
||||
self.advance();
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_single_char_token(&mut self, c: char) -> Option<Token> {
|
||||
match c {
|
||||
'(' => Some(Token::LeftParen),
|
||||
')' => Some(Token::RightParen),
|
||||
'{' => Some(Token::LeftBrace),
|
||||
'}' => Some(Token::RightBrace),
|
||||
';' => Some(Token::Semicolon),
|
||||
',' => Some(Token::Comma),
|
||||
'&' => Some(Token::Amphersand),
|
||||
'+' => Some(Token::Plus),
|
||||
'*' => Some(Token::Star),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_operator(&mut self, c: char) -> Option<Token> {
|
||||
match c {
|
||||
'-' => Some(if self.match_next('>') {
|
||||
Token::RightArrow
|
||||
} else {
|
||||
Token::Minus
|
||||
}),
|
||||
'!' => Some(if self.match_next('=') {
|
||||
Token::BangEqual
|
||||
} else {
|
||||
Token::Bang
|
||||
}),
|
||||
'=' => Some(if self.match_next('=') {
|
||||
Token::EqualEqual
|
||||
} else {
|
||||
Token::Assign
|
||||
}),
|
||||
'<' => Some(if self.match_next('=') {
|
||||
Token::LessEqual
|
||||
} else {
|
||||
Token::Less
|
||||
}),
|
||||
'>' => Some(if self.match_next('=') {
|
||||
Token::GreaterEqual
|
||||
} else {
|
||||
Token::Greater
|
||||
}),
|
||||
':' => {
|
||||
// Single colon (for type annotations)
|
||||
// Note: :: is handled in keyword_or_identifier for namespaces
|
||||
Some(Token::Colon)
|
||||
}
|
||||
'/' => {
|
||||
// Check if it's a comment or division
|
||||
if let Some(&next) = self.peek() {
|
||||
if next == '/' || next == '*' {
|
||||
// It's a comment, don't consume it here
|
||||
// Let skip_whitespace_and_comments handle it
|
||||
None
|
||||
} else {
|
||||
Some(Token::Slash)
|
||||
}
|
||||
} else {
|
||||
Some(Token::Slash)
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_token(&mut self) -> Token {
|
||||
self.skip_whitespace_and_comments();
|
||||
|
||||
let Some(c) = self.current else {
|
||||
return Token::Eof;
|
||||
};
|
||||
|
||||
// Try single-character tokens first
|
||||
if let Some(token) = self.scan_single_char_token(c) {
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// Try operators (may be multi-character)
|
||||
if let Some(token) = self.scan_operator(c) {
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// String literals
|
||||
if c == '"' {
|
||||
let token = match self.read_string() {
|
||||
Ok(s) => Token::String(s),
|
||||
Err(e) => {
|
||||
eprintln!("Lexer error on line {}: {}", self.line, e);
|
||||
// Skip to next quote or end
|
||||
while let Some(ch) = self.current {
|
||||
if ch == '"' || ch == '\n' {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
Token::String(String::new())
|
||||
}
|
||||
};
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// Identifiers and keywords (including namespaced identifiers)
|
||||
if c.is_alphabetic() || c == '_' {
|
||||
let token = self.keyword_or_identifier();
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// Numbers (decimal, hex, binary)
|
||||
if c.is_ascii_digit() {
|
||||
let token = match self.read_number() {
|
||||
Ok(num) => Token::Integer(num),
|
||||
Err(e) => {
|
||||
eprintln!("Lexer error on line {}: {}", self.line, e);
|
||||
// Skip invalid number
|
||||
while let Some(&ch) = self.peek() {
|
||||
if !ch.is_alphanumeric() {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
Token::Integer(0)
|
||||
}
|
||||
};
|
||||
self.advance();
|
||||
return token;
|
||||
}
|
||||
|
||||
// Unknown character - skip it
|
||||
eprintln!(
|
||||
"Lexer warning on line {}: Skipping unknown character '{}'",
|
||||
self.line, c
|
||||
);
|
||||
self.advance();
|
||||
self.next_token()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Lexer<'a> {
|
||||
type Item = Token;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.next_token() {
|
||||
Token::Eof => None,
|
||||
token => Some(token),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects every token produced for `src`; the iterator stops before `Eof`.
    fn lex(src: &str) -> Vec<Token> {
        Lexer::new(src).collect()
    }

    /// Builds an identifier token without a namespace.
    fn ident(name: &str) -> Token {
        Token::Identifier(Name {
            name: name.to_string(),
            namespace: None,
        })
    }

    #[test]
    fn test_basic() {
        assert_eq!(
            lex("let x: u8 = 0x1F;"),
            vec![
                Token::Let,
                ident("x"),
                Token::Colon,
                ident("u8"),
                Token::Assign,
                Token::Integer(31),
                Token::Semicolon,
            ]
        );
    }

    #[test]
    fn test_empty_input_yields_no_tokens() {
        assert!(lex("").is_empty());
        assert!(lex(" \n\t").is_empty());
    }

    #[test]
    fn test_comments_are_skipped() {
        assert_eq!(lex("a // c\n/* b\n */ b"), vec![ident("a"), ident("b")]);
    }

    #[test]
    fn test_namespaced_identifier() {
        assert_eq!(
            lex("io::print"),
            vec![Token::Identifier(Name {
                name: "print".to_string(),
                namespace: Some("io".to_string()),
            })]
        );
    }

    #[test]
    fn test_multi_character_operators() {
        assert_eq!(
            lex("-> != <= >= == = /"),
            vec![
                Token::RightArrow,
                Token::BangEqual,
                Token::LessEqual,
                Token::GreaterEqual,
                Token::EqualEqual,
                Token::Assign,
                Token::Slash,
            ]
        );
    }

    #[test]
    fn test_string_escapes() {
        assert_eq!(
            lex("\"a\\n\\\"b\""),
            vec![Token::String("a\n\"b".to_string())]
        );
    }

    #[test]
    fn test_number_bases() {
        assert_eq!(
            lex("0b101 42"),
            vec![Token::Integer(5), Token::Integer(42)]
        );
    }
}
|
||||
+38
-40
@@ -4,17 +4,12 @@ use std::path::Path;
|
||||
|
||||
use common::logging::log;
|
||||
|
||||
use crate::{
|
||||
codegen::CodeGenerator,
|
||||
parser::{ParseResult, Parser},
|
||||
semantic_analyser::Analyser,
|
||||
};
|
||||
use crate::specialised::build_specialised;
|
||||
|
||||
mod codegen;
|
||||
mod lexer;
|
||||
mod parser;
|
||||
mod registers;
|
||||
mod semantic_analyser;
|
||||
mod backend;
|
||||
mod frontend;
|
||||
mod model;
|
||||
mod specialised;
|
||||
|
||||
pub fn compile_file(
|
||||
input_path: &Path,
|
||||
@@ -22,43 +17,46 @@ pub fn compile_file(
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let input = std::fs::read_to_string(input_path).expect("Failed to read input file");
|
||||
|
||||
log("Tokenising Input...");
|
||||
let input_ext = input_path
|
||||
.extension()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("");
|
||||
|
||||
let lexer = lexer::Lexer::new(&input);
|
||||
let tokens = lexer.collect::<Vec<_>>();
|
||||
// println!("{tokens:?}");
|
||||
// check if we're using a specialised compiler
|
||||
if let Some(output) = build_specialised(input_ext, &input) {
|
||||
let result = match output {
|
||||
Ok(output) => output,
|
||||
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
|
||||
};
|
||||
|
||||
log(&format!("Parsing {} Tokens...", tokens.len()));
|
||||
std::fs::write(output_path, &result).expect("Failed to write output");
|
||||
|
||||
let mut parser = Parser::new(tokens);
|
||||
let ast = match parser.parse() {
|
||||
ParseResult::Accept(ast) => ast,
|
||||
ParseResult::Reject(e) => {
|
||||
eprintln!("Error: {e:?}");
|
||||
return Err("Parsing error".into());
|
||||
}
|
||||
ParseResult::Deny => {
|
||||
panic!("Parser denied parsing")
|
||||
}
|
||||
log(&format!(
|
||||
"Compilation Successful ✅ \n\tSource: {}\n\tOutput: {}\n",
|
||||
input_path.display(),
|
||||
output_path.display(),
|
||||
));
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Parse the input using the frontend, providing the file extension and data.
|
||||
let ast = match frontend::compiler_frontend(input_ext, &input) {
|
||||
Ok(ast) => ast,
|
||||
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
|
||||
};
|
||||
// println!("{ast:#?}");
|
||||
|
||||
log("Analyzing AST...");
|
||||
log("Checking Type Information...");
|
||||
println!("Parsed AST: {:#?}", ast);
|
||||
|
||||
let analyser = Analyser::new();
|
||||
analyser.analyse(ast.clone()).unwrap();
|
||||
let output_ext = output_path
|
||||
.extension()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("");
|
||||
|
||||
log("Generating Code...");
|
||||
|
||||
// Code Gen
|
||||
let mut generator = CodeGenerator::new(ast);
|
||||
let result = match generator.generate() {
|
||||
Ok(code) => code,
|
||||
Err(e) => {
|
||||
eprintln!("Parsing error: {:?}", e);
|
||||
return Err("Code generation error".into());
|
||||
}
|
||||
// Generate the output using the backend with the parsed result.
|
||||
let result = match backend::compiler_backend(output_ext, &ast) {
|
||||
Ok(result) => result,
|
||||
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
|
||||
};
|
||||
|
||||
// println!("{result}");
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
use std::path::Path;
|
||||
|
||||
use compiler;
|
||||
|
||||
fn main() {
|
||||
// read from input file: syntax "c_compiler <src.c> [output.dsa]"
|
||||
let args: Vec<String> = std::env::args().collect();
|
||||
if args.len() < 2 {
|
||||
eprintln!("Usage: c_compiler <src.c> [output.dsa]");
|
||||
eprintln!("Usage: c_compiler <src.dsc> [output.dsa]");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,495 @@
|
||||
use core::fmt;
|
||||
|
||||
#[allow(unused)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum CompilerError {
|
||||
UnexpectedToken(String),
|
||||
UnexpectedEndOfInput,
|
||||
UnexpectedCharacter(char),
|
||||
Undefined(Name),
|
||||
InvalidSyntax(String),
|
||||
Generic(String),
|
||||
UnknownType,
|
||||
TypeMismatch(TypeId, TypeId),
|
||||
Unimplemented(String),
|
||||
}
|
||||
|
||||
/// An identifier, optionally qualified by a namespace.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Name {
    /// The identifier itself.
    pub name: String,
    /// The namespace qualifier, when there is one.
    pub namespace: Option<String>,
}

impl Name {
    /// Builds a `Name` from anything convertible into a `String` plus an
    /// optional namespace.
    pub fn new(name: impl Into<String>, namespace: Option<String>) -> Self {
        let name = name.into();
        Self { name, namespace }
    }
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Program {
|
||||
pub declarations: Vec<Declaration>,
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Declaration {
|
||||
Function {
|
||||
name: String,
|
||||
return_type: TypeId,
|
||||
params: Vec<Variable>,
|
||||
body: Block,
|
||||
},
|
||||
Variable {
|
||||
var: Variable,
|
||||
init: Option<ConstExpr>,
|
||||
is_const: bool,
|
||||
},
|
||||
Dependency(Dependency),
|
||||
Struct {
|
||||
name: Name,
|
||||
fields: Vec<Variable>,
|
||||
},
|
||||
}
|
||||
|
||||
/// A dependency of the program, identified by a name and a path.
#[derive(Debug, Clone)]
pub struct Dependency {
    /// Name under which the dependency is known.
    pub name: String,
    /// Path of the dependency.
    pub path: String,
}
|
||||
|
||||
#[allow(unused)]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum TypeId {
|
||||
U8,
|
||||
U16,
|
||||
U32,
|
||||
I8,
|
||||
I16,
|
||||
I32,
|
||||
Bool,
|
||||
Char,
|
||||
Void,
|
||||
Ptr(Box<TypeId>),
|
||||
Ref(Box<TypeId>),
|
||||
Tuple(Vec<TypeId>),
|
||||
Array {
|
||||
r#type: Box<TypeId>,
|
||||
size: usize,
|
||||
},
|
||||
UnknownCustom {
|
||||
name: Name,
|
||||
generics: Vec<TypeId>,
|
||||
},
|
||||
Struct {
|
||||
name: Name,
|
||||
fields: Vec<TypeId>,
|
||||
generics: Vec<TypeId>,
|
||||
},
|
||||
}
|
||||
|
||||
impl TypeId {
|
||||
pub fn size(&self) -> usize {
|
||||
match self {
|
||||
Self::U8 => 1,
|
||||
Self::U16 => 2,
|
||||
Self::U32 => 4,
|
||||
Self::I8 => 1,
|
||||
Self::I16 => 2,
|
||||
Self::I32 => 4,
|
||||
Self::Bool => 1,
|
||||
Self::Char => 1,
|
||||
Self::Void => 0,
|
||||
Self::Ptr(t) => t.size(),
|
||||
Self::Ref(t) => t.size(),
|
||||
Self::Tuple(types) => types.iter().map(|t| t.size()).sum(),
|
||||
Self::Array { r#type, size } => r#type.size() * size,
|
||||
Self::UnknownCustom { .. } => 1, /* TODO: calculate type size during */
|
||||
// semantic analysis
|
||||
Self::Struct { fields, .. } => fields.iter().map(|t| t.size()).sum(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for TypeId {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::U8 => write!(f, "u8"),
|
||||
Self::U16 => write!(f, "u16"),
|
||||
Self::U32 => write!(f, "u32"),
|
||||
Self::I8 => write!(f, "i8"),
|
||||
Self::I16 => write!(f, "i16"),
|
||||
Self::I32 => write!(f, "i32"),
|
||||
Self::Bool => write!(f, "bool"),
|
||||
Self::Char => write!(f, "char"),
|
||||
Self::Void => write!(f, "void"),
|
||||
Self::Ptr(t) => write!(f, "*{}", t),
|
||||
Self::Ref(t) => write!(f, "&{}", t),
|
||||
Self::Tuple(elems) => write!(
|
||||
f,
|
||||
"({})",
|
||||
elems
|
||||
.iter()
|
||||
.map(|t| t.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ")
|
||||
),
|
||||
Self::Array { r#type, size } => write!(f, "[{}; {}]", r#type, size),
|
||||
Self::UnknownCustom { name, generics } => {
|
||||
write!(
|
||||
f,
|
||||
"{}<{}>",
|
||||
name,
|
||||
generics
|
||||
.iter()
|
||||
.map(|t| t.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ")
|
||||
)
|
||||
}
|
||||
Self::Struct {
|
||||
name,
|
||||
fields,
|
||||
generics,
|
||||
} => write!(
|
||||
f,
|
||||
"struct<{}> {} {{{}}}",
|
||||
generics
|
||||
.iter()
|
||||
.map(|t| t.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join(", "),
|
||||
name,
|
||||
fields
|
||||
.iter()
|
||||
.map(|t| t.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ")
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A sequence of statements.
pub type Block = Vec<Statement>;
|
||||
|
||||
#[allow(unused)]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Variable {
|
||||
pub name: String,
|
||||
pub type_id: TypeId,
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Statement {
|
||||
Block(Block),
|
||||
Declaration {
|
||||
var: Variable,
|
||||
value: Option<Expression>,
|
||||
},
|
||||
Assign {
|
||||
varname: String,
|
||||
operator: AssignmentOperator,
|
||||
value: Expression,
|
||||
},
|
||||
PtrWrite {
|
||||
ptr: Expression,
|
||||
value: Expression,
|
||||
},
|
||||
Expression {
|
||||
expr: Expression,
|
||||
},
|
||||
If {
|
||||
condition: Expression,
|
||||
then_stmt: Block,
|
||||
else_stmt: Block,
|
||||
},
|
||||
While {
|
||||
condition: Expression,
|
||||
body: Vec<Statement>,
|
||||
},
|
||||
Loop(Block),
|
||||
Defer(Call),
|
||||
Break,
|
||||
Continue,
|
||||
Return(Option<Expression>),
|
||||
}
|
||||
|
||||
/// A constant initialiser value.
#[derive(Debug, Clone)]
pub enum ConstExpr {
    /// An integer constant.
    Number(i32),
    /// A string constant.
    String(String),
}

impl fmt::Display for ConstExpr {
    /// Writes numbers as plain decimal and strings wrapped in double quotes
    /// (the string content is written as-is, without escaping).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Number(value) => write!(f, "{value}"),
            Self::String(text) => write!(f, "\"{text}\""),
        }
    }
}
|
||||
|
||||
#[allow(unused)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Expression {
|
||||
Empty,
|
||||
Binary {
|
||||
op: BinaryOperator,
|
||||
left: Box<Expression>,
|
||||
right: Box<Expression>,
|
||||
|
||||
// Post-Semantic Analysis
|
||||
type_id: Option<TypeId>,
|
||||
},
|
||||
Unary {
|
||||
op: UnaryOperator,
|
||||
operand: Box<Expression>,
|
||||
|
||||
// Post-Semantic Analysis
|
||||
type_id: Option<TypeId>,
|
||||
},
|
||||
UnaryPostfix {
|
||||
op: UnaryOperator,
|
||||
operand: Box<Expression>,
|
||||
|
||||
// Post-Semantic Analysis
|
||||
type_id: Option<TypeId>,
|
||||
},
|
||||
Variable {
|
||||
name: Name,
|
||||
expr_type: Option<TypeId>,
|
||||
},
|
||||
TypeCast {
|
||||
expr: Box<Expression>,
|
||||
target_type: TypeId,
|
||||
|
||||
// Post-Semantic Analysis
|
||||
type_id: Option<TypeId>,
|
||||
},
|
||||
IndexAccess {
|
||||
expr: Box<Expression>,
|
||||
index: Box<Expression>,
|
||||
|
||||
// Post-Semantic Analysis
|
||||
type_id: Option<TypeId>,
|
||||
},
|
||||
MemberAccess {
|
||||
expr: Box<Expression>,
|
||||
field_name: Name,
|
||||
|
||||
// Post-Semantic Analysis
|
||||
type_id: Option<TypeId>,
|
||||
},
|
||||
Call {
|
||||
func: Call,
|
||||
|
||||
// Post-Semantic Analysis
|
||||
type_id: Option<TypeId>,
|
||||
},
|
||||
Number(Number),
|
||||
StringLiteral(String),
|
||||
CharLiteral(char),
|
||||
ArrayLiteral {
|
||||
elements: Vec<Expression>,
|
||||
type_id: Option<TypeId>,
|
||||
},
|
||||
StructLiteral {
|
||||
name: Name,
|
||||
fields: Vec<(Name, Expression)>,
|
||||
type_id: Option<TypeId>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Number {
|
||||
Signed(i32, Option<TypeId>),
|
||||
Unsigned(u32, Option<TypeId>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Call {
|
||||
pub name: Name,
|
||||
pub args: Vec<Expression>,
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
pub fn is_pure(&self) -> bool {
|
||||
match self {
|
||||
Expression::Number { .. } => true,
|
||||
Expression::StringLiteral(_) => true,
|
||||
Expression::CharLiteral(_) => true,
|
||||
Expression::Call { .. } => false,
|
||||
Expression::Binary { left, right, .. } => left.is_pure() && right.is_pure(),
|
||||
Expression::Unary { operand, .. } => operand.is_pure(),
|
||||
Expression::UnaryPostfix { operand, .. } => operand.is_pure(),
|
||||
Expression::Empty => true,
|
||||
Expression::Variable { .. } => true,
|
||||
Expression::TypeCast { expr, .. } => expr.is_pure(),
|
||||
Expression::IndexAccess { expr, index, .. } => {
|
||||
expr.is_pure() && index.is_pure()
|
||||
}
|
||||
Expression::MemberAccess { expr, .. } => expr.is_pure(),
|
||||
Expression::ArrayLiteral { elements, .. } => {
|
||||
elements.iter().all(|element| element.is_pure())
|
||||
}
|
||||
Expression::StructLiteral { fields, .. } => {
|
||||
fields.iter().all(|(_, expr)| expr.is_pure())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn type_id(&self) -> Result<TypeId, CompilerError> {
|
||||
match self {
|
||||
Expression::Number(
|
||||
Number::Signed(_, type_id) | Number::Unsigned(_, type_id),
|
||||
) => type_id.clone().ok_or(CompilerError::UnknownType),
|
||||
Expression::StringLiteral(_) => Ok(TypeId::Ptr(Box::new(TypeId::Char))),
|
||||
Expression::CharLiteral(_) => Ok(TypeId::Char),
|
||||
Expression::Call { type_id, .. } => {
|
||||
type_id.clone().ok_or(CompilerError::UnknownType)
|
||||
}
|
||||
Expression::Binary { type_id, .. } => {
|
||||
type_id.clone().ok_or(CompilerError::UnknownType)
|
||||
}
|
||||
Expression::Unary { type_id, .. } => {
|
||||
type_id.clone().ok_or(CompilerError::UnknownType)
|
||||
}
|
||||
Expression::UnaryPostfix { type_id, .. } => {
|
||||
type_id.clone().ok_or(CompilerError::UnknownType)
|
||||
}
|
||||
Expression::Empty => Ok(TypeId::Void),
|
||||
Expression::Variable { expr_type, .. } => {
|
||||
expr_type.clone().ok_or(CompilerError::UnknownType)
|
||||
}
|
||||
Expression::TypeCast { type_id, .. } => {
|
||||
type_id.clone().ok_or(CompilerError::UnknownType)
|
||||
}
|
||||
Expression::IndexAccess { expr, .. } => expr.type_id(),
|
||||
Expression::MemberAccess { expr, .. } => expr.type_id(),
|
||||
Expression::ArrayLiteral { elements, .. } => {
|
||||
let element_type = elements
|
||||
.first()
|
||||
.map_or(TypeId::Void, |e| e.type_id().unwrap_or(TypeId::Void));
|
||||
Ok(TypeId::Array {
|
||||
r#type: Box::new(element_type),
|
||||
size: elements.len(),
|
||||
})
|
||||
}
|
||||
Expression::StructLiteral { name, fields, .. } => {
|
||||
let fields = fields
|
||||
.iter()
|
||||
.map(|(_, expr)| expr.type_id())
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
Ok(TypeId::Struct {
|
||||
name: name.clone(),
|
||||
fields,
|
||||
generics: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The operator of an assignment statement: plain or compound.
#[derive(Debug, Clone, PartialEq)]
pub enum AssignmentOperator {
    // plain
    Assign,

    // arithmetic
    AddAssign,
    SubAssign,
    MulAssign,
    DivAssign,
    ModAssign,

    // bitwise
    AndAssign,
    OrAssign,
    XorAssign,

    // shift
    LeftShiftAssign,
    RightShiftAssign,
}
|
||||
|
||||
/// Binary operators, grouped by category.
#[allow(unused)]
#[derive(Debug, Clone, PartialEq)]
pub enum BinaryOperator {
    // arithmetic
    Add,
    Sub,
    Mul,
    Div,
    Mod,

    // comparison
    Equal,
    NotEqual,
    LessThan,
    GreaterThan,
    LessOrEqual,
    GreaterOrEqual,

    // bitwise
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,

    // logical
    LogicalAnd,
    LogicalOr,

    // shift
    LeftShift,
    RightShift,
}

impl fmt::Display for BinaryOperator {
    /// Writes the operator's source-level symbol (e.g. `+`, `<=`, `&&`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Add => "+",
            Self::Sub => "-",
            Self::Mul => "*",
            Self::Div => "/",
            Self::Mod => "%",
            Self::Equal => "==",
            Self::NotEqual => "!=",
            Self::LessThan => "<",
            Self::GreaterThan => ">",
            Self::LessOrEqual => "<=",
            Self::GreaterOrEqual => ">=",
            Self::BitwiseAnd => "&",
            Self::BitwiseOr => "|",
            Self::BitwiseXor => "^",
            Self::LogicalAnd => "&&",
            Self::LogicalOr => "||",
            Self::LeftShift => "<<",
            Self::RightShift => ">>",
        };
        f.write_str(symbol)
    }
}
|
||||
|
||||
/// Unary operators.
#[derive(Debug, Clone, PartialEq)]
pub enum UnaryOperator {
    Plus,
    Minus,
    AddressOf,
    Dereference,
    BitwiseNot,
    LogicalNot,
    Increment,
    Decrement,
    SizeOf,
}

impl fmt::Display for UnaryOperator {
    /// Writes the operator's source-level spelling (e.g. `-`, `++`, `sizeof`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Plus => "+",
            Self::Minus => "-",
            Self::AddressOf => "&",
            Self::Dereference => "*",
            Self::BitwiseNot => "~",
            Self::LogicalNot => "!",
            Self::Increment => "++",
            Self::Decrement => "--",
            Self::SizeOf => "sizeof",
        };
        f.write_str(symbol)
    }
}
|
||||
@@ -1,790 +0,0 @@
|
||||
use crate::lexer::{Name, Token};
|
||||
use crate::{expect_tt, expect_value};
|
||||
use core::fmt;
|
||||
use std::ops::{ControlFlow, FromResidual, Try};
|
||||
|
||||
/// Three-state outcome of a parsing step.
#[derive(Debug, Clone)]
pub enum ParseResult<T, E> {
    /// The step succeeded and produced a value.
    Accept(T),
    /// The step failed without carrying an error value.
    Deny,
    /// The step failed with an error.
    Reject(E),
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum CompilerError {
|
||||
UnexpectedToken(Token),
|
||||
UnexpectedEndOfInput,
|
||||
UnexpectedCharacter(char),
|
||||
Undefined(Name),
|
||||
InvalidSyntax(String),
|
||||
Generic(String),
|
||||
}
|
||||
|
||||
pub struct Parser {
|
||||
tokens: Vec<Token>,
|
||||
idx: usize,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new(tokens: Vec<Token>) -> Self {
|
||||
Self { tokens, idx: 0 }
|
||||
}
|
||||
|
||||
pub fn parse(&mut self) -> ParseResult<Program, CompilerError> {
|
||||
let mut declarations = Vec::new();
|
||||
|
||||
while let ParseResult::Accept(_) = self.peek_next() {
|
||||
declarations.push(self.parse_declaration()?);
|
||||
}
|
||||
|
||||
ParseResult::Accept(Program { declarations })
|
||||
}
|
||||
|
||||
fn parse_declaration(&mut self) -> ParseResult<Declaration, CompilerError> {
|
||||
if expect_tt!(self.peek_next()?, Fn).accepted() {
|
||||
return self.parse_func();
|
||||
}
|
||||
|
||||
if expect_tt!(self.peek_next()?, Include).accepted() {
|
||||
// expect include keyword
|
||||
let _ = self.next();
|
||||
|
||||
// expect namespace identifier
|
||||
let name = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
// expect colon
|
||||
let _ = expect_tt!(self.next()?, Colon)?;
|
||||
|
||||
// expect string literal (module path)
|
||||
let path = expect_value!(self.next()?, String)?;
|
||||
|
||||
// expect semicolon
|
||||
let _ = expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
return ParseResult::Accept(Declaration::Dependency(Dependency {
|
||||
name: name.name,
|
||||
path,
|
||||
}));
|
||||
}
|
||||
|
||||
if expect_tt!(self.peek_next()?, Const, Static).accepted() {
|
||||
let is_const = match self.next()? {
|
||||
Token::Const => true,
|
||||
Token::Static => false,
|
||||
_ => {
|
||||
return ParseResult::Reject(CompilerError::Generic(String::from(
|
||||
"This can't happen!",
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
let var = self.parse_var_decl()?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, Assign)?;
|
||||
|
||||
let value = self.next()?;
|
||||
let init = match value {
|
||||
Token::String(x) => Some(ConstExpr::String(x)),
|
||||
Token::Integer(x) => Some(ConstExpr::Number(x as i32)),
|
||||
_ => return ParseResult::Reject(CompilerError::UnexpectedToken(value)),
|
||||
};
|
||||
|
||||
let _ = expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
return ParseResult::Accept(Declaration::Variable {
|
||||
var,
|
||||
init,
|
||||
is_const,
|
||||
});
|
||||
}
|
||||
|
||||
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
|
||||
}
|
||||
|
||||
fn parse_func(&mut self) -> ParseResult<Declaration, CompilerError> {
|
||||
// expect function keyword
|
||||
let _ = expect_tt!(self.next()?, Fn);
|
||||
// expect function name
|
||||
let name = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
// expect left paren
|
||||
let _ = expect_tt!(self.next()?, LeftParen)?;
|
||||
|
||||
let mut params = Vec::new();
|
||||
while expect_tt!(self.peek_next()?, Identifier).accepted() {
|
||||
let arg = self.parse_var_decl()?;
|
||||
params.push(arg);
|
||||
|
||||
if expect_tt!(self.peek_next()?, Comma).accepted() {
|
||||
self.next()?;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// expect right paren
|
||||
let _ = expect_tt!(self.next()?, RightParen)?;
|
||||
|
||||
// see if we can parse the return type!
|
||||
let mut return_type = TypeId::Void;
|
||||
if expect_tt!(self.peek_next()?, RightArrow).accepted() {
|
||||
let _ = self.next();
|
||||
return_type = self.parse_type()?;
|
||||
}
|
||||
|
||||
// expect vald block
|
||||
if expect_tt!(self.peek_next()?, LeftBrace).accepted() {
|
||||
ParseResult::Accept(Declaration::Function {
|
||||
name: name.name,
|
||||
params,
|
||||
return_type,
|
||||
body: self.parse_block()?,
|
||||
})
|
||||
} else {
|
||||
ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_block(&mut self) -> ParseResult<Block, CompilerError> {
|
||||
// expect left brace
|
||||
let _ = expect_tt!(self.next()?, LeftBrace)?;
|
||||
|
||||
let mut block = Vec::new();
|
||||
while !expect_tt!(self.peek_next()?, RightBrace).accepted() {
|
||||
block.push(self.parse_statement()?);
|
||||
}
|
||||
|
||||
// expect right brace
|
||||
let _ = expect_tt!(self.next()?, RightBrace)?;
|
||||
|
||||
ParseResult::Accept(block)
|
||||
}
|
||||
|
||||
fn parse_statement(&mut self) -> ParseResult<Statement, CompilerError> {
|
||||
// handle if statements
|
||||
if expect_tt!(self.peek_next()?, If).accepted() {
|
||||
self.next()?;
|
||||
|
||||
let condition = self.parse_expression()?;
|
||||
|
||||
let then_stmt = self.parse_block()?;
|
||||
|
||||
if !expect_tt!(self.peek_next()?, Else).accepted() {
|
||||
return ParseResult::Accept(Statement::If {
|
||||
condition,
|
||||
then_stmt,
|
||||
else_stmt: vec![],
|
||||
});
|
||||
}
|
||||
|
||||
let _ = expect_tt!(self.next()?, Else)?;
|
||||
|
||||
let else_stmt = self.parse_block()?;
|
||||
|
||||
return ParseResult::Accept(Statement::If {
|
||||
condition,
|
||||
then_stmt,
|
||||
else_stmt,
|
||||
});
|
||||
}
|
||||
|
||||
// handle while loops
|
||||
if expect_tt!(self.peek_next()?, While).accepted() {
|
||||
self.next()?;
|
||||
|
||||
// expect valid expression
|
||||
let expr = self.parse_expression()?;
|
||||
|
||||
// expect valid block after
|
||||
let block = self.parse_block()?;
|
||||
|
||||
// return result
|
||||
return ParseResult::Accept(Statement::While {
|
||||
condition: expr,
|
||||
body: block,
|
||||
});
|
||||
}
|
||||
|
||||
// handle indefinite loops
|
||||
if expect_tt!(self.peek_next()?, Loop).accepted() {
|
||||
self.next()?;
|
||||
|
||||
// parse the inner block
|
||||
return ParseResult::Accept(Statement::Loop(self.parse_block()?));
|
||||
}
|
||||
|
||||
if expect_tt!(self.peek_next()?, Return).accepted() {
|
||||
self.next()?;
|
||||
|
||||
// handle case where nothing is returned
|
||||
if expect_tt!(self.peek_next()?, Semicolon).accepted() {
|
||||
return ParseResult::Accept(Statement::Return(None));
|
||||
}
|
||||
|
||||
let expr = self.parse_expression()?;
|
||||
expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
return ParseResult::Accept(Statement::Return(Some(expr)));
|
||||
}
|
||||
|
||||
if expect_tt!(self.peek_next()?, Break).accepted() {
|
||||
self.next()?;
|
||||
|
||||
// expect semicolon
|
||||
expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
// return result
|
||||
return ParseResult::Accept(Statement::Break);
|
||||
}
|
||||
|
||||
if expect_tt!(self.peek_next()?, Continue).accepted() {
|
||||
self.next()?;
|
||||
|
||||
// expect semicolon
|
||||
expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
// return result
|
||||
return ParseResult::Accept(Statement::Continue);
|
||||
}
|
||||
|
||||
// handle writes to pointers!
|
||||
if expect_tt!(self.peek_next()?, Star).accepted() {
|
||||
self.next()?;
|
||||
|
||||
let left = if expect_tt!(self.peek_next()?, Identifier).accepted() {
|
||||
let identifier = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
Expression::Variable {
|
||||
name: identifier,
|
||||
expr_type: None,
|
||||
}
|
||||
} else if expect_tt!(self.peek_next()?, LeftParen).accepted() {
|
||||
self.next()?;
|
||||
|
||||
let expr = self.parse_expression()?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, RightParen).accepted();
|
||||
|
||||
expr
|
||||
} else {
|
||||
return ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||
self.peek_next()?,
|
||||
));
|
||||
};
|
||||
|
||||
let _ = expect_tt!(self.next()?, Assign)?;
|
||||
|
||||
let right = self.parse_expression()?;
|
||||
|
||||
// expect semicolon
|
||||
expect_tt!(self.next()?, Semicolon)?;
|
||||
|
||||
// return result
|
||||
return ParseResult::Accept(Statement::PtrWrite {
|
||||
ptr: left,
|
||||
value: right,
|
||||
});
|
||||
}
|
||||
|
||||
// handle let statements (declarations)
|
||||
if expect_tt!(self.peek_next()?, Let).accepted() {
|
||||
self.next();
|
||||
|
||||
// expect variable name and type.
|
||||
let name = self.parse_var_decl()?;
|
||||
|
||||
// handle uninitialised variable case
|
||||
if expect_tt!(self.peek_next()?, Semicolon).accepted() {
|
||||
self.next();
|
||||
return ParseResult::Accept(Statement::Declaration {
|
||||
var: name,
|
||||
value: None,
|
||||
});
|
||||
}
|
||||
|
||||
// handle initialised case
|
||||
// expect equals
|
||||
let _ = expect_tt!(self.next()?, Assign)?;
|
||||
|
||||
// expect a valid expression
|
||||
let expr = self.parse_expression()?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, Semicolon);
|
||||
|
||||
// return statement
|
||||
return ParseResult::Accept(Statement::Declaration {
|
||||
var: name,
|
||||
value: Some(expr),
|
||||
});
|
||||
}
|
||||
|
||||
// handle assignment without "let"
|
||||
let name = expect_value!(self.peek_next()?, Identifier);
|
||||
if name.accepted() {
|
||||
let varname = name?;
|
||||
if expect_tt!(self.peek(1)?, LeftParen).accepted() {
|
||||
let expr = self.parse_expression()?; // a function call expr
|
||||
let _ = expect_tt!(self.next()?, Semicolon)?;
|
||||
return ParseResult::Accept(Statement::Expression { expr });
|
||||
}
|
||||
|
||||
self.next()?;
|
||||
let _ = expect_tt!(self.next()?, Assign)?;
|
||||
|
||||
let value = self.parse_expression()?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, Semicolon);
|
||||
|
||||
return ParseResult::Accept(Statement::Assign {
|
||||
varname: varname.name,
|
||||
value,
|
||||
});
|
||||
}
|
||||
|
||||
ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?))
|
||||
}
|
||||
|
||||
/// Parses an expression.
///
/// Entry point of the expression grammar; it forwards to the comparison level.
fn parse_expression(&mut self) -> ParseResult<Expression, CompilerError> {
    let expression = self.parse_comparison();
    expression
}
|
||||
|
||||
fn parse_comparison(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let mut expr = self.parse_additive()?;
|
||||
|
||||
while let Some(op) = match self.peek_next()? {
|
||||
Token::EqualEqual => Some(BinaryOperator::Ne),
|
||||
Token::BangEqual => Some(BinaryOperator::Ne),
|
||||
Token::Less => Some(BinaryOperator::Lt),
|
||||
Token::Greater => Some(BinaryOperator::Gt),
|
||||
Token::LessEqual => Some(BinaryOperator::Le),
|
||||
Token::GreaterEqual => Some(BinaryOperator::Ge),
|
||||
_ => None,
|
||||
} {
|
||||
self.next()?;
|
||||
let right = Box::new(self.parse_additive()?);
|
||||
expr = Expression::Binary {
|
||||
op,
|
||||
left: Box::new(expr),
|
||||
right,
|
||||
}
|
||||
}
|
||||
|
||||
ParseResult::Accept(expr)
|
||||
}
|
||||
|
||||
fn parse_additive(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let left = self.parse_multiplicative()?;
|
||||
|
||||
let op = match self.peek_next()? {
|
||||
Token::Plus => BinaryOperator::Add,
|
||||
Token::Minus => BinaryOperator::Sub,
|
||||
_ => return ParseResult::Accept(left),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Binary {
|
||||
op,
|
||||
left: Box::new(left),
|
||||
right: Box::new(self.parse_additive()?),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_multiplicative(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let left = self.parse_unary()?;
|
||||
|
||||
let op = match self.peek_next()? {
|
||||
Token::Star => BinaryOperator::Mul,
|
||||
Token::Slash => BinaryOperator::Div,
|
||||
_ => return ParseResult::Accept(left),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Binary {
|
||||
op,
|
||||
left: Box::new(left),
|
||||
right: Box::new(self.parse_multiplicative()?),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_unary(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
let op = match self.peek_next()? {
|
||||
Token::Plus => UnaryOperator::Plus,
|
||||
Token::Minus => UnaryOperator::Minus,
|
||||
Token::Star => UnaryOperator::Dereference,
|
||||
Token::Amphersand => UnaryOperator::Reference,
|
||||
_ => return ParseResult::Accept(self.parse_primary()?),
|
||||
};
|
||||
|
||||
self.next()?;
|
||||
let operand = Box::new(self.parse_unary()?);
|
||||
ParseResult::Accept(Expression::Unary { op, operand })
|
||||
}
|
||||
|
||||
fn parse_primary(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||
match self.peek_next()? {
|
||||
Token::Integer(value) => {
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::Number(value as isize))
|
||||
}
|
||||
Token::String(value) => {
|
||||
self.next()?;
|
||||
ParseResult::Accept(Expression::StringLiteral(value))
|
||||
}
|
||||
Token::Identifier(_) => {
|
||||
let name = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
if matches!(self.peek_next()?, Token::LeftParen) {
|
||||
// Function call
|
||||
self.next()?;
|
||||
let mut args = Vec::new();
|
||||
|
||||
if !matches!(self.peek_next()?, Token::RightParen) {
|
||||
args.push(self.parse_expression()?);
|
||||
|
||||
while matches!(self.peek_next()?, Token::Comma) {
|
||||
self.next()?;
|
||||
args.push(self.parse_expression()?);
|
||||
}
|
||||
}
|
||||
|
||||
let _ = expect_tt!(self.next()?, RightParen)?;
|
||||
|
||||
ParseResult::Accept(Expression::Call { name, args })
|
||||
} else {
|
||||
ParseResult::Accept(Expression::Variable {
|
||||
name,
|
||||
expr_type: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
Token::LeftParen => {
|
||||
self.next()?;
|
||||
let expr = self.parse_expression()?;
|
||||
let _ = expect_tt!(self.next()?, RightParen)?;
|
||||
ParseResult::Accept(expr)
|
||||
}
|
||||
_ => ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?)),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_var_decl(&mut self) -> ParseResult<Variable, CompilerError> {
|
||||
let name = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
let _ = expect_tt!(self.next()?, Colon)?;
|
||||
|
||||
let type_id = self.parse_type()?;
|
||||
|
||||
ParseResult::Accept(Variable {
|
||||
name: name.name,
|
||||
type_id,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_type(&mut self) -> ParseResult<TypeId, CompilerError> {
|
||||
// get the type name incl namespace
|
||||
let typename = expect_value!(self.next()?, Identifier)?;
|
||||
|
||||
match typename.name.as_str() {
|
||||
"u32" => ParseResult::Accept(TypeId::U32),
|
||||
"u16" => ParseResult::Accept(TypeId::U16),
|
||||
"u8" => ParseResult::Accept(TypeId::U8),
|
||||
"i32" => ParseResult::Accept(TypeId::I32),
|
||||
"i16" => ParseResult::Accept(TypeId::I16),
|
||||
"i8" => ParseResult::Accept(TypeId::I8),
|
||||
"void" => ParseResult::Accept(TypeId::Void),
|
||||
"char" => ParseResult::Accept(TypeId::Char),
|
||||
"str" => ParseResult::Accept(TypeId::Ptr(Box::new(TypeId::Char))),
|
||||
_ => todo!("Implement parsing for other types!!"),
|
||||
}
|
||||
}
|
||||
|
||||
fn next(&mut self) -> ParseResult<Token, CompilerError> {
|
||||
if self.idx >= self.tokens.len() {
|
||||
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
|
||||
} else {
|
||||
let token = self.tokens[self.idx].clone();
|
||||
self.idx += 1;
|
||||
ParseResult::Accept(token)
|
||||
}
|
||||
}
|
||||
|
||||
fn peek_next(&self) -> ParseResult<Token, CompilerError> {
|
||||
if self.idx >= self.tokens.len() {
|
||||
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
|
||||
} else {
|
||||
ParseResult::Accept(self.tokens[self.idx].clone())
|
||||
}
|
||||
}
|
||||
|
||||
fn peek(&self, offset: usize) -> ParseResult<Token, CompilerError> {
|
||||
if self.idx + offset >= self.tokens.len() {
|
||||
ParseResult::Reject(CompilerError::UnexpectedEndOfInput)
|
||||
} else {
|
||||
ParseResult::Accept(self.tokens[self.idx + offset].clone())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Program {
|
||||
pub declarations: Vec<Declaration>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Declaration {
|
||||
Function {
|
||||
name: String,
|
||||
return_type: TypeId,
|
||||
params: Vec<Variable>,
|
||||
body: Block,
|
||||
},
|
||||
Variable {
|
||||
var: Variable,
|
||||
init: Option<ConstExpr>,
|
||||
is_const: bool,
|
||||
},
|
||||
Dependency(Dependency),
|
||||
}
|
||||
|
||||
/// An included module: the name it is bound to and the path it comes from.
#[derive(Debug, Clone)]
pub struct Dependency {
    /// Identifier given in the `include` declaration.
    pub name: String,
    /// String-literal path given in the `include` declaration.
    pub path: String,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum TypeId {
|
||||
U8,
|
||||
U16,
|
||||
U32,
|
||||
I8,
|
||||
I16,
|
||||
I32,
|
||||
Char,
|
||||
Void,
|
||||
Ptr(Box<TypeId>),
|
||||
Ref(Box<TypeId>),
|
||||
Array(Box<TypeId>, usize),
|
||||
Struct { name: Name, fields: Vec<Variable> },
|
||||
}
|
||||
|
||||
/// A block is simply the ordered list of statements it contains.
pub type Block = Vec<Statement>;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Variable {
|
||||
pub name: String,
|
||||
pub type_id: TypeId,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Statement {
|
||||
Block(Block),
|
||||
Declaration {
|
||||
var: Variable,
|
||||
value: Option<Expression>,
|
||||
},
|
||||
Assign {
|
||||
varname: String,
|
||||
value: Expression,
|
||||
},
|
||||
PtrWrite {
|
||||
ptr: Expression,
|
||||
value: Expression,
|
||||
},
|
||||
Expression {
|
||||
expr: Expression,
|
||||
},
|
||||
If {
|
||||
condition: Expression,
|
||||
then_stmt: Block,
|
||||
else_stmt: Block,
|
||||
},
|
||||
While {
|
||||
condition: Expression,
|
||||
body: Vec<Statement>,
|
||||
},
|
||||
Loop(Block),
|
||||
Break,
|
||||
Continue,
|
||||
Return(Option<Expression>),
|
||||
}
|
||||
|
||||
/// A constant initialiser value: either a number or a string.
#[derive(Debug, Clone)]
pub enum ConstExpr {
    /// A 32-bit signed integer constant.
    Number(i32),
    /// A string constant (stored without surrounding quotes).
    String(String),
}

impl fmt::Display for ConstExpr {
    /// Writes numbers as-is and strings wrapped in double quotes.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Number(number) => write!(f, "{}", number),
            Self::String(text) => write!(f, "\"{}\"", text),
        }
    }
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Expression {
|
||||
Empty,
|
||||
Binary {
|
||||
op: BinaryOperator,
|
||||
left: Box<Expression>,
|
||||
right: Box<Expression>,
|
||||
},
|
||||
Unary {
|
||||
op: UnaryOperator,
|
||||
operand: Box<Expression>,
|
||||
},
|
||||
Variable {
|
||||
name: Name,
|
||||
expr_type: Option<TypeId>,
|
||||
},
|
||||
Call {
|
||||
name: Name,
|
||||
args: Vec<Expression>,
|
||||
},
|
||||
Number(isize),
|
||||
StringLiteral(String),
|
||||
CharLiteral(char),
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
pub fn is_pure(&self) -> bool {
|
||||
match self {
|
||||
Expression::Number(_) => true,
|
||||
Expression::StringLiteral(_) => true,
|
||||
Expression::CharLiteral(_) => true,
|
||||
Expression::Call { name, args } => false, /* TODO: will require checking */
|
||||
// if the associated function
|
||||
// body is pure
|
||||
Expression::Binary { left, right, .. } => left.is_pure() && right.is_pure(),
|
||||
Expression::Unary { op, operand } => operand.is_pure(),
|
||||
Expression::Empty => true,
|
||||
Expression::Variable { name, expr_type } => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Binary operators: arithmetic and comparison.
#[derive(Debug, Clone, PartialEq)]
pub enum BinaryOperator {
    // arithmetic
    Add,
    Sub,
    Mul,
    Div,

    // comparison
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,
}

impl fmt::Display for BinaryOperator {
    /// Writes the operator's source-level symbol (e.g. `+`, `<=`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Add => "+",
            Self::Sub => "-",
            Self::Mul => "*",
            Self::Div => "/",
            Self::Eq => "==",
            Self::Ne => "!=",
            Self::Lt => "<",
            Self::Gt => ">",
            Self::Le => "<=",
            Self::Ge => ">=",
        };
        f.write_str(symbol)
    }
}
|
||||
|
||||
/// Prefix unary operators.
#[derive(Debug, Clone, PartialEq)]
pub enum UnaryOperator {
    Plus,
    Minus,
    Reference,
    Dereference,
}

impl fmt::Display for UnaryOperator {
    /// Writes the operator's source-level symbol.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Plus => "+",
            Self::Minus => "-",
            Self::Reference => "&",
            Self::Dereference => "*",
        };
        f.write_str(symbol)
    }
}
|
||||
|
||||
impl<T, E> ParseResult<T, E> {
|
||||
pub fn accepted(&self) -> bool {
|
||||
matches!(self, ParseResult::Accept(_))
|
||||
}
|
||||
}
|
||||
|
||||
/// The non-success part of a `ParseResult`, used as its `Try::Residual` so
/// that `?` can propagate `Deny` and `Reject`.
pub enum ParseResultResidual<T> {
    /// Propagated `Deny`.
    Deny,
    /// Propagated `Reject` carrying the error.
    Reject(T),
}
|
||||
|
||||
impl<T, E> Try for ParseResult<T, E> {
|
||||
type Output = T;
|
||||
type Residual = ParseResultResidual<E>;
|
||||
|
||||
fn from_output(output: T) -> Self {
|
||||
ParseResult::Accept(output)
|
||||
}
|
||||
|
||||
fn branch(self) -> ControlFlow<Self::Residual, Self::Output> {
|
||||
match self {
|
||||
ParseResult::Accept(v) => ControlFlow::Continue(v),
|
||||
ParseResult::Deny => ControlFlow::Break(ParseResultResidual::Deny),
|
||||
ParseResult::Reject(e) => ControlFlow::Break(ParseResultResidual::Reject(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, E> FromResidual for ParseResult<T, E> {
|
||||
fn from_residual(residual: ParseResultResidual<E>) -> Self {
|
||||
match residual {
|
||||
ParseResultResidual::Deny => ParseResult::Deny,
|
||||
ParseResultResidual::Reject(e) => ParseResult::Reject(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that a token's type is one of the listed variant names.
///
/// `$token` is cloned and its `tt()` is converted to a string, which is
/// compared against the stringified `$variant` names. Evaluates to
/// `ParseResult::Accept(token)` on a match and to
/// `ParseResult::Reject(CompilerError::UnexpectedToken(token))` otherwise.
///
/// `ParseResult` and `CompilerError` must be in scope at the call site.
#[macro_export]
macro_rules! expect_tt {
    ($token:expr, $($variant:ident),+) => {{
        let token = $token.clone();
        let tt = token.tt().to_string();

        match tt.as_str() {
            $(
                stringify!($variant) => ParseResult::Accept(token),
            )+
            _ => ParseResult::Reject(CompilerError::UnexpectedToken(token)),
        }
    }};
}
|
||||
|
||||
/// Extracts the payload of a token that must be the given `Token` variant.
///
/// Evaluates to `ParseResult::Accept(value)` when `$expr` is
/// `Token::$variant(value)`, and to
/// `ParseResult::Reject(CompilerError::UnexpectedToken(token))` otherwise.
/// The token is matched by value, so no clone is needed.
///
/// `Token`, `ParseResult` and `CompilerError` must be in scope at the call site.
#[macro_export]
macro_rules! expect_value {
    ($expr:expr, $variant:ident) => {{
        match $expr {
            Token::$variant(value) => ParseResult::Accept(value),
            other => ParseResult::Reject(CompilerError::UnexpectedToken(other)),
        }
    }};
}
|
||||
@@ -1,398 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::parser::CompilerError;
|
||||
|
||||
/// Register allocator for DSA assembly generation
|
||||
/// Manages general-purpose registers (rg0-rgf) and handles stack spilling
|
||||
pub struct RegisterAllocator {
|
||||
/// Available general-purpose registers
|
||||
available_registers: Vec<String>,
|
||||
|
||||
/// Maps variable names to their current location (register or stack offset)
|
||||
variable_locations: HashMap<String, Location>,
|
||||
|
||||
/// Maps registers to the variables they currently hold
|
||||
register_contents: HashMap<String, String>,
|
||||
|
||||
/// Current stack offset for local variables (relative to bpr)
|
||||
/// Starts at -4 (going downward from base pointer)
|
||||
stack_offset: i32,
|
||||
|
||||
/// Track which registers are currently in use
|
||||
in_use: HashMap<String, bool>,
|
||||
}
|
||||
|
||||
/// Where a variable currently lives.
#[derive(Debug, Clone)]
pub enum Location {
    /// In the register with this name.
    Register(String),
    /// On the stack, at this offset from bpr.
    Stack(i32),
}
|
||||
|
||||
impl RegisterAllocator {
|
||||
pub fn new() -> Self {
|
||||
// Initialize with available GP registers (rg0-rgf = 16 registers)
|
||||
let registers = vec![
|
||||
"rg0", "rg1", "rg2", "rg3", "rg4", "rg5", "rg6", "rg7", "rg8", "rg9", "rga",
|
||||
"rgb", "rgc", "rgd", "rge", "rgf",
|
||||
]
|
||||
.into_iter()
|
||||
.map(String::from)
|
||||
.collect();
|
||||
|
||||
RegisterAllocator {
|
||||
available_registers: registers,
|
||||
variable_locations: HashMap::new(),
|
||||
register_contents: HashMap::new(),
|
||||
stack_offset: -4, // Start at -4 (first local below saved bpr)
|
||||
in_use: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocate a temporary register for expression evaluation
|
||||
/// Returns the register name and optionally assembly code to save it
|
||||
pub fn alloc_temp(&mut self) -> Result<(String, Vec<String>), CompilerError> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
// Try to find an unused register
|
||||
for reg in &self.available_registers {
|
||||
if !self.in_use.get(reg).unwrap_or(&false) {
|
||||
self.in_use.insert(reg.clone(), true);
|
||||
return Ok((reg.clone(), code));
|
||||
}
|
||||
}
|
||||
|
||||
// All registers in use - need to spill one
|
||||
// Choose the first register with a variable we can spill
|
||||
// Find a register to spill
|
||||
let reg_to_spill = self
|
||||
.available_registers
|
||||
.iter()
|
||||
.find(|reg| self.register_contents.contains_key(*reg))
|
||||
.cloned();
|
||||
|
||||
if let Some(reg) = reg_to_spill {
|
||||
// Spill this variable to stack
|
||||
let spill_code = self.spill_register(®)?;
|
||||
code.extend(spill_code);
|
||||
|
||||
self.in_use.insert(reg.clone(), true);
|
||||
return Ok((reg, code));
|
||||
}
|
||||
|
||||
Err(CompilerError::Generic(
|
||||
"All registers are used up yet there are no variables to spill to the stack"
|
||||
.to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
/// Free a temporary register after use
|
||||
/// NOTE: This will NOT free registers that contain variables!
|
||||
/// Variables persist throughout their scope and must not be freed
|
||||
pub fn free_temp(&mut self, reg: &str) {
|
||||
// Check if this register contains a variable
|
||||
if self.register_contents.contains_key(reg) {
|
||||
// This register holds a variable - don't free it!
|
||||
// Variables are only freed when they go out of scope via free_var()
|
||||
return;
|
||||
}
|
||||
|
||||
// This is a true temporary - safe to free
|
||||
self.in_use.insert(reg.to_string(), false);
|
||||
}
|
||||
|
||||
/// Allocate a register for a named variable
|
||||
/// Returns the register and any necessary assembly code
|
||||
pub fn alloc_var(
|
||||
&mut self,
|
||||
var_name: &str,
|
||||
) -> Result<(String, Vec<String>), CompilerError> {
|
||||
if let Some(location) = self.variable_locations.get(var_name).cloned() {
|
||||
match location {
|
||||
Location::Register(reg) => {
|
||||
return Ok((reg.clone(), Vec::new()));
|
||||
}
|
||||
Location::Stack(offset) => {
|
||||
// Variable was pushed, need to calculate actual position
|
||||
let (reg, mut code) = self.alloc_temp()?;
|
||||
|
||||
// Load from bpr + offset (offset is negative)
|
||||
code.push(format!("\tsubi bpr {} {}", -(offset + 4), reg));
|
||||
code.push(format!(
|
||||
"\tldw {}, {} // bpr{}: {}",
|
||||
reg,
|
||||
reg,
|
||||
offset - 4,
|
||||
var_name
|
||||
));
|
||||
|
||||
// Update location to register
|
||||
self.variable_locations
|
||||
.insert(var_name.to_string(), Location::Register(reg.clone()));
|
||||
self.register_contents
|
||||
.insert(reg.clone(), var_name.to_string());
|
||||
|
||||
return Ok((reg, code));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Variable doesn't have a location yet, allocate a new register
|
||||
let (reg, code) = self.alloc_temp()?;
|
||||
self.variable_locations
|
||||
.insert(var_name.to_string(), Location::Register(reg.clone()));
|
||||
self.register_contents
|
||||
.insert(reg.clone(), var_name.to_string());
|
||||
|
||||
Ok((reg, code))
|
||||
}
|
||||
|
||||
/// Get the current location of a variable
|
||||
pub fn get_var_location(&self, var_name: &str) -> Option<&Location> {
|
||||
self.variable_locations.get(var_name)
|
||||
}
|
||||
|
||||
/// Load a variable into a register (allocating if necessary)
|
||||
/// Returns the register and assembly code to load it
|
||||
pub fn load_var(
|
||||
&mut self,
|
||||
var_name: &str,
|
||||
) -> Result<(String, Vec<String>), CompilerError> {
|
||||
self.alloc_var(var_name)
|
||||
}
|
||||
|
||||
/// Store a value from a register into a variable
|
||||
/// Updates tracking and returns any necessary assembly code
|
||||
pub fn store_var(&mut self, var_name: &str, source_reg: &str) -> Vec<String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
// Check if variable already has a location
|
||||
if let Some(location) = self.variable_locations.get(var_name) {
|
||||
match location {
|
||||
Location::Register(dest_reg) => {
|
||||
if dest_reg != source_reg {
|
||||
code.push(format!(
|
||||
"\tmov {}, {} // var {}",
|
||||
source_reg, dest_reg, var_name
|
||||
));
|
||||
}
|
||||
}
|
||||
Location::Stack(offset) => {
|
||||
code.push(format!(
|
||||
"\tstw {}, bpr, {} // var {}",
|
||||
source_reg, offset, var_name
|
||||
));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Variable doesn't exist yet, we can just use the same reg.
|
||||
|
||||
// self.variable_locations.insert(
|
||||
// var_name.to_string(),
|
||||
// Location::Register(source_reg.to_string()),
|
||||
// );
|
||||
// self.register_contents
|
||||
// .insert(source_reg.to_string(), var_name.to_string());
|
||||
// self.in_use.insert(source_reg.to_string(), true);
|
||||
|
||||
let source_reg = source_reg.to_string();
|
||||
|
||||
// if we can avoid a move, absolutely do that.
|
||||
if self.available_registers.contains(&source_reg) {
|
||||
self.variable_locations
|
||||
.insert(var_name.to_string(), Location::Register(source_reg.clone()));
|
||||
self.register_contents
|
||||
.insert(source_reg.clone(), var_name.to_string());
|
||||
self.in_use.insert(source_reg, true);
|
||||
} else if let Some(free_reg) = self.find_free_register() {
|
||||
code.push(format!("\tmov {}, {}", source_reg, free_reg));
|
||||
self.variable_locations
|
||||
.insert(var_name.to_string(), Location::Register(free_reg.clone()));
|
||||
self.register_contents
|
||||
.insert(free_reg.clone(), var_name.to_string());
|
||||
self.in_use.insert(free_reg, true);
|
||||
} else {
|
||||
// No free registers - allocate on stack
|
||||
// code.push(format!("\tstw {}, bpr, {}", source_reg, self.stack_offset));
|
||||
// self.variable_locations
|
||||
// .insert(var_name.to_string(), Location::Stack(self.stack_offset));
|
||||
// self.stack_offset -= 4; // Move to next stack slot
|
||||
//
|
||||
todo!(
|
||||
"we should spill other registers and keep this variable on the stack as it's more recent!"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
code
|
||||
}
|
||||
|
||||
/// Spill a register to the stack
|
||||
/// Returns assembly code to perform the spill
|
||||
pub fn spill_register(&mut self, reg: &str) -> Result<Vec<String>, CompilerError> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
if let Some(var_name) = self.register_contents.get(reg).cloned() {
|
||||
// PUSH register to stack (spr decrements automatically)
|
||||
code.push(format!(
|
||||
"\tpush {} // bpr{}: {}",
|
||||
reg, self.stack_offset, var_name
|
||||
));
|
||||
|
||||
// Track that we pushed one word
|
||||
self.stack_offset -= 4;
|
||||
|
||||
// Update variable location - it's now at current spr
|
||||
// Note: We track offset from bpr for consistency
|
||||
self.variable_locations
|
||||
.insert(var_name.clone(), Location::Stack(self.stack_offset));
|
||||
|
||||
// Remove from register tracking
|
||||
self.register_contents.remove(reg);
|
||||
}
|
||||
|
||||
Ok(code)
|
||||
}
|
||||
|
||||
/// Find a free register (not currently in use)
|
||||
fn find_free_register(&self) -> Option<String> {
|
||||
for reg in &self.available_registers {
|
||||
if !self.in_use.get(reg).unwrap_or(&false) {
|
||||
return Some(reg.clone());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Spill all registers to stack (useful before function calls)
|
||||
pub fn spill_all(&mut self) -> Vec<String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
let regs_to_spill: Vec<String> = self.register_contents.keys().cloned().collect();
|
||||
|
||||
for reg in regs_to_spill {
|
||||
if let Ok(spill_code) = self.spill_register(®) {
|
||||
code.extend(spill_code);
|
||||
}
|
||||
}
|
||||
|
||||
code
|
||||
}
|
||||
|
||||
/// Get the total stack offset
|
||||
pub fn get_stack_offset(&self) -> i32 {
|
||||
self.stack_offset
|
||||
}
|
||||
|
||||
/// Get the total stack space needed for local variables
|
||||
pub fn get_stack_size(&self) -> i32 {
|
||||
-self.stack_offset // Convert negative offset to positive size
|
||||
}
|
||||
|
||||
/// Reset allocator for a new function
|
||||
pub fn reset(&mut self) {
|
||||
self.variable_locations.clear();
|
||||
self.register_contents.clear();
|
||||
self.stack_offset = -4;
|
||||
self.in_use.clear();
|
||||
}
|
||||
|
||||
/// Mark a variable as dead (no longer needed)
|
||||
/// Frees its register if it's in one
|
||||
pub fn free_var(&mut self, var_name: &str) {
|
||||
if let Some(Location::Register(reg)) = self.variable_locations.get(var_name) {
|
||||
let reg = reg.clone();
|
||||
self.register_contents.remove(®);
|
||||
self.in_use.insert(reg, false);
|
||||
}
|
||||
self.variable_locations.remove(var_name);
|
||||
}
|
||||
|
||||
/// Get list of registers that contain variables and are in use
|
||||
/// These need to be saved before function calls
|
||||
pub fn get_caller_saved_registers(&self) -> Vec<String> {
|
||||
self.register_contents
|
||||
.iter()
|
||||
.filter(|(reg, _)| *self.in_use.get(*reg).unwrap_or(&false))
|
||||
.map(|(reg, _)| reg.clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Save caller-saved registers before a function call
|
||||
/// Returns assembly code to save them
|
||||
pub fn save_caller_saved(&mut self) -> Vec<String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
// For simplicity, save all currently used registers
|
||||
// In a more sophisticated compiler, you'd only save registers that are live
|
||||
for (reg, var_name) in self.register_contents.clone() {
|
||||
if *self.in_use.get(®).unwrap_or(&false) {
|
||||
code.push(format!("\tpush {}", reg));
|
||||
}
|
||||
}
|
||||
|
||||
code
|
||||
}
|
||||
|
||||
/// Restore caller-saved registers after a function call
|
||||
/// Returns assembly code to restore them
|
||||
pub fn restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec<String> {
|
||||
let mut code = Vec::new();
|
||||
|
||||
// Restore in reverse order (LIFO)
|
||||
for reg in saved_regs.iter().rev() {
|
||||
code.push(format!("\tpop {}", reg));
|
||||
}
|
||||
|
||||
code
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Temporaries are handed out in register order and a freed register is
    /// the first one to be reused.
    #[test]
    fn test_basic_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg1, code1) = allocator.alloc_temp().unwrap();
        assert!(code1.is_empty()); // No spill needed
        assert_eq!(reg1, "rg0");

        let (reg2, code2) = allocator.alloc_temp().unwrap();
        assert!(code2.is_empty());
        assert_eq!(reg2, "rg1");

        allocator.free_temp(&reg1);

        let (reg3, code3) = allocator.alloc_temp().unwrap();
        assert!(code3.is_empty());
        assert_eq!(reg3, "rg0"); // Reuses freed register
    }

    /// Asking for the same variable twice yields the same register.
    #[test]
    fn test_variable_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg, "rg0");

        // Requesting same variable again should return same register
        let (reg2, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg2, "rg0");
    }

    /// Once all registers hold variables, the next allocation evicts one.
    #[test]
    fn test_stack_allocation() {
        let mut allocator = RegisterAllocator::new();

        // Allocate all 16 registers
        for i in 0..16 {
            allocator.alloc_var(&format!("var{}", i)).unwrap();
        }

        // Next allocation should spill to stack
        let (reg, code) = allocator.alloc_var("var16").unwrap();
        assert!(!code.is_empty()); // Should have spill code

        // The first register holding a variable is the one that gets evicted.
        assert_eq!(reg, "rg0");
        assert!(matches!(
            allocator.get_var_location("var0"),
            Some(Location::Stack(_))
        ));
    }
}
|
||||
@@ -1,13 +0,0 @@
|
||||
use crate::parser::{CompilerError, Program};
|
||||
|
||||
pub struct Analyser;
|
||||
|
||||
impl Analyser {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
pub fn analyse(&self, ast: Program) -> Result<(), CompilerError> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,135 @@
|
||||
#[must_use]
|
||||
pub fn build(src: &str) -> String {
|
||||
parse(src).join("\n")
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
#[expect(clippy::too_many_lines)]
|
||||
pub fn parse(src: &str) -> Vec<String> {
|
||||
let stack = "0x10000";
|
||||
let acc = "acc";
|
||||
let rga = "rga";
|
||||
|
||||
let bpr = "bpr";
|
||||
let spr = "spr";
|
||||
let mut instrs = Vec::<String>::new();
|
||||
|
||||
// Define symbols
|
||||
let print_start = "print";
|
||||
|
||||
let tokens = lex(src);
|
||||
|
||||
let mut idstack = Vec::<u32>::new();
|
||||
|
||||
// set up a stack
|
||||
instrs.push(format!("\tlwi {}, {}", stack, bpr));
|
||||
instrs.push(format!("\tmov {}, {}", bpr, spr));
|
||||
// set up the data pointer
|
||||
instrs.push(format!("{}: \t lwi 0x30000, {}", "main", rga));
|
||||
|
||||
for (id, tok) in tokens.iter().enumerate() {
|
||||
match tok {
|
||||
BfToken::Inc => {
|
||||
instrs.push(format!("\tinc {}", acc));
|
||||
}
|
||||
BfToken::Dec => {
|
||||
instrs.push(format!("\tdec {}", acc));
|
||||
}
|
||||
BfToken::IncPtr => {
|
||||
instrs.push(format!("\tstw {}, {}, 0", acc, rga));
|
||||
instrs.push(format!("\taddi {}, 4, {}", rga, rga));
|
||||
instrs.push(format!("\tlwd {}, {}, 0", rga, acc));
|
||||
}
|
||||
BfToken::DecPtr => {
|
||||
instrs.push(format!("\tstw {}, {}, 0", acc, rga));
|
||||
instrs.push(format!("\tsubi {}, 4, {}", rga, rga));
|
||||
instrs.push(format!("\tlwd {}, {}, 0", rga, acc));
|
||||
}
|
||||
BfToken::Out => {
|
||||
instrs.push(format!("\tpush {}", acc));
|
||||
instrs.push(format!("\tcall {}", print_start));
|
||||
instrs.push(format!("\tpop zero"));
|
||||
}
|
||||
BfToken::In => {
|
||||
instrs.push(format!("\tlwd 0x40000, {}, 0", acc));
|
||||
}
|
||||
BfToken::Forward => {
|
||||
let loop_start = format!("loop_start_{}", id);
|
||||
let loop_end = format!("loop_end_{}", id);
|
||||
idstack.push(id as u32);
|
||||
instrs.push(format!("\tcmp {}, zero", acc));
|
||||
instrs.push(format!("\tjeq {}, zero", loop_end));
|
||||
instrs.push(format!("{}: \tnop", loop_start));
|
||||
}
|
||||
BfToken::Back => {
|
||||
if let Some(start_id) = idstack.pop() {
|
||||
let loop_start = format!("loop_start_{}", start_id);
|
||||
let loop_end = format!("loop_end_{}", start_id);
|
||||
instrs.push(format!("\tcmp {}, zero", acc));
|
||||
instrs.push(format!("\tjne {}, zero", loop_start));
|
||||
instrs.push(format!("{}: \tnop", loop_end));
|
||||
} else {
|
||||
eprintln!("Warning: Unmatched ']' at position {}", id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
instrs.push("\thlt".to_string());
|
||||
|
||||
insert_lib(&mut instrs);
|
||||
|
||||
instrs
|
||||
}
|
||||
|
||||
/// Append the runtime support code to `instrs`.
///
/// This adds a `current` data word initialised to `0x20000` and the `print`
/// routine called for every Brainfuck `.`. Going by the emitted operands
/// (presumably base/destination/offset order - confirm against the
/// assembler), the routine reads its argument at `bpr + 8`, stores it as a
/// byte at the address held in `current`, and advances `current` by one.
fn insert_lib(instrs: &mut Vec<String>) {
    let (bpr, spr) = ("bpr", "spr");
    let (rg0, rg1) = ("rg0", "rg1");
    let (print_start, current) = ("print", "current");

    instrs.extend([
        format!("\tdw {current}, 0x20000"),
        // Prologue: establish a frame.
        format!("{print_start}: \tpush {bpr}"),
        format!("\tmov {spr}, {bpr}"),
        // Fetch the argument and the output cursor, write one byte.
        format!("\tlwd {bpr}, {rg0}, 8"),
        format!("\tlwd {current}, {rg1}, 0"),
        format!("\tstb {rg0}, {rg1}, 0"),
        // Advance the cursor and save it.
        format!("\taddi {rg1}, 1, {rg1}"),
        format!("\tstw {rg1}, {current}, 0"),
        // Epilogue: tear the frame down and return.
        format!("\tmov {bpr}, {spr}"),
        format!("\tpop {bpr}"),
        "\treturn".to_string(),
    ]);
}
|
||||
|
||||
/// One Brainfuck command, as produced by the lexer.
///
/// The derives make tokens printable, copyable and comparable, which is
/// needed to inspect or assert on a token stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BfToken {
    /// `+`
    Inc,
    /// `-`
    Dec,
    /// `>`
    IncPtr,
    /// `<`
    DecPtr,
    /// `.`
    Out,
    /// `,`
    In,
    /// `[`
    Forward,
    /// `]`
    Back,
}
|
||||
|
||||
fn lex(src: &str) -> Vec<BfToken> {
|
||||
src.chars()
|
||||
.filter_map(|c| match c {
|
||||
'+' => Some(BfToken::Inc),
|
||||
'-' => Some(BfToken::Dec),
|
||||
'>' => Some(BfToken::IncPtr),
|
||||
'<' => Some(BfToken::DecPtr),
|
||||
'.' => Some(BfToken::Out),
|
||||
',' => Some(BfToken::In),
|
||||
'[' => Some(BfToken::Forward),
|
||||
']' => Some(BfToken::Back),
|
||||
_ => None,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Build the label name `label_<id>` for the given numeric id.
fn _create_symbol(id: u32) -> String {
    let mut symbol = String::from("label_");
    symbol.push_str(&id.to_string());
    symbol
}
|
||||
@@ -0,0 +1,13 @@
|
||||
use crate::model::CompilerError;
|
||||
|
||||
pub mod brainf;
|
||||
|
||||
pub fn build_specialised(ext: &str, data: &str) -> Option<Result<String, CompilerError>> {
|
||||
match ext {
|
||||
"bf" => {
|
||||
let res = brainf::build(data);
|
||||
Some(Ok(res))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,429 @@
|
||||
# DSA Instruction Set Architecture Specification
|
||||
|
||||
## Overview
|
||||
|
||||
The Damn Simple Architecture (DSA) is a 32-bit RISC-style architecture designed for simplicity and educational purposes. This document provides the complete instruction set architecture specification, including all hardware instructions, registers, and encoding formats.
|
||||
|
||||
## Data Types and Sizes
|
||||
|
||||
| Type | Size | Alignment |
|
||||
|------|------|-----------|
|
||||
| Byte | 8 bits | 1-byte aligned |
|
||||
| Halfword | 16 bits | 2-byte aligned |
|
||||
| Word | 32 bits | 4-byte aligned |
|
||||
|
||||
**Note on Endianness:**
|
||||
- Instructions and numeric data in memory: Little-endian
|
||||
- Data defined via `db/dh/dw` directives: Big-endian (assembler-specific)
|
||||
|
||||
## Registers
|
||||
|
||||
DSA's 5-bit register fields can encode 32 register indices: 24 of them (0x00-0x17) are programmer-accessible, and the remaining indices are internal system registers or reserved.
|
||||
|
||||
### Programmer-Accessible Registers
|
||||
|
||||
| Hex | Register | Type | Description |
|
||||
|-----|----------|------|-------------|
|
||||
| 0x00-0x0F | **rg0-rgf** | General Purpose | 16 general-purpose registers for variables and temporary values |
|
||||
| 0x10 | **acc** | Special | Accumulator for calculations and temporary storage<br/>⚠️ Used as scratch by pseudo-instructions - volatile |
|
||||
| 0x11 | **spr** | Special | Stack pointer - points to top of stack |
|
||||
| 0x12 | **bpr** | Special | Base pointer - used for stack frame management |
|
||||
| 0x13 | **ret** | Special | Return address register - used for function returns |
|
||||
| 0x14 | **idr** | Privileged | Interrupt descriptor table address<br/>Read/write triggers protection fault in user mode |
|
||||
| 0x15 | **mmr** | Privileged | Hardware memory map table address<br/>Read/write triggers protection fault in user mode |
|
||||
| 0x16 | **zero** | Read-only | Constant zero value<br/>Reads always return 0, writes are discarded |
|
||||
| 0x17 | **noreg** | Placeholder | Indicates unused register field<br/>Read/write triggers illegal instruction fault<br/>Can also be referenced as **null** |
|
||||
| 0x18-0x1F | - | Reserved | Not programmer-accessible: 0x18-0x1C are the internal system registers listed below, 0x1D-0x1F are reserved for future use |
|
||||
|
||||
**System Registers (indices 0x18-0x1C):**
|
||||
These exist in the encoding space but are internal to the CPU implementation:
|
||||
|
||||
| Hex | Register | Description |
|
||||
|-----|----------|-------------|
|
||||
| 0x18 | **mar** | Memory Address Register (CPU internal) |
|
||||
| 0x19 | **mdr** | Memory Data Register (CPU internal) |
|
||||
| 0x1A | **sts** | Status Register (CPU internal) |
|
||||
| 0x1B | **cir** | Current Instruction Register (CPU internal) |
|
||||
| 0x1C | **pcx** | Program Counter (read-only, special access) |
|
||||
|
||||
**Note on PCX (Program Counter):**
|
||||
- PCX can be read in certain contexts (e.g., stored during CALL)
|
||||
- Writing to PCX triggers a protection fault
|
||||
- PCX is automatically updated by jump and branch instructions
|
||||
|
||||
### Status Register (STS) Layout
|
||||
|
||||
The status register is a 32-bit register with the following flag bits:
|
||||
|
||||
| Bit | Name | Description | Boot Value |
|
||||
|-----|------|-------------|------------|
|
||||
| 0 | **Equal** | Set if last comparison result was equal | 0 |
|
||||
| 1 | **GreaterThan** | Set if last comparison result was greater than | 0 |
|
||||
| 2 | **GreaterThanOrEqual** | Set if last comparison was greater than or equal | 0 |
|
||||
| 3 | **LessThan** | Set if last comparison result was less than | 0 |
|
||||
| 4 | **LessThanOrEqual** | Set if last comparison was less than or equal | 0 |
|
||||
| 5 | **Zero** | Set if last arithmetic/logic operation result was zero | 0 |
|
||||
| 6-31 | - | Reserved | 0 |
|
||||
|
||||
## Instruction Encoding Formats
|
||||
|
||||
DSA uses three instruction encoding formats:
|
||||
|
||||
### R-Type (Register) Instructions
|
||||
|
||||
Used for operations with register operands only, including shifts.
|
||||
|
||||
```
|
||||
31-26 | 25-21 | 20-16 | 15-11 | 10-6 | 5-0
|
||||
--------+---------+---------+---------+--------+-------
|
||||
Opcode | SrcReg1 | SrcReg2 | DestReg | ShiftAmt | Unused
|
||||
```
|
||||
|
||||
- **Opcode** (6 bits): Instruction operation code
|
||||
- **SrcReg1** (5 bits): First source register
|
||||
- **SrcReg2** (5 bits): Second source register
|
||||
- **DestReg** (5 bits): Destination register
|
||||
- **ShiftAmt** (5 bits): Shift amount (for shift instructions only, must be 0 otherwise)
|
||||
- **Unused** (6 bits): Must be 0
|
||||
|
||||
**Important Rules:**
|
||||
- ShiftAmt must be 0 for non-shift instructions (else illegal instruction fault)
|
||||
- Unused register fields must be set to `noreg` (0x17) if not used
|
||||
- Using registers in unexpected positions may cause illegal instruction fault
|
||||
|
||||
### I-Type (Immediate) Instructions
|
||||
|
||||
Used for operations with a 16-bit immediate value.
|
||||
|
||||
```
|
||||
31-26 | 25-21 | 20-16 | 15-0
|
||||
--------+---------+---------+-------------
|
||||
Opcode | SrcReg | DestReg | 16-bit Immediate
|
||||
```
|
||||
|
||||
- **Opcode** (6 bits): Instruction operation code
|
||||
- **SrcReg** (5 bits): Source register (base for memory ops)
|
||||
- **DestReg** (5 bits): Destination register (or offset register for jumps)
|
||||
- **Immediate** (16 bits): Signed 16-bit immediate value or offset
|
||||
|
||||
**Usage:**
|
||||
- Arithmetic: Immediate is a signed value
|
||||
- Memory access: Immediate is a signed byte offset from base address
|
||||
- Branches: Immediate is a signed offset added to base register
|
||||
- Literal loads: Immediate is unsigned 16-bit value
|
||||
|
||||
### J-Type (Jump) Instructions
|
||||
|
||||
Used for absolute jumps with large address ranges.
|
||||
|
||||
```
|
||||
31-26 | 25-0
|
||||
--------+----------------------
|
||||
Opcode | 26-bit Address
|
||||
```
|
||||
|
||||
- **Opcode** (6 bits): Jump instruction code
|
||||
- **Address** (26 bits): Partial address for jump
|
||||
|
||||
**Address Calculation:**
|
||||
1. Left-shift the 26-bit address by 2 (word alignment)
|
||||
2. OR with upper 4 bits of current PCX
|
||||
3. Result is final 32-bit jump address
|
||||
|
||||
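**Jump Range:** The 256MB-aligned region that contains the current PC. Because the upper 4 bits of PCX are kept, the target can never leave that region; this is a fixed window, not a ±128MB range centred on the PC.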
|
||||
|
||||
**Note:** J-type instructions are defined but currently unused. Use I-type JMP with register addressing for all jumps.
|
||||
|
||||
## Hardware Instructions
|
||||
|
||||
### Data Movement
|
||||
|
||||
| Hex | Mnemonic | Type | Operands | Description |
|
||||
|-----|----------|------|----------|-------------|
|
||||
| 0x00 | **NOP** | R | - | No operation - does nothing |
|
||||
| 0x01 | **MOV** | R | SrcReg, DestReg | Copy value from SrcReg to DestReg |
|
||||
| 0x02 | **MOVS** | R | SrcReg, DestReg | Copy with sign extension to fill 32 bits |
|
||||
|
||||
**MOV/MOVS Details:**
|
||||
- MOV performs direct copy (all 32 bits)
|
||||
- MOVS sign-extends the value (useful after byte/halfword loads)
|
||||
- Both instructions set the Zero flag if result is zero
|
||||
|
||||
### Memory Access - Load Instructions
|
||||
|
||||
All loads require proper alignment or trigger an alignment fault.
|
||||
|
||||
| Hex | Mnemonic | Type | Operands | Description |
|
||||
|-----|----------|------|----------|-------------|
|
||||
| 0x03 | **LDB** | I | BaseReg, DestReg, Offset | Load byte (8-bit), zero-extend to 32 bits |
|
||||
| 0x04 | **LDBS** | I | BaseReg, DestReg, Offset | Load byte (8-bit), sign-extend to 32 bits |
|
||||
| 0x05 | **LDH** | I | BaseReg, DestReg, Offset | Load halfword (16-bit), zero-extend to 32 bits |
|
||||
| 0x06 | **LDHS** | I | BaseReg, DestReg, Offset | Load halfword (16-bit), sign-extend to 32 bits |
|
||||
| 0x07 | **LDW** | I | BaseReg, DestReg, Offset | Load word (32-bit) |
|
||||
|
||||
**Load Operation:**
|
||||
- Effective address = BaseReg + SignExtend(Offset)
|
||||
- Offset is a signed 16-bit value
|
||||
- Alignment requirements:
|
||||
- LDB/LDBS: No alignment required (byte-aligned)
|
||||
- LDH/LDHS: Must be 2-byte aligned
|
||||
- LDW: Must be 4-byte aligned
|
||||
|
||||
**Encoding Note:**
|
||||
In machine code, the order is: BaseReg (SrcReg field), DestReg field, Offset (Immediate field)
|
||||
|
||||
### Memory Access - Store Instructions
|
||||
|
||||
All stores require proper alignment or trigger an alignment fault.
|
||||
|
||||
| Hex | Mnemonic | Type | Operands | Description |
|
||||
|-----|----------|------|----------|-------------|
|
||||
| 0x08 | **STB** | I | SrcReg, BaseReg, Offset | Store byte (8-bit) to memory |
|
||||
| 0x09 | **STH** | I | SrcReg, BaseReg, Offset | Store halfword (16-bit) to memory |
|
||||
| 0x0A | **STW** | I | SrcReg, BaseReg, Offset | Store word (32-bit) to memory |
|
||||
|
||||
**Store Operation:**
|
||||
- Effective address = BaseReg + SignExtend(Offset)
|
||||
- Offset is a signed 16-bit value
|
||||
- Only the relevant bits are stored (8, 16, or 32)
|
||||
- Alignment requirements:
|
||||
- STB: No alignment required (byte-aligned)
|
||||
- STH: Must be 2-byte aligned
|
||||
- STW: Must be 4-byte aligned
|
||||
|
||||
**Encoding Note:**
|
||||
In machine code: SrcReg (SrcReg field), BaseReg (DestReg field), Offset (Immediate field)
|
||||
|
||||
### Immediate Load Instructions
|
||||
|
||||
| Hex | Mnemonic | Type | Operands | Description |
|
||||
|-----|----------|------|----------|-------------|
|
||||
| 0x0B | **LLI** | I | Value, DestReg | Load 16-bit value into lower 16 bits<br/>⚠️ **CLEARS upper 16 bits!** |
|
||||
| 0x0C | **LUI** | I | Value, DestReg | Load 16-bit value into upper 16 bits<br/>Lower 16 bits unchanged |
|
||||
|
||||
**Usage for 32-bit Values:**
|
||||
```
|
||||
LLI 0x1234, rg0 ; rg0 = 0x00001234
|
||||
LUI 0xABCD, rg0 ; rg0 = 0xABCD1234
|
||||
```
|
||||
|
||||
**⚠️ CRITICAL:** Always execute LLI before LUI, as LLI clears the upper 16 bits!
|
||||
|
||||
**Note on LUI:** The assembler may shift the immediate value right by 16 bits when encoding, so specify the upper 16 bits directly (e.g., `LUI 0xABCD, rg0` not `LUI 0xABCD0000, rg0`).
|
||||
|
||||
**Encoding Note:**
|
||||
In machine code: Value (Immediate field), DestReg (encoded in the SrcReg field for both LLI and LUI)
|
||||
|
||||
### Jump and Branch Instructions
|
||||
|
||||
| Hex | Mnemonic | Type | Operands | Description |
|
||||
|-----|----------|------|----------|-------------|
|
||||
| 0x0D | **JMP** | I | Offset, BaseReg | Unconditional jump to (BaseReg + Offset) |
|
||||
| 0x0E | **JEQ** | I | Offset, BaseReg | Jump if Equal flag set |
|
||||
| 0x0F | **JNE** | I | Offset, BaseReg | Jump if Equal flag NOT set |
|
||||
| 0x10 | **JGT** | I | Offset, BaseReg | Jump if GreaterThan flag set |
|
||||
| 0x11 | **JGE** | I | Offset, BaseReg | Jump if GreaterThan OR Equal flag set |
|
||||
| 0x12 | **JLT** | I | Offset, BaseReg | Jump if LessThan flag set |
|
||||
| 0x13 | **JLE** | I | Offset, BaseReg | Jump if LessThan OR Equal flag set |
|
||||
|
||||
**Jump Calculation:**
|
||||
- Target address = BaseReg + SignExtend(Offset)
|
||||
- If BaseReg = zero, this becomes absolute addressing with Offset
|
||||
- If BaseReg = ret, this becomes return-style addressing
|
||||
- Conditional jumps check flags in STS register
|
||||
|
||||
**Common Patterns:**
|
||||
```
|
||||
JMP label, zero ; Absolute jump to label address
|
||||
JMP 0, ret ; Jump to address in ret register
|
||||
JMP 4, ret ; Jump to (ret + 4)
|
||||
```
|
||||
|
||||
**Encoding Note:**
|
||||
In machine code: Offset (Immediate field), BaseReg (SrcReg field) (DestReg unused, set to noreg)
|
||||
|
||||
### Comparison
|
||||
|
||||
| Hex | Mnemonic | Type | Operands | Description |
|
||||
|-----|----------|------|----------|-------------|
|
||||
| 0x14 | **CMP** | R | Reg1, Reg2 | Compare Reg1 with Reg2, set flags in STS |
|
||||
|
||||
**Flag Setting:**
|
||||
- Equal: Set if Reg1 == Reg2
|
||||
- GreaterThan: Set if Reg1 > Reg2 (signed)
|
||||
- GreaterThanOrEqual: Set if Reg1 >= Reg2 (signed)
|
||||
- LessThan: Set if Reg1 < Reg2 (signed)
|
||||
- LessThanOrEqual: Set if Reg1 <= Reg2 (signed)
|
||||
- Zero: Set if (Reg1 - Reg2) == 0 (same as Equal)
|
||||
|
||||
**Encoding Note:**
|
||||
DestReg and ShiftAmt fields unused (set to noreg and 0)
|
||||
|
||||
### Arithmetic Instructions
|
||||
|
||||
| Hex | Mnemonic | Type | Operands | Description |
|
||||
|-----|----------|------|----------|-------------|
|
||||
| 0x15 | **INC** | R | Reg | Increment register by 1 |
|
||||
| 0x16 | **DEC** | R | Reg | Decrement register by 1 |
|
||||
| 0x19 | **ADD** | R | Src1, Src2, Dest | Dest = Src1 + Src2 |
|
||||
| 0x1A | **SUB** | R | Src1, Src2, Dest | Dest = Src1 - Src2 |
|
||||
| 0x25 | **IADD** | I | Src, Literal, Dest | Dest = Src + SignExtend(Literal) |
|
||||
| 0x26 | **ISUB** | I | Src, Literal, Dest | Dest = Src - SignExtend(Literal) |
|
||||
|
||||
**Flag Effects:**
|
||||
- Zero flag set if result is zero
|
||||
- Other flags undefined after arithmetic (use CMP for comparisons)
|
||||
|
||||
**Encoding Notes:**
|
||||
- INC/DEC: Reg in SrcReg1 field, DestReg set to noreg
|
||||
- IADD/ISUB: Immediate is signed 16-bit value, all three operands required
|
||||
|
||||
### Bitwise Logical Operations
|
||||
|
||||
| Hex | Mnemonic | Type | Operands | Description |
|
||||
|-----|----------|------|----------|-------------|
|
||||
| 0x1B | **AND** | R | Src1, Src2, Dest | Dest = Src1 & Src2 (bitwise AND) |
|
||||
| 0x1C | **OR** | R | Src1, Src2, Dest | Dest = Src1 \| Src2 (bitwise OR) |
|
||||
| 0x1D | **NOT** | R | Src, Dest | Dest = ~Src (bitwise NOT) |
|
||||
| 0x1E | **XOR** | R | Src1, Src2, Dest | Dest = Src1 ^ Src2 (bitwise XOR) |
|
||||
| 0x1F | **NAND** | R | Src1, Src2, Dest | Dest = ~(Src1 & Src2) (bitwise NAND) |
|
||||
| 0x20 | **NOR** | R | Src1, Src2, Dest | Dest = ~(Src1 \| Src2) (bitwise NOR) |
|
||||
| 0x21 | **XNOR** | R | Src1, Src2, Dest | Dest = ~(Src1 ^ Src2) (bitwise XNOR) |
|
||||
|
||||
**Flag Effects:**
|
||||
- Zero flag set if result is zero
|
||||
- Other flags undefined
|
||||
|
||||
**Encoding Note:**
|
||||
NOT uses only Src (SrcReg1) and Dest (DestReg); SrcReg2 unused (set to noreg)
|
||||
|
||||
### Shift Operations
|
||||
|
||||
| Hex | Mnemonic | Type | Operands | Description |
|
||||
|-----|----------|------|----------|-------------|
|
||||
| 0x17 | **SHL** | R | Reg, ShiftAmount | Shift Reg left by ShiftAmount bits<br/>Zero-fill from right |
|
||||
| 0x18 | **SHR** | R | Reg, ShiftAmount | Shift Reg right by ShiftAmount bits<br/>Zero-fill from left (logical shift) |
|
||||
|
||||
**Shift Amount:**
|
||||
- **Literal shifts**: ShiftAmount is a 5-bit literal (0-31) in assembly
|
||||
- Stored in ShiftAmt field of instruction
|
||||
- SrcReg2 set to noreg
|
||||
- **Register shifts**: ShiftAmount is a register containing shift value
|
||||
- Register specified in SrcReg2 field
|
||||
- ShiftAmt field must be 0
|
||||
- Only low 5 bits of register value used
|
||||
|
||||
**Note:** Current assembler implementation may only support literal shifts. Check assembler documentation.
|
||||
|
||||
**Flag Effects:**
|
||||
- Zero flag set if result is zero
|
||||
|
||||
**Encoding Notes:**
|
||||
- Reg in both SrcReg1 and DestReg fields (shifted in place)
|
||||
- For literal shifts: ShiftAmt field contains shift count, SrcReg2 = noreg
|
||||
- For register shifts: SrcReg2 contains register, ShiftAmt must be 0
|
||||
|
||||
### System and Control Instructions
|
||||
|
||||
| Hex | Mnemonic | Type | Operands | Description |
|
||||
|-----|----------|------|----------|-------------|
|
||||
| 0x22 | **INT** | I | InterruptCode | Trigger interrupt with 8-bit code<br/>Saves return address to ret register<br/>Sets bpr to kernel stack |
|
||||
| 0x23 | **IRT** | R | - | Return from interrupt<br/>Restores execution context |
|
||||
| 0x24 | **HLT** | R | - | Halt processor execution<br/>Stops fetch-decode-execute cycle |
|
||||
|
||||
**INT Behavior:**
|
||||
1. Save current PCX to ret register
|
||||
2. Switch bpr to kernel stack address
|
||||
3. Look up interrupt handler address in interrupt descriptor table (idr)
|
||||
4. Jump to handler at interrupt vector
|
||||
|
||||
**IRT Behavior:**
|
||||
1. Restore previous execution context
|
||||
2. Return to address in ret register
|
||||
3. Restore user stack pointer
|
||||
|
||||
**Encoding Notes:**
|
||||
- INT: InterruptCode in low 8 bits of Immediate field
|
||||
- IRT/HLT: All register fields set to noreg, ShiftAmt to 0
|
||||
|
||||
### Meta Instructions (Assembler/Linker)
|
||||
|
||||
These instructions are used by the assembler and linker but may not represent real CPU operations.
|
||||
|
||||
| Hex | Mnemonic | Description |
|
||||
|-----|----------|-------------|
|
||||
| 0x27 | **SEGMENT** | Segment marker (implementation-specific) |
|
||||
| 0x3E | **DATA** | Raw data embedding |
|
||||
|
||||
**Note:** The SEGMENT instruction opcode may vary between implementations (0x27 in assembler, 0x3F in some contexts). Consult your specific toolchain documentation.
|
||||
|
||||
## Instruction Summary Table
|
||||
|
||||
| Opcode | Mnemonic | Type | Category |
|
||||
|--------|----------|------|----------|
|
||||
| 0x00 | NOP | R | Control |
|
||||
| 0x01 | MOV | R | Data Movement |
|
||||
| 0x02 | MOVS | R | Data Movement |
|
||||
| 0x03 | LDB | I | Memory Load |
|
||||
| 0x04 | LDBS | I | Memory Load |
|
||||
| 0x05 | LDH | I | Memory Load |
|
||||
| 0x06 | LDHS | I | Memory Load |
|
||||
| 0x07 | LDW | I | Memory Load |
|
||||
| 0x08 | STB | I | Memory Store |
|
||||
| 0x09 | STH | I | Memory Store |
|
||||
| 0x0A | STW | I | Memory Store |
|
||||
| 0x0B | LLI | I | Immediate Load |
|
||||
| 0x0C | LUI | I | Immediate Load |
|
||||
| 0x0D | JMP | I | Jump |
|
||||
| 0x0E | JEQ | I | Branch |
|
||||
| 0x0F | JNE | I | Branch |
|
||||
| 0x10 | JGT | I | Branch |
|
||||
| 0x11 | JGE | I | Branch |
|
||||
| 0x12 | JLT | I | Branch |
|
||||
| 0x13 | JLE | I | Branch |
|
||||
| 0x14 | CMP | R | Comparison |
|
||||
| 0x15 | INC | R | Arithmetic |
|
||||
| 0x16 | DEC | R | Arithmetic |
|
||||
| 0x17 | SHL | R | Shift |
|
||||
| 0x18 | SHR | R | Shift |
|
||||
| 0x19 | ADD | R | Arithmetic |
|
||||
| 0x1A | SUB | R | Arithmetic |
|
||||
| 0x1B | AND | R | Logical |
|
||||
| 0x1C | OR | R | Logical |
|
||||
| 0x1D | NOT | R | Logical |
|
||||
| 0x1E | XOR | R | Logical |
|
||||
| 0x1F | NAND | R | Logical |
|
||||
| 0x20 | NOR | R | Logical |
|
||||
| 0x21 | XNOR | R | Logical |
|
||||
| 0x22 | INT | I | System |
|
||||
| 0x23 | IRT | R | System |
|
||||
| 0x24 | HLT | R | System |
|
||||
| 0x25 | IADD | I | Arithmetic |
|
||||
| 0x26 | ISUB | I | Arithmetic |
|
||||
| 0x27 | SEGMENT | - | Meta |
|
||||
| 0x3E | DATA | - | Meta |
|
||||
|
||||
## Exception Conditions
|
||||
|
||||
The following conditions trigger exceptions:
|
||||
|
||||
| Exception | Trigger Condition |
|
||||
|-----------|------------------|
|
||||
| **Illegal Instruction** | - Invalid opcode<br/>- noreg used as source/destination<br/>- ShiftAmt non-zero for non-shift instruction<br/>- Register field violations |
|
||||
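| **Protection Fault** | - Write to pcx register<br/>- Read/write idr or mmr in user mode<br/>*Not a fault:* writes to the zero register are discarded silently<br/>*Note:* reading or writing noreg is reported as an Illegal Instruction (see the register table), not as a protection fault |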
|
||||
| **Alignment Fault** | - LDH/LDHS/STH with odd address<br/>- LDW/STW with address not divisible by 4 |
|
||||
| **Memory Access Violation** | - Access to unmapped or protected memory<br/>- Stack overflow/underflow |
|
||||
|
||||
## Calling Convention
|
||||
|
||||
See the DSA Assembly Language Reference for the complete calling convention and ABI specification.
|
||||
|
||||
## Notes on Design
|
||||
|
||||
1. **Word Size:** All addresses and general computations are 32-bit
|
||||
2. **Endianness:** Little-endian for instructions and runtime data; assembler data directives may use big-endian
|
||||
3. **Stack Growth:** Stack grows **downward** (toward lower addresses) - PUSH decrements SPR
|
||||
4. **Alignment:** Natural alignment required for halfword and word accesses
|
||||
5. **Sign Extension:** All immediate values are sign-extended unless noted
|
||||
6. **Zero Register:** Provides constant zero, writes are legal but discarded
|
||||
7. **Reserved Encodings:** Opcodes 0x27-0x3D and 0x3F reserved or implementation-specific
|
||||
@@ -263,12 +263,12 @@
|
||||
- [ ] Array syntax
|
||||
- [ ] Struct syntax
|
||||
- [x] Pointer syntax
|
||||
- [ ] Namespaced call syntax
|
||||
- [x] Namespaced call syntax
|
||||
- [x] AST node definitions
|
||||
- [ ] Error recovery mechanisms
|
||||
- [ ] Comprehensive parser tests
|
||||
- [ ] Syntax error message quality testing
|
||||
- [ ] Implement C frontend by moving lexer/parser from `c_compiler` to the new `compiler` project structure
|
||||
- [x] Implement C frontend by moving lexer/parser from `c_compiler` to the new `compiler` project structure
|
||||
- [ ] Evaluate possible memory management strategies (e.g., keep all variables on the stack vs spill only when calling functions)
|
||||
|
||||
---
|
||||
@@ -290,7 +290,7 @@
|
||||
- [ ] Optimize register allocation further
|
||||
- [x] Implement proper function calling conventions
|
||||
- [ ] Add constant folding optimization
|
||||
- [ ] Dead code elimination
|
||||
- [x] Dead code elimination
|
||||
- [ ] Test each feature thoroughly
|
||||
|
||||
---
|
||||
@@ -376,7 +376,7 @@
|
||||
**Dependencies:** None
|
||||
**Deliverable:** `docs/build-system-design.md`
|
||||
|
||||
- [ ] Define project structure conventions
|
||||
- [x] Define project structure conventions
|
||||
- [ ] Design build manifest format (`dsa-project.toml` or similar)
|
||||
- [ ] Dependency resolution strategy
|
||||
- [ ] Build cache design
|
||||
@@ -391,12 +391,12 @@
|
||||
**Dependencies:** 3.1.1, 1.2.2, 1.1.3, 2.1.3
|
||||
**Deliverable:** `dsa-build` executable
|
||||
|
||||
- [ ] Create crate: `dsa-build`
|
||||
- [x] Create crate: `dsa-build`
|
||||
- [ ] Manifest parser
|
||||
- [ ] Dependency graph builder
|
||||
- [ ] Task orchestrator
|
||||
- [ ] Compilation tasks
|
||||
- [ ] Assembly tasks
|
||||
- [x] Compilation tasks
|
||||
- [x] Assembly tasks
|
||||
- [ ] Linking tasks
|
||||
- [ ] Build cache implementation
|
||||
- [ ] Parallel build support
|
||||
@@ -412,11 +412,11 @@
|
||||
**Dependencies:** 3.1.2
|
||||
**Deliverable:** Enhanced `dsa-build` with project management
|
||||
|
||||
- [ ] `dsa new <project>` — Create new project
|
||||
- [ ] `dsa init` — Initialize in existing directory
|
||||
- [x] `dsa new <project>` — Create new project
|
||||
- [x] `dsa init` — Initialize in existing directory
|
||||
- [ ] `dsa add <dependency>` — Add dependency
|
||||
- [ ] Binary vs library project types
|
||||
- [ ] Template system for project scaffolding
|
||||
- [x] Template system for project scaffolding
|
||||
- [ ] Documentation for each command
|
||||
|
||||
---
|
||||
@@ -0,0 +1,638 @@
|
||||
# DSA Implementation vs Documentation Discrepancies
|
||||
|
||||
## Critical Discrepancies
|
||||
|
||||
### 1. **Stack Growth Direction** ❌ CRITICAL
|
||||
|
||||
**Documentation states:** Stack grows upward (toward higher addresses)
|
||||
|
||||
**Implementation shows (expand.rs:44-51):**
|
||||
```rust
|
||||
fn expand_push(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||
// ...
|
||||
nodes.extend(vec![
|
||||
node!(label, Opcode::SubI, spr, 4, spr), // spr = spr - 4
|
||||
node!(None, Opcode::Stw, reg, spr, 0),
|
||||
]);
|
||||
```
|
||||
|
||||
**Implementation shows (expand.rs:130-137):**
|
||||
```rust
|
||||
fn expand_pop(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||
// ...
|
||||
nodes.extend(vec![
|
||||
node!(label, Opcode::Ldw, spr, reg, 0),
|
||||
node!(None, Opcode::AddI, spr, 4, spr), // spr = spr + 4
|
||||
]);
|
||||
```
|
||||
|
||||
**Reality:** Stack grows **DOWNWARD** (toward lower addresses)
|
||||
- PUSH: Decrements SPR by 4, then stores
|
||||
- POP: Loads, then increments SPR by 4
|
||||
|
||||
**Impact:** All documentation examples and calling convention diagrams are backwards!
|
||||
|
||||
---
|
||||
|
||||
### 2. **CALL Pseudo-instruction Expansion** ❌ CRITICAL
|
||||
|
||||
**Documentation states (DSA_Assembly_Reference.md):**
|
||||
```asm
|
||||
; call print::print expands to:
|
||||
lwi print::print, ret ; Load function address into ret
|
||||
jmp 0, ret ; Jump to function (saves return in pcx)
|
||||
```
|
||||
|
||||
**Implementation shows (expand.rs:109-123):**
|
||||
```rust
|
||||
fn expand_call(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||
nodes.extend(vec![
|
||||
node!(label, Opcode::SubI, spr, 4, spr), // Decrement stack pointer
|
||||
node!(None, Opcode::Stw, pcx, spr, 0), // Store PCX (return addr) on stack
|
||||
node!(None, Opcode::Jmp, addr, zero), // Jump to function
|
||||
]);
|
||||
```
|
||||
|
||||
**Reality:** CALL expansion is:
|
||||
1. Decrement SPR by 4
|
||||
2. Store PCX (return address) to stack
|
||||
3. Jump to function address
|
||||
|
||||
**Impact:** Return address is stored on the STACK, not in RET register!
|
||||
|
||||
---
|
||||
|
||||
### 3. **RETURN Pseudo-instruction Expansion** ❌ CRITICAL
|
||||
|
||||
**Documentation states:**
|
||||
```asm
|
||||
; return expands to:
|
||||
jmp 0, ret ; Jump to address in ret register
|
||||
```
|
||||
|
||||
**Implementation shows (expand.rs:125-135):**
|
||||
```rust
|
||||
fn expand_return(current: &Node, nodes: &mut Vec<Node>) {
|
||||
nodes.extend(vec![
|
||||
node!(label, Opcode::Ldw, spr, ret, 0), // Load return addr from stack
|
||||
node!(None, Opcode::AddI, spr, 4, spr), // Increment stack pointer
|
||||
node!(None, Opcode::Jmp, 4, ret), // Jump to (ret + 4)
|
||||
]);
|
||||
}
|
||||
```
|
||||
|
||||
**Reality:** RETURN expansion is:
|
||||
1. Load return address from stack into RET register
|
||||
2. Increment SPR by 4
|
||||
3. Jump to (RET + 4)
|
||||
|
||||
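**Why +4?** Presumably the PCX value saved by CALL's `stw` points at the `jmp` that follows it, so adding 4 resumes execution at the instruction after the call sequence. This needs to be confirmed against the emulator: if the saved PCX already points past the `jmp`, the +4 is a bug in the implementation.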
|
||||
|
||||
**Impact:** Return mechanism is completely different from documentation!
|
||||
|
||||
---
|
||||
|
||||
### 4. **Calling Convention - Stack Frame Layout** ❌ CRITICAL
|
||||
|
||||
**Documentation states:**
|
||||
```
|
||||
Higher Addresses
|
||||
├─────────────┤
|
||||
│ Arg N │ ← spr + (8 + 4*(N-1))
|
||||
│ ... │
|
||||
│ Arg 2 │ ← spr + 16
|
||||
│ Arg 1 │ ← spr + 12
|
||||
│ Arg 0 │ ← spr + 8
|
||||
├─────────────┤
|
||||
│ Ret Addr │ ← spr + 4
|
||||
├─────────────┤
|
||||
│ Old BPR │ ← spr + 0
|
||||
├─────────────┤ ← bpr, spr
|
||||
│ Locals │
|
||||
Lower Addresses
|
||||
```
|
||||
|
||||
**Reality based on implementation:**
|
||||
Since stack grows DOWN:
|
||||
```
|
||||
Lower Addresses
|
||||
├─────────────┤ ← Current SPR/BPR
|
||||
│ Old BPR │ ← spr + 0 (immediately above SPR)
|
||||
├─────────────┤
|
||||
│ Ret Addr │ ← spr + 4 (pushed by CALL)
|
||||
├─────────────┤
|
||||
│ Arg 0 │ ← spr + 8
|
||||
│ Arg 1 │ ← spr + 12
|
||||
│ Arg 2 │ ← spr + 16
|
||||
│ ... │
|
||||
│ Arg N │ ← spr + (8 + 4*(N-1))
|
||||
├─────────────┤
|
||||
Higher Addresses
|
||||
```
|
||||
|
||||
**The diagram needs to be flipped!** The offsets are correct, but the direction is wrong.
|
||||
|
||||
---
|
||||
|
||||
### 5. **Label-Based Load/Store Scratch Register** ⚠️ IMPORTANT
|
||||
|
||||
**Documentation states:** Uses `rgf` as scratch register
|
||||
|
||||
**Implementation shows (expand.rs:138-153):**
|
||||
```rust
|
||||
fn expand_ldx(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||
// For ldb label, reg:
|
||||
nodes.extend(vec![
|
||||
node!(current.label(), Opcode::Lli, name, reg),
|
||||
node!(None, Opcode::Lui, name, reg),
|
||||
node!(None, opcode, reg, reg, offset),
|
||||
]);
|
||||
```
|
||||
|
||||
**Wait! This is WRONG in the implementation!**
|
||||
|
||||
The load expansion uses the DESTINATION register as scratch:
|
||||
```asm
|
||||
ldb buffer, rg2 expands to:
|
||||
lli buffer, rg2 ; Uses rg2 as destination
|
||||
lui buffer, rg2 ; Uses rg2 as destination
|
||||
ldb rg2, rg2, 0 ; Uses rg2 as base
|
||||
```
|
||||
|
||||
**Documentation says it should use rgf:**
|
||||
```asm
|
||||
ldb buffer, rg2 expands to:
|
||||
lli buffer, rgf ; Uses rgf as scratch
|
||||
lui buffer, rgf ; Uses rgf as scratch
|
||||
ldb rgf, rg2, 0 ; Load from rgf into rg2
|
||||
```
|
||||
|
||||
**For stores (expand.rs:155-176):**
|
||||
```rust
|
||||
fn expand_stx(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||
// For stb reg, label:
|
||||
let temp = Token::Register(Register::Acc); // Uses ACC, not RGF!
|
||||
|
||||
nodes.extend(vec![
|
||||
node!(current.label(), Opcode::Lli, dest, temp),
|
||||
node!(None, Opcode::Lui, dest, temp),
|
||||
node!(None, opcode, base, temp, offset),
|
||||
]);
|
||||
```
|
||||
|
||||
**Reality:**
|
||||
- Load pseudo-instructions use the DESTINATION register as scratch
|
||||
- Store pseudo-instructions use the ACC register as scratch, NOT rgf
|
||||
|
||||
**Impact:** Documentation is incorrect about which registers are used!
|
||||
|
||||
---
|
||||
|
||||
### 6. **LWI Pseudo-instruction** ✅ CORRECT
|
||||
|
||||
**Documentation and implementation agree:**
|
||||
```rust
|
||||
fn expand_lwi(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||
nodes.extend(vec![
|
||||
node!(current.label(), Opcode::Lli, val, reg),
|
||||
node!(None, Opcode::Lui, val, reg),
|
||||
]);
|
||||
```
|
||||
|
||||
This matches the documented expansion.
|
||||
|
||||
---
|
||||
|
||||
### 7. **PUSHA/POPA Pseudo-instructions** 📝 UNDOCUMENTED
|
||||
|
||||
**These exist in implementation but are NOT in documentation!**
|
||||
|
||||
**expand.rs:53-76:**
|
||||
```rust
|
||||
fn expand_pusha(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||
let count = expect_token!(arg0, Immediate)?;
|
||||
let spr = Token::Register(Register::Spr);
|
||||
let registers: Vec<Register> = Register::general();
|
||||
|
||||
nodes.push(node!(label, Opcode::SubI, spr, Token::Immediate(count * 4), spr));
|
||||
|
||||
nodes.extend((0..count).rev().map(|i| {
|
||||
node!(None, Opcode::Stw,
|
||||
Token::Register(registers[i as usize]),
|
||||
spr,
|
||||
Token::Immediate(i * 4)
|
||||
)
|
||||
}));
|
||||
```
|
||||
|
||||
**expand.rs:78-101:**
|
||||
```rust
|
||||
fn expand_popa(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||
let count = expect_token!(arg0, Immediate)?;
|
||||
|
||||
nodes.extend((0..count).rev().map(|i| {
|
||||
node!(
|
||||
{ if i == 0 { label.clone() } else { None } },
|
||||
Opcode::Ldw,
|
||||
spr,
|
||||
Token::Register(registers[i as usize]),
|
||||
Token::Immediate(i * 4)
|
||||
)
|
||||
}));
|
||||
|
||||
nodes.push(node!(None, Opcode::AddI, spr, Token::Immediate(count * 4), spr));
|
||||
```
|
||||
|
||||
**What they do:**
|
||||
- `pusha N` - Push first N general-purpose registers (rg0 through rg(N-1)) to stack
|
||||
- `popa N` - Pop first N general-purpose registers from stack
|
||||
|
||||
**Missing from documentation entirely!**
|
||||
|
||||
---
|
||||
|
||||
### 8. **Register Index Encoding** ⚠️ IMPORTANT
|
||||
|
||||
**Documentation states:** System registers like MAR, MDR, STS, CIR, PCX are "internal" and not accessible
|
||||
|
||||
**Implementation shows (instructions.rs:148-153):**
|
||||
```rust
|
||||
0x18 => Self::Mar,
|
||||
0x19 => Self::Mdr,
|
||||
0x1A => Self::Sts,
|
||||
0x1B => Self::Cir,
|
||||
0x1C => Self::Pcx,
|
||||
```
|
||||
|
||||
**Reality:** These registers ARE encoded in the instruction format at indices 0x18-0x1C!
|
||||
|
||||
**However, instructions.rs:186 shows:**
|
||||
```rust
|
||||
"null" => Ok(Self::NoReg), // Can parse "null" as NoReg
|
||||
```
|
||||
|
||||
**Documentation never mentions "null" as an alternative name for noreg!**
|
||||
|
||||
---
|
||||
|
||||
### 9. **LUI Immediate Value Handling** ⚠️ IMPORTANT
|
||||
|
||||
**Documentation states:**
|
||||
```
|
||||
lui immediate, dest_reg ; Load immediate into upper 16 bits
|
||||
```
|
||||
|
||||
**Implementation shows (codegen.rs:248-254):**
|
||||
```rust
|
||||
fn build_load_immediate_instruction(...) -> Result<Instruction, AssembleError> {
|
||||
// ...
|
||||
match opcode {
|
||||
Opcode::Lli => {
|
||||
let instruction_args = args!(I, immediate: value as u16, r1: dest);
|
||||
Ok(Instruction::LoadLowerImmediate(instruction_args))
|
||||
}
|
||||
Opcode::Lui => {
|
||||
let upper_value = value >> 16; // Shifts right by 16!
|
||||
let instruction_args = args!(I, immediate: upper_value as u16, r1: dest);
|
||||
Ok(Instruction::LoadUpperImmediate(instruction_args))
|
||||
}
|
||||
```
|
||||
|
||||
**Reality:** When assembling `lui immediate, reg`, the assembler:
|
||||
1. Takes the immediate value
|
||||
2. Shifts it RIGHT by 16 bits
|
||||
3. Stores the result in the instruction
|
||||
|
||||
**This means:**
|
||||
```asm
|
||||
lli 0x1234, rg0 ; Stores 0x1234 in lower 16 bits
|
||||
lui 0xABCD0000, rg0 ; Right-shifts to 0xABCD, stores in upper 16 bits
|
||||
```
|
||||
|
||||
**Or more likely, the assembler expects:**
|
||||
```asm
|
||||
lli 0x1234, rg0 ; Stores 0x1234 in lower 16 bits
|
||||
lui 0xABCD, rg0 ; Stores 0xABCD in upper 16 bits (no shift needed)
|
||||
```
|
||||
|
||||
**Documentation needs clarification on what immediate value format LUI expects!**
|
||||
|
||||
---
|
||||
|
||||
### 10. **Data Definition Encoding** ⚠️ IMPORTANT
|
||||
|
||||
**Implementation (expand.rs:217-267):**
|
||||
```rust
|
||||
fn process_dx_data(args: Vec<Token>, size: usize) -> Result<Vec<u32>, AssembleError> {
|
||||
for token in args {
|
||||
match token {
|
||||
Token::StringLit(mut s) => {
|
||||
s.push('\0'); // Automatically adds null terminator!
|
||||
for ch in s.chars() {
|
||||
let mut char_buf = [0u8; 4];
|
||||
let char_bytes = ch.encode_utf8(&mut char_buf);
|
||||
buffer.extend_from_slice(char_bytes.as_bytes());
|
||||
}
|
||||
}
|
||||
Token::Immediate(value) => {
|
||||
buffer.extend_from_slice(&value.to_be_bytes()); // BIG ENDIAN!
|
||||
}
|
||||
```
|
||||
|
||||
**Key findings:**
|
||||
1. String literals automatically get null terminator appended
|
||||
2. Numeric values are stored in **BIG ENDIAN** format (to_be_bytes)
|
||||
3. Documentation says "little-endian byte order" globally
|
||||
|
||||
**Contradiction:** Data definition uses BIG ENDIAN, but doc says LITTLE ENDIAN!
|
||||
|
||||
---
|
||||
|
||||
### 11. **Segment Instruction** 📝 UNDOCUMENTED
|
||||
|
||||
**Implementation has a SEGMENT instruction (0x27/0x3F):**
|
||||
```rust
|
||||
Segment(u32) = 0x3F,
|
||||
```
|
||||
|
||||
**This is completely undocumented!**
|
||||
|
||||
From model.rs:
|
||||
```rust
|
||||
Self::Segment => write!(f, "[SEGMENT]"),
|
||||
```
|
||||
|
||||
From codegen.rs:
|
||||
```rust
|
||||
Opcode::Segment => build_segment_instruction(&args),
|
||||
```
|
||||
|
||||
**Purpose unclear, needs documentation!**
|
||||
|
||||
---
|
||||
|
||||
### 12. **Data Instruction** 📝 UNDOCUMENTED
|
||||
|
||||
**Implementation has a DATA instruction (0x3E):**
|
||||
```rust
|
||||
Data(u32) = 0x3E,
|
||||
```
|
||||
|
||||
**This appears to be a meta-instruction for embedding raw data, but it's undocumented in the assembly reference!**
|
||||
|
||||
---
|
||||
|
||||
### 13. **INC/DEC Instruction Encoding** ⚠️ MINOR
|
||||
|
||||
**Implementation (codegen.rs:293-299):**
|
||||
```rust
|
||||
fn build_inc_dec_instruction(opcode: Opcode, args: &[Token]) -> Result<Instruction, AssembleError> {
|
||||
let reg = expect_token!(reg_token, Register)?;
|
||||
match opcode {
|
||||
Opcode::Inc => Ok(Instruction::Increment(args!(R, sr1: reg))),
|
||||
Opcode::Dec => Ok(Instruction::Decrement(args!(R, sr1: reg))),
|
||||
```
|
||||
|
||||
**Reality:** INC/DEC only set SR1 field, not DR field.
|
||||
|
||||
**But args.rs shows:**
|
||||
```rust
|
||||
impl RTypeArgs {
|
||||
pub fn new(...) -> Self {
|
||||
let sr1 = sr1.unwrap_or_default(); // Defaults to NoReg
|
||||
let dr = dr.unwrap_or_default(); // Defaults to NoReg
|
||||
```
|
||||
|
||||
**So the DR field gets set to NoReg, which is correct per documentation.**
|
||||
|
||||
**However, the Display impl (instructions.rs:449) shows:**
|
||||
```rust
|
||||
Self::Increment(a) | Self::Decrement(a) => write!(f, " {}", a.sr1),
|
||||
```
|
||||
|
||||
**This is correct - only shows SR1 in disassembly.**
|
||||
|
||||
---
|
||||
|
||||
### 14. **Shift Instruction Operand Order** ⚠️ MINOR
|
||||
|
||||
**Implementation (codegen.rs:301-312):**
|
||||
```rust
|
||||
fn build_shift_instruction(opcode: Opcode, args: &[Token]) -> Result<Instruction, AssembleError> {
|
||||
let reg = expect_token!(reg_token, Register)?;
|
||||
let amount = expect_token!(amount_token, Immediate)? as u8;
|
||||
|
||||
match opcode {
|
||||
Opcode::Shl => Ok(Instruction::ShiftLeft(args!(R, sr1: reg, shamt: amount))),
|
||||
```
|
||||
|
||||
**This only handles LITERAL shift amounts, not REGISTER shift amounts!**
|
||||
|
||||
**Documentation states both are supported:**
|
||||
```asm
|
||||
shl rg0, 2 ; Literal shift
|
||||
shl rg0, rg1 ; Register shift
|
||||
```
|
||||
|
||||
**The current codegen only handles the literal case!**
|
||||
|
||||
**This is a BUG in the implementation - register shifts aren't properly assembled!**
|
||||
|
||||
---
|
||||
|
||||
### 15. **Jump Instruction Operand Order** ⚠️ CONFUSION
|
||||
|
||||
**Documentation shows assembly syntax:**
|
||||
```asm
|
||||
jmp addr [, offset_reg]
|
||||
```
|
||||
|
||||
**But implementation (codegen.rs:256-270):**
|
||||
```rust
|
||||
fn build_jump_instruction(opcode: Opcode, args: &[Token]) -> Result<Instruction, AssembleError> {
|
||||
let address = expect_token!(address_token, Immediate)?;
|
||||
let offset = expect_token!(offset_token, Register)?;
|
||||
let instruction_args = args!(I, immediate: address as u16, r1: offset);
|
||||
```
|
||||
|
||||
**This expects:**
|
||||
1. First arg: immediate (address)
|
||||
2. Second arg: register (offset)
|
||||
|
||||
**So assembly syntax should be:**
|
||||
```asm
|
||||
jmp immediate, offset_register
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```asm
|
||||
jmp 0x1000, zero ; Jump to 0x1000
|
||||
jmp 4, ret ; Jump to (ret + 4)
|
||||
```
|
||||
|
||||
**Documentation syntax is correct, but parameter names are confusing!**
|
||||
|
||||
The "address" is actually an OFFSET, and the register is the BASE!
|
||||
|
||||
**Better naming:**
|
||||
```asm
|
||||
jmp offset, base_register
|
||||
; Target = base_register + offset
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 16. **NOT Instruction Operand Count** ✅ MINOR ISSUE
|
||||
|
||||
**Documentation shows:**
|
||||
```asm
|
||||
not src, dest ; Two operands
|
||||
```
|
||||
|
||||
**Implementation (instructions.rs:428-429):**
|
||||
```rust
|
||||
Self::Compare(args) | Self::Not(args) => {
|
||||
write!(f, " {}, {}", args.sr1, args.sr2)
|
||||
}
|
||||
```
|
||||
|
||||
**This displays BOTH sr1 and sr2 for NOT!**
|
||||
|
||||
**But codegen.rs:354-362:**
|
||||
```rust
|
||||
fn build_not_instruction(args: &[Token]) -> Result<Instruction, AssembleError> {
|
||||
let reg = expect_token!(reg_token, Register)?;
|
||||
let dest = expect_token!(dest_token, Register)?;
|
||||
Ok(Instruction::Not(args!(R, sr1: reg, dr: dest)))
|
||||
```
|
||||
|
||||
**Sets sr1 and dr, NOT sr1 and sr2!**
|
||||
|
||||
**The Display impl is WRONG - should show sr1 and dr:**
|
||||
```rust
|
||||
Self::Not(args) => write!(f, " {}, {}", args.sr1, args.dr)
|
||||
```
|
||||
|
||||
**This is a display bug in the implementation!**
|
||||
|
||||
---
|
||||
|
||||
### 17. **Register File Indexing** ✅ CORRECT
|
||||
|
||||
**Documentation and implementation both agree:**
|
||||
- 0x00-0x0F: rg0-rgf (general purpose)
|
||||
- 0x10: acc
|
||||
- 0x11: spr
|
||||
- 0x12: bpr
|
||||
- 0x13: ret
|
||||
- 0x14: idr
|
||||
- 0x15: mmr
|
||||
- 0x16: zero
|
||||
- 0x17: noreg
|
||||
|
||||
**This matches perfectly.**
|
||||
|
||||
---
|
||||
|
||||
### 18. **Immediate Arithmetic Destination** ⚠️ MINOR
|
||||
|
||||
**Implementation (codegen.rs:314-330):**
|
||||
```rust
|
||||
fn build_arithmetic_immediate_instruction(...) -> Result<Instruction, AssembleError> {
|
||||
let reg = expect_token!(reg_token, Register)?;
|
||||
let immediate = expect_token!(immediate_token, Immediate)? as u16;
|
||||
let dest = expect_token!(dest_token, Register)?;
|
||||
let instruction_args = args!(I, immediate: immediate, r1: reg, r2: dest);
|
||||
```
|
||||
|
||||
**This REQUIRES three arguments:**
|
||||
1. Source register
|
||||
2. Immediate value
|
||||
3. Destination register
|
||||
|
||||
**But documentation says destination is optional:**
|
||||
```
|
||||
iadd src_reg, imm [, dest_reg] ; dest optional
|
||||
```
|
||||
|
||||
**Reality:** The assembler REQUIRES the destination register!
|
||||
|
||||
**If you want in-place operation:**
|
||||
```asm
|
||||
iadd rg0, 10, rg0 ; Required to specify rg0 twice
|
||||
```
|
||||
|
||||
**Not:**
|
||||
```asm
|
||||
iadd rg0, 10 ; This won't work!
|
||||
```
|
||||
|
||||
**Documentation is misleading - destination is NOT optional!**
|
||||
|
||||
---
|
||||
|
||||
### 19. **Memory Instruction Offsets** ✅ CORRECT
|
||||
|
||||
**Implementation correctly handles signed 16-bit offsets:**
|
||||
```rust
|
||||
let offset = expect_token!(offset_token, Immediate)? as u16;
|
||||
```
|
||||
|
||||
**These are stored as u16 but interpreted as signed i16 at runtime.**
|
||||
|
||||
**Documentation is correct about this.**
|
||||
|
||||
---
|
||||
|
||||
### 20. **Instruction Opcode Values** ✅ VERIFIED
|
||||
|
||||
Comparing model.rs opcodes with instructions.rs:
|
||||
|
||||
| Instruction | model.rs | instructions.rs | Match |
|
||||
|-------------|----------|-----------------|-------|
|
||||
| Nop | 0x00 | 0x0 | ✅ |
|
||||
| Mov | 0x01 | 0x1 | ✅ |
|
||||
| MovSigned | 0x02 | 0x2 | ✅ |
|
||||
| LoadByte | 0x03 | 0x3 | ✅ |
|
||||
| ... | ... | ... | ✅ |
|
||||
| AddImmediate | 0x25 | 0x25 | ✅ |
|
||||
| SubImmediate | 0x26 | 0x26 | ✅ |
|
||||
| Segment | 0x27 | 0x3F | ❌ MISMATCH! |
|
||||
|
||||
**CRITICAL:** Segment instruction has opcode **0x27** in model.rs but **0x3F** in instructions.rs!
|
||||
|
||||
---
|
||||
|
||||
## Summary of Critical Issues
|
||||
|
||||
### Must Fix in Documentation:
|
||||
|
||||
1. ✅ **Stack grows DOWNWARD** - flip all diagrams
|
||||
2. ✅ **CALL expansion** - uses stack, not ret register directly
|
||||
3. ✅ **RETURN expansion** - loads from stack, jumps to ret+4
|
||||
4. ✅ **Stack frame layout** - flip diagram vertically
|
||||
5. ✅ **Load pseudo scratch register** - uses DEST reg, not rgf
|
||||
6. ✅ **Store pseudo scratch register** - uses ACC, not rgf
|
||||
7. ✅ **Add PUSHA/POPA documentation**
|
||||
8. ✅ **Add SEGMENT instruction documentation**
|
||||
9. ✅ **Add DATA instruction documentation**
|
||||
10. ✅ **Clarify LUI immediate value handling**
|
||||
11. ✅ **Fix endianness** - data definition uses BIG endian
|
||||
12. ✅ **IADD/ISUB destination NOT optional**
|
||||
13. ✅ **Add "null" as alias for noreg**
|
||||
14. ✅ **Fix Segment opcode** - 0x27 or 0x3F?
|
||||
|
||||
### Potential Implementation Bugs:
|
||||
|
||||
1. ⚠️ **Shift instruction** - doesn't handle register shifts
|
||||
2. ⚠️ **NOT display** - shows sr2 instead of dr
|
||||
3. ⚠️ **RETURN +4 offset** - why is this needed?
|
||||
4. ⚠️ **Segment opcode mismatch** - 0x27 vs 0x3F
|
||||
|
||||
### Minor Documentation Improvements:
|
||||
|
||||
1. Add explicit examples of stack growth direction
|
||||
2. Show complete memory layout diagrams
|
||||
3. Document which registers are volatile/preserved
|
||||
4. Add troubleshooting section for common mistakes
|
||||
5. Clarify jump instruction parameter semantics
|
||||
@@ -0,0 +1,4 @@
|
||||
|
||||
- we definitely need to be able to use registers for shift operations.
|
||||
- we need logical boolean operations in addition to the bitwise ones.
|
||||
- better conditionals.
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,149 @@
|
||||
# DSA Documentation Inconsistencies Analysis
|
||||
|
||||
## 1. Register Descriptions
|
||||
|
||||
### Issue: System Registers vs Assembly-Accessible Registers
|
||||
- `registers.md` lists MAR, STS, CIR, MDR as "System" registers
|
||||
- These are NOT mentioned in `dsa_assembly_reference.md` or `instruction_set.md`
|
||||
- **Resolution**: System registers are internal CPU registers not directly accessible in assembly. They should be documented separately from programmer-accessible registers.
|
||||
|
||||
### Issue: Register Naming Inconsistencies
|
||||
- `registers.md` uses `RG0-RGF` (uppercase)
|
||||
- `dsa_assembly_reference.md` uses `rg0-rgf` (lowercase)
|
||||
- **Resolution**: Assembly syntax should be lowercase (standard convention)
|
||||
|
||||
### Issue: NOREG Register
|
||||
- `registers.md`: "Loads/using as dest register must cause an illegal instruction trap"
|
||||
- `dsa_assembly_reference.md`: "on-read/write: illegal instruction fault"
|
||||
- **Resolution**: Consistent terminology needed - use "illegal instruction fault"
|
||||
|
||||
## 2. Instruction Operand Order Inconsistencies
|
||||
|
||||
### Issue: Load Instructions
|
||||
- `instruction_set.md`: `LDB BaseReg, Offset, DestReg`
|
||||
- `dsa_assembly_reference.md`: `LDB base_reg, dest_reg [, offset]`
|
||||
- **Resolution**: Assembly reference shows standard syntax (base, dest, offset optional), instruction set shows encoding order
|
||||
|
||||
### Issue: Store Instructions
|
||||
- `instruction_set.md`: `STB SrcReg, BaseReg, Offset`
|
||||
- `dsa_assembly_reference.md`: `STB src_reg, base_reg [, offset]`
|
||||
- **Resolution**: Consistent - offset is optional
|
||||
|
||||
### Issue: Immediate Load Instructions
|
||||
- `instruction_set.md`: `LLI DstReg, Value` (destination first)
|
||||
- `dsa_assembly_reference.md`: `LLI imm, dest_reg` (immediate first)
|
||||
- **Resolution**: Assembly reference shows gas-style syntax (source, dest), instruction set shows encoding order
|
||||
|
||||
### Issue: Jump Instructions
|
||||
- `instruction_set.md`: `JMP DestReg, Offset | Address`
|
||||
- `dsa_assembly_reference.md`: `JMP addr [, offset_reg]` or `JMP imm, offset_reg`
|
||||
- **Resolution**: Different perspectives - instruction set shows encoding, assembly shows usage
|
||||
|
||||
## 3. Instruction Behavior Differences
|
||||
|
||||
### Issue: IADD/ISUB Operands
|
||||
- `instruction_set.md`: `IADD Src1, Literal, Dest` (3 operands)
|
||||
- `dsa_assembly_reference.md`: `IADD src_reg, imm [, dest_reg]` (dest optional)
|
||||
- **Resolution**: Assembly allows dest to default to src_reg
|
||||
|
||||
### Issue: SHL/SHR Operands
|
||||
- `instruction_set.md`: `SHL Reg, Literal | ValReg`
|
||||
- `dsa_assembly_reference.md`: `SHL reg, shift_amount`
|
||||
- **Resolution**: Both literal and register shifts supported
|
||||
|
||||
## 4. Pseudo-Instruction Inconsistencies
|
||||
|
||||
### Issue: PUSH/POP Expansion
|
||||
- `pseudoinstructions.md`:
|
||||
- PUSH = `INC SPR` then `STW register, SPR`
|
||||
- POP = `LDW SPR, register` then `DEC SPR`
|
||||
- Standard stack conventions suggest PUSH should decrement (grow down)
|
||||
- **Resolution**: Clarify stack growth direction
|
||||
|
||||
### Issue: LDB/LDH/LDW Pseudo vs Hardware
|
||||
- `pseudoinstructions.md` lists LDB, LDH, LDW as pseudo-instructions with label addressing
|
||||
- `instruction_set.md` lists them as hardware instructions
|
||||
- **Resolution**: Both exist - hardware instructions use registers, pseudo-instructions add label support
|
||||
|
||||
### Issue: LWI Naming
|
||||
- `dsa_assembly_reference.md`: LWI = Load Word Immediate (load address)
|
||||
- Could be confused with "Load Word Immediate" (load literal value)
|
||||
- **Resolution**: LWI specifically means "Load Word address Into register"
|
||||
|
||||
## 5. Calling Convention Details
|
||||
|
||||
### Issue: Argument Offsets
|
||||
- Calling convention says "first 3 args at offsets 8, 12, 16"
|
||||
- This assumes 32-bit words (4 bytes each)
|
||||
- Offset 8 is position of first argument (after return address at offset 4, and old BPR at offset 0)
|
||||
- **Resolution**: Clarify that SPR+0 = old BPR, SPR+4 = return address, SPR+8 = first arg
|
||||
|
||||
### Issue: Return Value Location
|
||||
- Says "Store return value (if any) to `spr+8`"
|
||||
- This overwrites the first argument
|
||||
- **Resolution**: This is intentional - return value replaces first argument position after cleanup
|
||||
|
||||
## 6. Missing Information
|
||||
|
||||
### From instruction_set.md not in assembly reference:
|
||||
- Instruction encoding details (R-type, I-type, J-type)
|
||||
- Hex opcodes for each instruction
|
||||
- Alignment requirements for memory operations
|
||||
- Sign extension behavior details
|
||||
|
||||
### From assembly reference not in instruction_set:
|
||||
- Complete pseudo-instruction expansions showing what they compile to
|
||||
- Library examples (multiply, print)
|
||||
- Detailed calling convention walkthrough
|
||||
- Module system (INCLUDE directive)
|
||||
|
||||
### From registers.md not elsewhere:
|
||||
- STS (Status Register) bit layout
|
||||
- Boot values for status flags
|
||||
- System registers (MAR, STS, CIR, MDR)
|
||||
|
||||
## 7. Terminology Inconsistencies
|
||||
|
||||
- "halfword" vs "half-word" vs "16-bit value"
|
||||
- "word" assumed to be 32-bit (should be explicit)
|
||||
- "register" vs "reg" in syntax
|
||||
- "immediate" vs "literal" vs "constant"
|
||||
|
||||
## 8. Critical Missing Details
|
||||
|
||||
### CALL and RETURN Pseudo-instructions
|
||||
- Assembly reference shows them but doesn't show their expansion
|
||||
- Need to document what they expand to
|
||||
|
||||
### Label Addressing Mode
|
||||
- Shows expansions for loads/stores with labels
|
||||
- Uses RGF as scratch register - should this be documented as reserved for this purpose?
|
||||
|
||||
### Stack Direction
|
||||
- Not explicitly stated whether stack grows up or down
|
||||
- PUSH uses INC SPR (suggests growing up) - unusual!
|
||||
|
||||
## Recommendations
|
||||
|
||||
1. **Separate Documentation into Logical Layers**:
|
||||
- ISA Specification (hardware-level, for CPU implementers)
|
||||
- Assembly Language Reference (for programmers)
|
||||
- ABI/Calling Convention (for compiler/linker writers)
|
||||
|
||||
2. **Standardize Terminology**:
|
||||
- Use consistent casing (lowercase for assembly mnemonics)
|
||||
- Define terms clearly (word = 32-bit, halfword = 16-bit, byte = 8-bit)
|
||||
- Distinguish "literal" (immediate value in code) from "address" (memory location)
|
||||
|
||||
3. **Document Stack Convention Clearly**:
|
||||
- Explicitly state stack grows upward (unusual but valid)
|
||||
- Show memory layout diagrams
|
||||
|
||||
4. **Show Complete Pseudo-instruction Expansions**:
|
||||
- CALL, RETURN need full expansion documentation
|
||||
- Document which register(s) are used as temporaries
|
||||
|
||||
5. **Clarify Register Usage Conventions**:
|
||||
- ACC: used by pseudo-instructions, volatile
|
||||
- RGF: used by label addressing, volatile
|
||||
- RG0-RGE: general purpose, callee may use per calling convention
|
||||
@@ -0,0 +1,26 @@
|
||||
# General TODOs
|
||||
|
||||
# Bugfixes
|
||||
- [x] [EASY] Investigate the logical AND operator (`&&`) not compiling - either a lexer or parser issue.
|
||||
- **note**: this was a parser issue.
|
||||
|
||||
# Missing features
|
||||
- [x] [MEDIUM] Get shift operations working correctly.
|
||||
- [ ] [MEDIUM] Proper prefix/postfix inc/dec implementation. Slightly more complex, as we need to check for a variable and modify it in place.
|
||||
- [ ] [EASY] Add multiply and divide operations to code generation
|
||||
- **note**: very easy to do but our division algorithm is hopelessly slow so not worth doing for now.
|
||||
|
||||
# Performance Improvements
|
||||
- [ ] [MEDIUM] implement a proper div/mod library that's not slow af.
|
||||
- [ ] [HARD] Immediate operations for values that support it (up to +/- u16::max for addi and subi respectively)
|
||||
- this requires significant complexity in code generation as we need to traverse down the tree when we come across these operations to prevent additional register allocations.
|
||||
|
||||
# Compiler optimisations
|
||||
|
||||
# Codegen improvements
|
||||
- [ ] [MEDIUM / time consuming] Add scoping to code generation
|
||||
- [ ] [MEDIUM / time consuming] Rewrite entire codegen to improve code quality and make the code more readable.
|
||||
- [ ] type-safe instruction builder
|
||||
- [ ] Instruction & Register enums
|
||||
- [ ] Instruction builder helper fns eg `fn add(left: &Register, right: &Register, dest: &Register) -> Instruction`
|
||||
- [ ] Instruction Block types.
|
||||
@@ -0,0 +1,25 @@
|
||||
use super::Syntax;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
impl Syntax {
|
||||
pub fn dsc() -> Self {
|
||||
Syntax {
|
||||
language: "Damn Simple Code",
|
||||
case_sensitive: false,
|
||||
comment: "//",
|
||||
comment_multiline: ["/*", "*/"],
|
||||
hyperlinks: BTreeSet::from(["http"]),
|
||||
keywords: BTreeSet::from([
|
||||
"include", "fn", "let", "const", "static", "if", "else", "while", "for",
|
||||
"break", "continue", "loop", "return",
|
||||
]),
|
||||
types: BTreeSet::from([
|
||||
"u32", "u16", "u8", "i32", "i16", "i8", "str", "char", "bool", "void",
|
||||
]),
|
||||
special: BTreeSet::from([
|
||||
",", ";", ".", ":", "=", "+", "-", "*", "/", "%", "&", "|", "^", "~",
|
||||
"!", "?", "<", ">", "<<", ">>", "==", "!=", "<=", ">=", "&&", "||",
|
||||
]),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
#![allow(dead_code)]
|
||||
pub mod dsa;
|
||||
pub mod dsc;
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
[package]
|
||||
name = "dsx-build"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
|
||||
[dependencies]
|
||||
compiler = { path = "../compiler" }
|
||||
assembler = { path = "../assembler" }
|
||||
chrono = "0.4.43"
|
||||
@@ -0,0 +1,200 @@
|
||||
use std::process::{Command, Stdio};
|
||||
use std::{
|
||||
env, fs,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use crate::templates::{Dsa, Dsc, Template};
|
||||
|
||||
mod templates;
|
||||
|
||||
/// Runs `cmd` to completion and terminates this process if it does not succeed.
///
/// Both failure modes are reported on stderr before exiting with status 1:
/// the command could not be started at all, or it ran and returned a
/// non-success exit status. On success the function simply returns.
fn run(cmd: &mut Command) {
    match cmd.status() {
        Ok(status) if status.success() => {}
        Ok(status) => {
            // The child ran but failed; say which command and how it exited.
            eprintln!("Command {cmd:?} failed: {status}");
            std::process::exit(1);
        }
        Err(err) => {
            // The child could not be spawned (e.g. the program is not installed).
            eprintln!("Failed to execute {cmd:?}: {err}");
            std::process::exit(1);
        }
    }
}
|
||||
|
||||
fn main() {
|
||||
// Very small CLI – only three sub‑commands.
|
||||
let args: Vec<String> = env::args().collect();
|
||||
if args.len() < 2 {
|
||||
eprintln!("Usage: dsx-build <new|build|package> [options]");
|
||||
std::process::exit(1);
|
||||
}
|
||||
match args[1].as_str() {
|
||||
"new" => cmd_new(&args[2..]),
|
||||
"build" => cmd_build(),
|
||||
"package" => todo!("Package manager stub – not implemented yet."),
|
||||
_ => {
|
||||
eprintln!("Unknown command: {}", args[1]);
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- new project ----------------------------------------------------
|
||||
fn cmd_new(args: &[String]) {
|
||||
let mut lang = "dsa";
|
||||
for i in 0..args.len() {
|
||||
if args[i] == "--lang" && i + 1 < args.len() {
|
||||
lang = &args[i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
let lib = args.contains(&"--lib".to_string());
|
||||
|
||||
// Determine project root: a subdirectory named after the supplied --name argument.
|
||||
let mut name_opt = None;
|
||||
for i in 0..args.len() {
|
||||
if args[i] == "--name" && i + 1 < args.len() {
|
||||
name_opt = Some(&args[i + 1]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let project_name = match name_opt {
|
||||
Some(name) => name.to_string(),
|
||||
None => {
|
||||
eprintln!("Error: --name argument required");
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let cwd = env::current_dir().unwrap();
|
||||
let src_path = cwd.join(&project_name).join("src");
|
||||
fs::create_dir_all(&src_path).expect("Failed to create project directory");
|
||||
|
||||
match lang {
|
||||
"dsa" => {
|
||||
// Minimal DSA binary template.
|
||||
let path = src_path.join(format!("main.dsa"));
|
||||
|
||||
let template = Dsa::create(&project_name, lib);
|
||||
|
||||
fs::write(path, template).expect("Unable to write DSA file");
|
||||
}
|
||||
"dsc" => {
|
||||
let path = src_path.join(format!("main.dsc"));
|
||||
|
||||
let template = Dsc::create(&project_name, lib);
|
||||
|
||||
fs::write(path, template).expect("Unable to write DSC file");
|
||||
}
|
||||
_ => {
|
||||
eprintln!("Unsupported language: {}", lang);
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
fs::create_dir_all(src_path.join("lib")).expect("Failed to create lib directory");
|
||||
fs::write(
|
||||
src_path.join("lib/print.dsa"),
|
||||
templates::create_print_lib(),
|
||||
)
|
||||
.expect("Failed to create print.dsa");
|
||||
fs::write(
|
||||
src_path.join("lib/maths.dsa"),
|
||||
templates::create_maths_lib(),
|
||||
)
|
||||
.expect("Failed to create maths.dsa");
|
||||
|
||||
println!(
|
||||
"Created new {} project in {}.",
|
||||
lang,
|
||||
src_path.parent().unwrap().display()
|
||||
);
|
||||
}
|
||||
|
||||
// ---------- build ----------------------------------------------------------
|
||||
fn cmd_build() {
|
||||
let cwd = env::current_dir().unwrap();
|
||||
|
||||
// Detect .dsc or .dsa files in current directory.
|
||||
let mut has_dsc = false;
|
||||
let mut has_dsa = false;
|
||||
for entry in fs::read_dir(&cwd.join("src")).expect("unable to read dir") {
|
||||
if let Ok(entry) = entry {
|
||||
let path = entry.path();
|
||||
if path.extension().and_then(|s| s.to_str()) == Some("dsc") {
|
||||
has_dsc = true;
|
||||
} else if path.extension().and_then(|s| s.to_str()) == Some("dsa") {
|
||||
has_dsa = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !has_dsc && !has_dsa {
|
||||
eprintln!("No .dsc or .dsa source found in src directory.");
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
// Assemble main.dsa to a dsb binary.
|
||||
println!("Assembling Project to a DSB binary...");
|
||||
let build_dir = cwd.join("build");
|
||||
fs::create_dir_all(&build_dir).expect("Failed to create build directory");
|
||||
|
||||
// Copy everything from `cwd/src` to the build directory.
|
||||
fn copy_recursively(src: &Path, dst: &Path) {
|
||||
if src.is_file() {
|
||||
fs::create_dir_all(dst.parent().unwrap())
|
||||
.expect("Failed to create parent directory");
|
||||
fs::copy(src, dst).expect("Failed to copy file");
|
||||
} else if src.is_dir() {
|
||||
for entry in fs::read_dir(src).expect("Unable to read source dir") {
|
||||
let entry = entry.expect("Failed to read entry");
|
||||
let child_src = entry.path();
|
||||
let child_dst = dst.join(entry.file_name());
|
||||
copy_recursively(&child_src, &child_dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let src_dir = cwd.join("src");
|
||||
if src_dir.exists() {
|
||||
copy_recursively(&src_dir, &build_dir);
|
||||
}
|
||||
|
||||
// Change current working directory to the build directory.
|
||||
env::set_current_dir(&build_dir).expect("Failed to change to build directory");
|
||||
|
||||
if has_dsc {
|
||||
println!("Compiling DSC to DSA...");
|
||||
fn compile_recursive(path: &Path) {
|
||||
if path.is_dir() {
|
||||
for entry in fs::read_dir(path).expect("unable to read dir") {
|
||||
let entry = entry.expect("failed to read entry");
|
||||
compile_recursive(&entry.path());
|
||||
}
|
||||
} else if path.extension().and_then(|s| s.to_str()) == Some("dsc") {
|
||||
let input_path = path;
|
||||
let output_path = path.with_extension("dsa");
|
||||
compiler::compile_file(&input_path, &output_path).unwrap_or_else(|e| {
|
||||
eprintln!("Failed to compile {:?}: {}", input_path, e);
|
||||
std::process::exit(1);
|
||||
});
|
||||
}
|
||||
}
|
||||
compile_recursive(&build_dir);
|
||||
}
|
||||
|
||||
// Replace .dsc with .dsa only in include statements, recursively for each file.
|
||||
let mut sed_cmd = Command::new("bash");
|
||||
sed_cmd.args(&[
|
||||
"-c",
|
||||
&format!(
|
||||
"find \"{}\" -type f -name '*.dsa' -exec sed -i '/^include/ s/\\.dsc/.dsa/g' {{}} +",
|
||||
build_dir.display()
|
||||
),
|
||||
]);
|
||||
run(&mut sed_cmd);
|
||||
|
||||
fs::create_dir_all(&cwd.join("artifacts")).expect("Failed to create build directory");
|
||||
assembler::assemble_file("./main.dsa", "../artifacts/out.dsb").unwrap_or_else(|e| {
|
||||
eprintln!("Failed to assemble {:?}: {}", "./main.dsa", e);
|
||||
std::process::exit(1);
|
||||
});
|
||||
|
||||
println!("Build finished. Binary at {}/main.dsb", build_dir.display());
|
||||
}
|
||||
@@ -0,0 +1,589 @@
|
||||
/// A source-file template that can be rendered for a named project.
///
/// Implementors supply one rendering for library projects and one for binary
/// projects; [`Template::create`] picks between them.
pub trait Template {
    /// Renders the library variant of the template for `project`.
    fn lib(project: &str) -> String;

    /// Renders the binary variant of the template for `project`.
    fn bin(project: &str) -> String;

    /// Renders the template for `project`: the library variant when `lib` is
    /// `true`, the binary variant otherwise.
    fn create(project: &str, lib: bool) -> String {
        let render: fn(&str) -> String = if lib { Self::lib } else { Self::bin };
        render(project)
    }
}

/// Marker type selecting the DSA templates.
pub struct Dsa;

/// Marker type selecting the DSC templates.
pub struct Dsc;
|
||||
|
||||
impl Template for Dsa {
|
||||
fn lib(project: &str) -> String {
|
||||
format!(
|
||||
r#"//
|
||||
lib.dsa
|
||||
// usage:
|
||||
//
|
||||
// include {project} "<relative path>"
|
||||
//
|
||||
// usage for {project}_main:
|
||||
// push (arg1)
|
||||
// push (arg0)
|
||||
// call {project}::{project}_main
|
||||
// pop (arg0)
|
||||
// pop (arg1)
|
||||
|
||||
// Example data declarations
|
||||
// dw example_data: 0x0000
|
||||
|
||||
// Main function template
|
||||
{project}_main:
|
||||
// the correct way to start a function as defined by the calling convention
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
// explanation of how to access args
|
||||
ldw bpr, rg0, 8 // arg 0
|
||||
ldw bpr, rg0, 12 // arg 1
|
||||
|
||||
// your code goes here
|
||||
// Example: load example_data into rg1
|
||||
// ldw example_data, rg1
|
||||
|
||||
// the correct way to end a function as defined by the calling convention
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
"#,
|
||||
)
|
||||
}
|
||||
|
||||
fn bin(project: &str) -> String {
|
||||
format!(
|
||||
r#"
|
||||
// GENERATED BY DSX-BUILD
|
||||
// Generated at: {timestamp}
|
||||
// Project name: {project}
|
||||
|
||||
// Imports
|
||||
include print: "./lib/print.dsa"
|
||||
|
||||
// Globals & Reserved Memory
|
||||
dw stack: 0x10000
|
||||
db message: "Process Exited with code:"
|
||||
|
||||
// Entry Point
|
||||
_init:
|
||||
ldw stack, bpr
|
||||
mov bpr, spr
|
||||
push zero
|
||||
call main
|
||||
call print::print_newline
|
||||
lwi message, rg0
|
||||
push rg0
|
||||
call print::print
|
||||
pop zero
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
hlt
|
||||
|
||||
main:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
// Your code goes here
|
||||
|
||||
// Return zero
|
||||
stw zero, bpr, 8
|
||||
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return"#,
|
||||
timestamp = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Template for Dsc {
|
||||
fn lib(project: &str) -> String {
|
||||
format!(
|
||||
r#"
|
||||
// GENERATED BY DSX-BUILD
|
||||
// Generated at: {timestamp}
|
||||
// Project name: {project}
|
||||
|
||||
// Imports
|
||||
include print: "./lib/print.dsa";
|
||||
|
||||
// Main Function
|
||||
fn {project}_main() -> u32 {{
|
||||
return 0;
|
||||
}}"#,
|
||||
timestamp = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string()
|
||||
)
|
||||
}
|
||||
|
||||
fn bin(project: &str) -> String {
|
||||
format!(
|
||||
r#"
|
||||
// GENERATED BY DSX-BUILD
|
||||
// Generated at: {timestamp}
|
||||
// Project name: {project}
|
||||
|
||||
// Imports
|
||||
include print: "./lib/print.dsa";
|
||||
|
||||
// Main Function
|
||||
fn main() -> u32 {{
|
||||
return 0;
|
||||
}}"#,
|
||||
timestamp = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S").to_string()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_print_lib() -> String {
|
||||
format!(
|
||||
r#"
|
||||
// lib:
|
||||
// print.dsa
|
||||
|
||||
// usage:
|
||||
//
|
||||
// include print "<relative path>""
|
||||
//
|
||||
// usage for print:
|
||||
// push (register containing address of string)
|
||||
// push pcx
|
||||
// jmp print::print
|
||||
//
|
||||
// usage for reset:
|
||||
// push pcx
|
||||
// jmp print::reset
|
||||
//
|
||||
// usage for clear:
|
||||
// push pcx
|
||||
// jmp print::clear
|
||||
//
|
||||
// usage for print_byte:
|
||||
// push (register containing byte)
|
||||
// push pcx
|
||||
// jmp print::print_byte
|
||||
//
|
||||
// usage for print_word:
|
||||
// push (register containing word)
|
||||
// push pcx
|
||||
// jmp print::print_word
|
||||
//
|
||||
// usage for print_num:
|
||||
// push (register containing number to print in decimal)
|
||||
// push pcx
|
||||
// jmp print::print_num
|
||||
//
|
||||
|
||||
include maths "./maths.dsa"
|
||||
|
||||
dw display: 0x20000
|
||||
dw current: 0x20000
|
||||
|
||||
// ------------------------------------------
|
||||
// prints the string at addr(arg[0]) to the screen. (no trailing whitespace unless explicitly provided)
|
||||
print:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8
|
||||
ldw current, rg1
|
||||
|
||||
_print_loop:
|
||||
ldb rg0, acc
|
||||
cmp acc, zero
|
||||
jeq _end
|
||||
stb acc, rg1
|
||||
|
||||
addi rg0, 1
|
||||
addi rg1, 1
|
||||
|
||||
jmp _print_loop
|
||||
|
||||
// ------------------------------------------
|
||||
println:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8
|
||||
ldw current, rg1
|
||||
|
||||
_println_loop:
|
||||
ldb rg0, acc
|
||||
cmp acc, zero
|
||||
jeq _println_end
|
||||
stb acc, rg1
|
||||
|
||||
addi rg0, 1
|
||||
addi rg1, 1
|
||||
|
||||
jmp _println_loop
|
||||
|
||||
_println_end:
|
||||
call print_newline
|
||||
jmp _end
|
||||
|
||||
// ------------------------------------------
|
||||
// prints the value of arg[0] to the screen.
|
||||
print_word:
|
||||
// initialise
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
// load byte into acc
|
||||
ldw bpr, rg0, 8
|
||||
ldw current, rg1
|
||||
|
||||
addi rg1, 3
|
||||
|
||||
stb rg0, rg1
|
||||
subi rg1, 1
|
||||
shr rg0, 8
|
||||
stb rg0, rg1
|
||||
subi rg1, 1
|
||||
shr rg0, 8
|
||||
stb rg0, rg1
|
||||
subi rg1, 1
|
||||
shr rg0, 8
|
||||
stb rg0, rg1
|
||||
|
||||
addi rg1, 4
|
||||
jmp _end
|
||||
|
||||
// ------------------------------------------
|
||||
// prints the last byte of arg[0] to the screen.
|
||||
print_byte:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8
|
||||
ldw current, rg1
|
||||
|
||||
stb rg0, rg1
|
||||
addi rg1, 1
|
||||
jmp _end
|
||||
|
||||
// ------------------------------------------
|
||||
// prints the value of arg[0] to the screen in hex.
|
||||
print_hex_word:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw current, rg1
|
||||
|
||||
ldb bpr, rg0, 8
|
||||
push rg0
|
||||
call _print_hex_byte
|
||||
addi spr, 4
|
||||
|
||||
ldb bpr, rg0, 9
|
||||
push rg0
|
||||
call _print_hex_byte
|
||||
addi spr, 4
|
||||
|
||||
ldb bpr, rg0, 10
|
||||
push rg0
|
||||
call _print_hex_byte
|
||||
addi spr, 4
|
||||
|
||||
ldb bpr, rg0, 11
|
||||
push rg0
|
||||
call _print_hex_byte
|
||||
addi spr, 4
|
||||
|
||||
jmp _end
|
||||
|
||||
// ------------------------------------------
|
||||
// prints the last byte of arg[0] to the screen in hex.
|
||||
print_hex_byte:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8
|
||||
ldw current, rg1
|
||||
|
||||
call _print_hex_byte
|
||||
jmp _end
|
||||
|
||||
// function body
|
||||
_print_hex_byte:
|
||||
// mask to get lower nibble
|
||||
lli 0xF, rg2
|
||||
// save rg0 state
|
||||
push rg0
|
||||
|
||||
shr rg0, 4
|
||||
and rg0, rg2, rg0
|
||||
call _print_hex_nibble
|
||||
pop rg0
|
||||
|
||||
and rg0, rg2, rg0
|
||||
call _print_hex_nibble
|
||||
return
|
||||
|
||||
// print a hex digit
|
||||
_print_hex_nibble:
|
||||
lli 10, rg3
|
||||
cmp rg0, rg3
|
||||
jlt _print_hex_nibble_number
|
||||
addi rg0, 0x37, rg0
|
||||
stb rg0, rg1
|
||||
addi rg1, 1
|
||||
return
|
||||
|
||||
// helper function.
|
||||
_print_hex_nibble_number:
|
||||
addi rg0, 0x30, rg0
|
||||
stb rg0, rg1
|
||||
addi rg1, 1
|
||||
return
|
||||
|
||||
// ------------------------------------------
|
||||
// print whitespace
|
||||
print_whitespace:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw current, rg1
|
||||
lli 0x20, rg0
|
||||
stb rg0, rg1
|
||||
addi rg1, 1
|
||||
jmp _end
|
||||
|
||||
// ------------------------------------------
|
||||
// print newline
|
||||
print_newline:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
// load variables into registers
|
||||
ldw display, rg0
|
||||
ldw current, rg1
|
||||
|
||||
// get the offset from the display base
|
||||
sub rg1, rg0, rg0
|
||||
|
||||
lwi 80, rg2
|
||||
pusha 3
|
||||
push rg0
|
||||
push rg2
|
||||
call maths::divmod
|
||||
pop zero // result
|
||||
pop rg3 // remainder
|
||||
popa 3
|
||||
|
||||
sub rg1, rg3, rg2
|
||||
addi rg2, 80, rg1
|
||||
|
||||
// _end saves the display state
|
||||
jmp _end
|
||||
|
||||
// ------------------------------------------
|
||||
// prints arg[0] as a decimal number to the screen.
|
||||
print_num:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8 // load number to print
|
||||
lli 0, rg5 // rg5 = digit counter
|
||||
|
||||
// check if number is zero
|
||||
cmp rg0, zero
|
||||
jne _print_num_extract_digits
|
||||
|
||||
// special case: print '0' for zero
|
||||
lli 0x30, rg6
|
||||
push rg6 // push digit to stack buffer
|
||||
lli 1, rg5 // we have 1 digit
|
||||
jmp _print_num_output
|
||||
|
||||
_print_num_extract_digits:
|
||||
// divide by 10 repeatedly to get digits
|
||||
cmp rg0, zero
|
||||
jeq _print_num_output
|
||||
|
||||
// call divmod(rg0, 10)
|
||||
push rg0 // dividend
|
||||
lli 10, rg1
|
||||
push rg1 // divisor (10)
|
||||
call maths::divmod
|
||||
pop rg0 // quotient (continue dividing this)
|
||||
pop rg1 // remainder (the digit)
|
||||
|
||||
// convert digit to ASCII and push to stack buffer
|
||||
addi rg1, 0x30, rg6 // convert to ASCII
|
||||
push rg6 // push digit to stack
|
||||
inc rg5 // increment digit counter
|
||||
|
||||
jmp _print_num_extract_digits
|
||||
|
||||
_print_num_output:
|
||||
// now print digits (pop them off in reverse order)
|
||||
ldw current, rg1 // get display pointer
|
||||
|
||||
_print_num_output_loop:
|
||||
// check if we've printed all digits
|
||||
cmp rg5, zero
|
||||
jeq _print_num_done
|
||||
|
||||
// pop digit and print it
|
||||
pop rg6
|
||||
stb rg6, rg1
|
||||
addi rg1, 1
|
||||
dec rg5
|
||||
|
||||
jmp _print_num_output_loop
|
||||
|
||||
_print_num_done:
|
||||
jmp _end
|
||||
|
||||
// ------------------------------------------
|
||||
// resets the cursor position on the screen to 0x20000. (0,0)
|
||||
reset:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
ldw display, rg1
|
||||
jmp _end
|
||||
|
||||
// ------------------------------------------
|
||||
// clears the screen
|
||||
clear:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
// display size = 2000 bytes / 500 words
|
||||
lli 500 rg0
|
||||
ldw display, rg1
|
||||
|
||||
_clear_loop:
|
||||
dec rg0
|
||||
stw zero, rg1
|
||||
addi rg1, 4
|
||||
cmp rg0, zero
|
||||
jgt _clear_loop
|
||||
jmp _end
|
||||
|
||||
// ------------------------------------------
|
||||
// return
|
||||
_end:
|
||||
stw rg1, current
|
||||
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
"#
|
||||
)
|
||||
}
|
||||
|
||||
pub fn create_maths_lib() -> String {
|
||||
format!(
|
||||
r#"
|
||||
// multiply.dsa
|
||||
// usage:
|
||||
//
|
||||
// include multiply "<relative path>"
|
||||
//
|
||||
// usage for multiply:
|
||||
// push (arg1)
|
||||
// push (arg0)
|
||||
// call multiply::multiply
|
||||
// pop (arg0)
|
||||
// pop (arg1)
|
||||
|
||||
multiply:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8 // load op 2
|
||||
ldw bpr, rg1, 12 // load op 1
|
||||
lwi 0, rg2 // initialise rg2 to zero
|
||||
|
||||
_multiply_loop:
|
||||
add rg2, rg0, rg2
|
||||
dec rg1
|
||||
|
||||
cmp rg1, zero
|
||||
jgt _multiply_loop
|
||||
|
||||
_multiply_end:
|
||||
stw rg2, bpr, 8
|
||||
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
|
||||
divmod:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg1, 8 // load op 2
|
||||
ldw bpr, rg0, 12 // load op 1
|
||||
|
||||
lli 0, rg3
|
||||
|
||||
_divmod_loop:
|
||||
cmp rg0, rg1
|
||||
jlt _divmod_end
|
||||
|
||||
sub rg0, rg1, rg0
|
||||
inc rg3
|
||||
|
||||
jmp _divmod_loop
|
||||
|
||||
_divmod_end:
|
||||
// store div in first arg
|
||||
// store mod in second arg
|
||||
stw rg3, bpr, 8
|
||||
stw rg0, bpr, 12
|
||||
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
|
||||
// multiply.dsa - improved version
|
||||
// Multiplies two 32-bit numbers using shift-and-add
|
||||
//
|
||||
// Usage:
|
||||
// push operand2 (multiplier)
|
||||
// push operand1 (multiplicand)
|
||||
// call multiply::multiply
|
||||
// pop result
|
||||
// pop zero (discard second argument)
|
||||
|
||||
new_multiply:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8 // rg0 = multiplicand
|
||||
ldw bpr, rg1, 12 // rg1 = multiplier
|
||||
|
||||
lli 0, rg2 // rg2 = result (accumulator)
|
||||
lli 32, rg3 // rg3 = bit counter
|
||||
|
||||
mult_loop:
|
||||
// Check if lowest bit of multiplier is 1
|
||||
lli 1, acc
|
||||
and rg1, acc, acc // acc = rg1 & 1
|
||||
cmp acc, zero
|
||||
jeq skip_add // if (rg1 & 1) == 0, skip addition
|
||||
|
||||
// Add multiplicand to result
|
||||
add rg2, rg0, rg2
|
||||
|
||||
skip_add:
|
||||
shl rg0, 1 // shift multiplicand left
|
||||
shr rg1, 1 // shift multiplier right
|
||||
|
||||
dec rg3
|
||||
cmp rg3, zero
|
||||
jgt mult_loop
|
||||
|
||||
stw rg2, bpr, 8 // store result
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
"#
|
||||
)
|
||||
}
|
||||
+2
-1
@@ -20,10 +20,11 @@ compiler = { path = "../compiler" }
|
||||
dsa_editor = { path = "../dsa_editor" }
|
||||
egui = "0.31.1"
|
||||
dirs = "6.0.0"
|
||||
discord-presence = { version = "1.6.0", optional = true }
|
||||
discord-presence = { version = "2.0.0", optional = true }
|
||||
toml = { version = "0.8.23", optional = true }
|
||||
serde = { version = "1.0.219", features = ["derive"], optional = true }
|
||||
egui_file = "0.22.1"
|
||||
rustc-hash = "2.1.1"
|
||||
|
||||
[features]
|
||||
default = ["config"]
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
use common::prelude::Instruction;
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Cache {
|
||||
addr: u32,
|
||||
instruction_block: Option<[u8; 256]>,
|
||||
instruction_lookup: FxHashMap<u32, Instruction>,
|
||||
}
|
||||
|
||||
impl Cache {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
addr: 0,
|
||||
instruction_block: None,
|
||||
instruction_lookup: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn lookup_value(&mut self, addr: u32) -> Option<u32> {
|
||||
if addr < self.addr || addr >= self.addr + 256 || self.instruction_block.is_none()
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(u32::from_be_bytes(
|
||||
self.instruction_block.expect("this should not be none!")
|
||||
[(addr - self.addr) as usize..(addr - self.addr + 4) as usize]
|
||||
.try_into()
|
||||
.expect("Failed to convert bytes to u32"),
|
||||
))
|
||||
}
|
||||
|
||||
pub const fn set(&mut self, addr: u32, block: &[u8; 256]) {
|
||||
self.addr = addr - addr % 256;
|
||||
self.instruction_block = Some(*block);
|
||||
}
|
||||
|
||||
pub fn lookup_instruction(&mut self, instruction: u32) -> Option<Instruction> {
|
||||
self.instruction_lookup.get(&instruction).copied()
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, value: u32, instruction: Instruction) {
|
||||
self.instruction_lookup.insert(value, instruction);
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Cache {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
@@ -25,9 +25,11 @@ pub fn run_emulator(
|
||||
let mut running = Running::Paused;
|
||||
let mut step = 0;
|
||||
let mut addr;
|
||||
let mut history = Vec::<(u32, Instruction)>::new();
|
||||
let mut history = Vec::<(u32, u32)>::with_capacity(32768);
|
||||
let size = 256;
|
||||
|
||||
let record_history = true;
|
||||
|
||||
state_tx
|
||||
.send(StateUpdate::Running(Running::Paused))
|
||||
.expect("Failed to send initial state!");
|
||||
@@ -36,7 +38,9 @@ pub fn run_emulator(
|
||||
let mut update = false;
|
||||
|
||||
loop {
|
||||
let cmd = if running == Running::Running || step > 0 {
|
||||
let cmd = if step > 0 {
|
||||
None
|
||||
} else if running == Running::Running && step == 0 {
|
||||
match cmd_rx.try_recv() {
|
||||
Ok(cmd) => Some(cmd),
|
||||
Err(mpsc::TryRecvError::Empty) => {
|
||||
@@ -52,10 +56,15 @@ pub fn run_emulator(
|
||||
}
|
||||
};
|
||||
|
||||
if running == Running::Running && step == 0 {
|
||||
step = 32768;
|
||||
}
|
||||
|
||||
if let Some(cmd) = cmd {
|
||||
match cmd {
|
||||
Command::Start => {
|
||||
running = Running::Running;
|
||||
step = 32768;
|
||||
|
||||
// Update RPC with current state. TODO: Make this only occur on state
|
||||
// changes.
|
||||
@@ -71,9 +80,11 @@ pub fn run_emulator(
|
||||
}
|
||||
Command::Stop => {
|
||||
running = Running::Paused;
|
||||
step = 0;
|
||||
}
|
||||
Command::Reset(x) => {
|
||||
running = Running::Paused;
|
||||
step = 0;
|
||||
|
||||
match x {
|
||||
0 => {
|
||||
@@ -95,20 +106,12 @@ pub fn run_emulator(
|
||||
}
|
||||
Command::Step(x) => {
|
||||
step = x;
|
||||
running = Running::Paused;
|
||||
}
|
||||
Command::Write(offset, data) => {
|
||||
update = true;
|
||||
|
||||
processor
|
||||
.memory
|
||||
.write_range(offset, data)
|
||||
.unwrap_or_else(|_| {
|
||||
report_err(
|
||||
state_tx,
|
||||
"Failed to write memory range!",
|
||||
&mut processor,
|
||||
);
|
||||
});
|
||||
processor.memory.write_range(offset, data);
|
||||
}
|
||||
Command::Interrupt(_interrupt) => {
|
||||
update = true;
|
||||
@@ -118,14 +121,7 @@ pub fn run_emulator(
|
||||
Command::MemRequest(new, size) if update => {
|
||||
addr = new;
|
||||
let _ = state_tx.send(StateUpdate::MemoryView(
|
||||
processor.memory.read_range(addr, size).unwrap_or_else(|_| {
|
||||
report_err(
|
||||
state_tx,
|
||||
"Failed to read memory range!",
|
||||
&mut processor,
|
||||
);
|
||||
Vec::new()
|
||||
}),
|
||||
processor.memory.read_range(addr, size),
|
||||
));
|
||||
}
|
||||
Command::DisplayRequest if update => {
|
||||
@@ -163,50 +159,19 @@ pub fn run_emulator(
|
||||
let _ = state_tx.send(StateUpdate::Instructions(instruction_count));
|
||||
}
|
||||
Command::WriteBlock(addr, block) => {
|
||||
processor
|
||||
.memory
|
||||
.write_range(addr, block.to_vec())
|
||||
.unwrap_or_else(|_| {
|
||||
report_err(
|
||||
state_tx,
|
||||
"Failed to write memory block!",
|
||||
&mut processor,
|
||||
);
|
||||
});
|
||||
processor.memory.write_range(addr, block.to_vec());
|
||||
}
|
||||
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if step > 0 {
|
||||
step -= 1;
|
||||
update = true;
|
||||
running = Running::Paused;
|
||||
|
||||
// Execute one cycle.
|
||||
match processor.cycle() {
|
||||
Ok((addr, instruction)) => {
|
||||
history.push((addr, instruction));
|
||||
}
|
||||
Err(why) => {
|
||||
let pcx = processor
|
||||
.get(Register::Pcx)
|
||||
.expect("SPR should never be invalid");
|
||||
report_err(
|
||||
state_tx,
|
||||
&format!(
|
||||
"Could not decode instruction at {pcx:x}. Reason: {why}"
|
||||
),
|
||||
&mut processor,
|
||||
);
|
||||
}
|
||||
}
|
||||
instruction_count += 1;
|
||||
continue;
|
||||
if running == Running::Running {
|
||||
step += 1;
|
||||
}
|
||||
|
||||
if running == Running::Running {
|
||||
if step > 0 {
|
||||
step -= 1;
|
||||
update = true;
|
||||
|
||||
// Execute one cycle.
|
||||
@@ -227,9 +192,18 @@ pub fn run_emulator(
|
||||
}
|
||||
};
|
||||
|
||||
history.push(instruction);
|
||||
if matches!(instruction.1, Instruction::Halt) {
|
||||
if record_history {
|
||||
history.push((
|
||||
instruction.0,
|
||||
processor
|
||||
.get(Register::Cir)
|
||||
.expect("CIR should never be invalid"),
|
||||
));
|
||||
}
|
||||
|
||||
if matches!(instruction, (_, Instruction::Halt)) {
|
||||
running = Running::Halted;
|
||||
step = 0;
|
||||
}
|
||||
|
||||
instruction_count += 1;
|
||||
|
||||
@@ -1,52 +1,42 @@
|
||||
use std::collections::HashMap;
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
use crate::emulator::system::model::ProcessorError;
|
||||
|
||||
pub trait MemoryUnit: Send + Sync {
|
||||
fn reset(&mut self);
|
||||
fn read_byte(&mut self, addr: u32) -> Result<u8, ProcessorError>;
|
||||
fn write_byte(&mut self, addr: u32, value: u8) -> Result<(), ProcessorError>;
|
||||
fn read_byte(&mut self, addr: u32) -> u8;
|
||||
fn write_byte(&mut self, addr: u32, value: u8);
|
||||
fn read_word(&mut self, addr: u32) -> Result<u32, ProcessorError>;
|
||||
fn write_word(&mut self, addr: u32, value: u32) -> Result<(), ProcessorError>;
|
||||
|
||||
fn read_range(&mut self, addr: u32, size: u32) -> Result<Vec<u8>, ProcessorError> {
|
||||
fn read_range(&mut self, addr: u32, size: u32) -> Vec<u8> {
|
||||
let mut data = Vec::with_capacity(size as usize);
|
||||
for i in 0..size {
|
||||
data.push(self.read_byte(addr + i)?);
|
||||
data.push(self.read_byte(addr + i));
|
||||
}
|
||||
Ok(data)
|
||||
data
|
||||
}
|
||||
|
||||
fn write_range(&mut self, addr: u32, value: Vec<u8>) -> Result<(), ProcessorError> {
|
||||
fn write_range(&mut self, addr: u32, value: Vec<u8>) {
|
||||
for (i, byte) in value.into_iter().enumerate() {
|
||||
self.write_byte(addr + i as u32, byte)?;
|
||||
self.write_byte(addr + i as u32, byte);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn read_block(&mut self, addr: u32) -> Result<[u8; 256], ProcessorError> {
|
||||
let mut data = [0; 256];
|
||||
for (i, byte) in data.iter_mut().enumerate() {
|
||||
*byte = self.read_byte(addr + i as u32)?;
|
||||
}
|
||||
Ok(data)
|
||||
}
|
||||
fn read_block(&mut self, addr: u32) -> &[u8; 256];
|
||||
|
||||
fn write_block(&mut self, addr: u32, data: [u8; 256]) -> Result<(), ProcessorError> {
|
||||
fn write_block(&mut self, addr: u32, data: &[u8; 256]) {
|
||||
for (i, byte) in data.iter().enumerate() {
|
||||
self.write_byte(addr + i as u32, *byte)?;
|
||||
self.write_byte(addr + i as u32, *byte);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MainStore {
|
||||
pub data: HashMap<u32, Block>,
|
||||
pub data: FxHashMap<u32, Block>,
|
||||
}
|
||||
|
||||
pub struct Block {
|
||||
data: [u8; 256],
|
||||
}
|
||||
pub type Block = [u8; 256];
|
||||
|
||||
impl Default for MainStore {
|
||||
fn default() -> Self {
|
||||
@@ -58,113 +48,110 @@ impl MainStore {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
data: HashMap::new(),
|
||||
data: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
const fn segment_addr(addr: u32) -> (u32, u8) {
|
||||
(addr / 256, (addr % 256) as u8)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn mut_block(&mut self, addr: u32) -> &mut Block {
|
||||
self.data
|
||||
.entry(addr)
|
||||
.or_insert_with(|| Block { data: [0; 256] });
|
||||
|
||||
self.data.get_mut(&addr).map_or_else(
|
||||
|| panic!("Could not fetch block with address {addr:x?}"),
|
||||
|block| block,
|
||||
)
|
||||
self.data.entry(addr).or_insert([0; 256])
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn block(&mut self, addr: u32) -> &Block {
|
||||
self.data
|
||||
.entry(addr)
|
||||
.or_insert_with(|| Block { data: [0; 256] });
|
||||
|
||||
self.data.get(&addr).map_or_else(
|
||||
|| panic!("Could not fetch block with address {addr:x?}"),
|
||||
|block| block,
|
||||
)
|
||||
self.data.entry(addr).or_insert([0; 256])
|
||||
}
|
||||
}
|
||||
|
||||
impl MemoryUnit for MainStore {
|
||||
#[inline]
|
||||
fn reset(&mut self) {
|
||||
self.data.clear();
|
||||
}
|
||||
|
||||
fn read_byte(&mut self, addr: u32) -> Result<u8, ProcessorError> {
|
||||
#[inline]
|
||||
fn read_byte(&mut self, addr: u32) -> u8 {
|
||||
let (block_addr, offset) = Self::segment_addr(addr);
|
||||
let block = self.block(block_addr);
|
||||
Ok(block.data[offset as usize])
|
||||
block[offset as usize]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn read_word(&mut self, addr: u32) -> Result<u32, ProcessorError> {
|
||||
if addr % 4 != 0 {
|
||||
if !addr.is_multiple_of(4) {
|
||||
return Err(ProcessorError::BadMemoryAccess(addr));
|
||||
}
|
||||
|
||||
let (block_addr, offset) = Self::segment_addr(addr);
|
||||
let block = self.mut_block(block_addr);
|
||||
let mut bytes = [0; 4];
|
||||
bytes[0] = block.data[offset as usize];
|
||||
bytes[1] = block.data[(offset + 1) as usize];
|
||||
bytes[2] = block.data[(offset + 2) as usize];
|
||||
bytes[3] = block.data[(offset + 3) as usize];
|
||||
Ok(u32::from_be_bytes(bytes))
|
||||
let offset = offset as usize;
|
||||
let block = self.block(block_addr);
|
||||
Ok(u32::from_be_bytes(
|
||||
block[offset..=offset + 3]
|
||||
.try_into()
|
||||
.expect("Failed to read word!"),
|
||||
))
|
||||
}
|
||||
|
||||
fn read_range(&mut self, addr: u32, size: u32) -> Result<Vec<u8>, ProcessorError> {
|
||||
#[inline]
|
||||
fn read_range(&mut self, addr: u32, size: u32) -> Vec<u8> {
|
||||
let mut data = Vec::with_capacity(size as usize);
|
||||
for i in 0..size {
|
||||
data.push(self.read_byte(addr + i)?);
|
||||
data.push(self.read_byte(addr + i));
|
||||
}
|
||||
|
||||
Ok(data)
|
||||
data
|
||||
}
|
||||
|
||||
fn write_byte(&mut self, addr: u32, value: u8) -> Result<(), ProcessorError> {
|
||||
#[inline]
|
||||
fn write_byte(&mut self, addr: u32, value: u8) {
|
||||
let (block_addr, offset) = Self::segment_addr(addr);
|
||||
let block = self.mut_block(block_addr);
|
||||
block.data[offset as usize] = value;
|
||||
Ok(())
|
||||
block[offset as usize] = value;
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_word(&mut self, addr: u32, value: u32) -> Result<(), ProcessorError> {
|
||||
if addr % 4 != 0 {
|
||||
if !addr.is_multiple_of(4) {
|
||||
return Err(ProcessorError::BadMemoryAccess(addr));
|
||||
}
|
||||
|
||||
let (block_addr, offset) = Self::segment_addr(addr);
|
||||
let block = self.mut_block(block_addr);
|
||||
block.data[offset as usize] = (value >> 24) as u8;
|
||||
block.data[(offset + 1) as usize] = (value >> 16) as u8;
|
||||
block.data[(offset + 2) as usize] = (value >> 8) as u8;
|
||||
block.data[(offset + 3) as usize] = value as u8;
|
||||
block[offset as usize..=(offset + 3) as usize]
|
||||
.copy_from_slice(&value.to_be_bytes());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_range(&mut self, addr: u32, value: Vec<u8>) -> Result<(), ProcessorError> {
|
||||
for (i, byte) in value.into_iter().enumerate() {
|
||||
let (block_addr, offset) = Self::segment_addr(addr + i as u32);
|
||||
let block = self.mut_block(block_addr);
|
||||
block.data[offset as usize] = byte;
|
||||
#[inline]
|
||||
fn write_range(&mut self, addr: u32, value: Vec<u8>) {
|
||||
let mut current_block_addr = addr / 256;
|
||||
let mut current_block = self.mut_block(current_block_addr);
|
||||
let mut offset = addr % 256;
|
||||
for byte in value {
|
||||
current_block[offset as usize] = byte;
|
||||
offset += 1;
|
||||
if offset >= 256 {
|
||||
offset = 0;
|
||||
current_block_addr += 1;
|
||||
current_block = self.mut_block(current_block_addr);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn read_block(&mut self, addr: u32) -> Result<[u8; 256], ProcessorError> {
|
||||
#[inline]
|
||||
fn read_block(&mut self, addr: u32) -> &[u8; 256] {
|
||||
let (block_addr, _) = Self::segment_addr(addr);
|
||||
let block = self.block(block_addr);
|
||||
Ok(block.data)
|
||||
self.block(block_addr)
|
||||
}
|
||||
|
||||
fn write_block(&mut self, addr: u32, data: [u8; 256]) -> Result<(), ProcessorError> {
|
||||
#[inline]
|
||||
fn write_block(&mut self, addr: u32, data: &[u8; 256]) {
|
||||
let (block_addr, _) = Self::segment_addr(addr);
|
||||
let block = self.mut_block(block_addr);
|
||||
block.data = data;
|
||||
Ok(())
|
||||
let _ = self.data.insert(block_addr, *data);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod cache;
|
||||
pub mod emulator;
|
||||
pub mod memory;
|
||||
pub mod model;
|
||||
|
||||
@@ -78,7 +78,7 @@ pub struct State {
|
||||
|
||||
pub error_log: Vec<String>,
|
||||
|
||||
pub instruction_history: Vec<(u32, Instruction)>,
|
||||
pub instruction_history: Vec<(u32, u32)>,
|
||||
}
|
||||
|
||||
impl State {
|
||||
@@ -154,7 +154,7 @@ pub enum StateUpdate {
|
||||
MemoryView(Vec<u8>),
|
||||
DisplayView(Vec<u8>),
|
||||
Error(String),
|
||||
InstructionHistory(Vec<(u32, Instruction)>),
|
||||
InstructionHistory(Vec<(u32, u32)>),
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
|
||||
@@ -286,11 +286,10 @@ impl RegFile {
|
||||
Register::Sts => &mut self.sts,
|
||||
Register::Cir => &mut self.cir,
|
||||
Register::Pcx => &mut self.pcx,
|
||||
_ => return Err(ProcessorError::InvalidRegister(Register::NoReg as u8)),
|
||||
_ => return Err(ProcessorError::InvalidRegister(Register::Null as u8)),
|
||||
})
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn get(&self, reg: Register) -> Result<u32, ProcessorError> {
|
||||
Ok(match reg {
|
||||
Register::Rg0 => self.rg0,
|
||||
@@ -321,7 +320,7 @@ impl RegFile {
|
||||
Register::Cir => self.cir,
|
||||
Register::Pcx => self.pcx,
|
||||
Register::Zero => 0,
|
||||
_ => return Err(ProcessorError::InvalidRegister(Register::NoReg as u8)),
|
||||
_ => return Err(ProcessorError::InvalidRegister(Register::Null as u8)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::{
|
||||
};
|
||||
|
||||
use crate::emulator::system::{
|
||||
cache::Cache,
|
||||
memory::MemoryUnit,
|
||||
model::{IODevice, ProcessorError, RegFile},
|
||||
};
|
||||
@@ -17,10 +18,7 @@ pub struct Processor {
|
||||
pub io_devices: Vec<Arc<dyn IODevice>>,
|
||||
|
||||
pub void: u32,
|
||||
}
|
||||
|
||||
fn log(message: &str) {
|
||||
println!("\x1b[32mINFO:\x1b[0m {message}");
|
||||
pub cache: Cache,
|
||||
}
|
||||
|
||||
impl Processor {
|
||||
@@ -32,6 +30,7 @@ impl Processor {
|
||||
halted: false,
|
||||
io_devices,
|
||||
void: 0,
|
||||
cache: Cache::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,21 +50,35 @@ impl Processor {
|
||||
// Get value from PCX.
|
||||
let addr = self.fetch()?;
|
||||
// Increment PCX.
|
||||
self.advance();
|
||||
self.advance()?;
|
||||
|
||||
// Set MAR to the previous value of PCX.
|
||||
*self.reg(Register::Mar)? = addr;
|
||||
let val = self.memory.read_word(addr)?;
|
||||
|
||||
let encoded = if let Some(val) = self.cache.lookup_value(addr) {
|
||||
val
|
||||
} else {
|
||||
let block = self.memory.read_block(addr);
|
||||
self.cache.set(addr, block);
|
||||
self.cache
|
||||
.lookup_value(addr)
|
||||
.expect("Failed to lookup value!")
|
||||
};
|
||||
|
||||
// Set CIR to the value of RAM[MAR].
|
||||
*self.reg(Register::Mar)? = val;
|
||||
*self.reg(Register::Cir)? = encoded;
|
||||
|
||||
// Decode and execute the instruction.
|
||||
let instruction = Instruction::decode(val)
|
||||
.map_err(|_| ProcessorError::InvalidInstruction(val))?;
|
||||
let decoded = if let Some(val) = self.cache.lookup_instruction(addr) {
|
||||
val
|
||||
} else {
|
||||
let decoded = Instruction::decode(encoded)
|
||||
.map_err(|_| ProcessorError::InvalidInstruction(encoded))?;
|
||||
self.cache.insert(addr, decoded);
|
||||
decoded
|
||||
};
|
||||
|
||||
instruction.execute(self)?;
|
||||
Ok((addr, instruction))
|
||||
decoded.execute(self)?;
|
||||
Ok((addr, decoded))
|
||||
}
|
||||
|
||||
const fn fetch(&self) -> Result<u32, ProcessorError> {
|
||||
@@ -84,7 +97,7 @@ impl Processor {
|
||||
}
|
||||
|
||||
pub fn display(&mut self) -> Result<Vec<u8>, ProcessorError> {
|
||||
self.memory.read_range(0x20000, 2000)
|
||||
Ok(self.memory.read_range(0x20000, 2000))
|
||||
}
|
||||
|
||||
pub fn cmp(&mut self, a: u32, b: u32) {
|
||||
@@ -163,10 +176,10 @@ impl Processor {
|
||||
let addr = self.get(Register::Spr)?;
|
||||
let size = n * 4;
|
||||
// returns the stack
|
||||
self.memory.read_range(
|
||||
Ok(self.memory.read_range(
|
||||
max(addr, 0), // ensures that we cannot read from a negative address
|
||||
min(size, addr), // ensures we don't read above the top of the stack
|
||||
)
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,7 +222,7 @@ impl Executable for Instruction {
|
||||
Self::LoadByte(a) => {
|
||||
*cpu.reg(a.r2)? = u32::from(
|
||||
cpu.memory
|
||||
.read_byte(cpu.get(a.r1)? + u32::from(a.immediate))?,
|
||||
.read_byte(cpu.get(a.r1)? + u32::from(a.immediate)),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -218,7 +231,7 @@ impl Executable for Instruction {
|
||||
Self::LoadByteSigned(a) => {
|
||||
*cpu.reg(a.r2)? = sign_extend(u32::from(
|
||||
cpu.memory
|
||||
.read_byte(cpu.get(a.r1)? + u32::from(a.immediate))?,
|
||||
.read_byte(cpu.get(a.r1)? + u32::from(a.immediate)),
|
||||
));
|
||||
}
|
||||
|
||||
@@ -257,7 +270,7 @@ impl Executable for Instruction {
|
||||
cpu.memory.write_byte(
|
||||
cpu.get(a.r2)? + u32::from(a.immediate),
|
||||
cpu.get(a.r1)? as u8,
|
||||
)?;
|
||||
);
|
||||
}
|
||||
|
||||
// Stores a half-word from SrcReg in memory address (base + offset) The
|
||||
@@ -266,9 +279,9 @@ impl Executable for Instruction {
|
||||
// split the value into bytes and then write two bytes
|
||||
let bytes = (cpu.get(a.r1)? as u16).to_le_bytes();
|
||||
cpu.memory
|
||||
.write_byte(cpu.get(a.r2)? + u32::from(a.immediate), bytes[0])?;
|
||||
.write_byte(cpu.get(a.r2)? + u32::from(a.immediate), bytes[0]);
|
||||
cpu.memory
|
||||
.write_byte(cpu.get(a.r2)? + u32::from(a.immediate) + 1, bytes[1])?;
|
||||
.write_byte(cpu.get(a.r2)? + u32::from(a.immediate) + 1, bytes[1]);
|
||||
}
|
||||
|
||||
// Stores a word from SrcReg in memory address (base + offset) The effective
|
||||
@@ -349,17 +362,13 @@ impl Executable for Instruction {
|
||||
// Left shifts the value in Reg by the given amount (either a register, or a
|
||||
// literal value)
|
||||
Self::ShiftLeft(a) => {
|
||||
let reg = cpu.get(a.sr1)?;
|
||||
let val = a.shamt;
|
||||
*cpu.reg(a.sr1)? = shl(reg, val);
|
||||
*cpu.reg(a.dr)? = shl(cpu.get(a.sr1)?, a.shamt + cpu.get(a.sr2)? as u8);
|
||||
}
|
||||
|
||||
// Right shifts the value in Reg by the given amount (either a register, or a
|
||||
// literal value).
|
||||
Self::ShiftRight(a) => {
|
||||
let regval = cpu.get(a.sr1)?;
|
||||
let val = a.shamt;
|
||||
*cpu.reg(a.sr1)? = shr(regval, val);
|
||||
*cpu.reg(a.dr)? = shr(cpu.get(a.sr1)?, a.shamt + cpu.get(a.sr2)? as u8);
|
||||
}
|
||||
|
||||
// Adds the value of Src2 to Src1 and writes the result to a.dr
|
||||
|
||||
@@ -81,9 +81,7 @@ fn test_mov_signed_instruction() {
|
||||
fn test_load_byte_instruction() {
|
||||
let mut cpu = create_test_processor();
|
||||
let addr = 0x100;
|
||||
cpu.memory
|
||||
.write_byte(addr, 0xAB)
|
||||
.expect("Failed to write byte to memory");
|
||||
cpu.memory.write_byte(addr, 0xAB);
|
||||
*cpu.reg(Register::Rg1).expect("Failed to get register Rg1") = addr - 4;
|
||||
|
||||
let load_byte_instr = Instruction::LoadByte(ITypeArgs::new(
|
||||
@@ -105,9 +103,7 @@ fn test_load_byte_instruction() {
|
||||
fn test_load_byte_signed_instruction() {
|
||||
let mut cpu = create_test_processor();
|
||||
let addr = 0x100;
|
||||
cpu.memory
|
||||
.write_byte(addr, 0xFF)
|
||||
.expect("Failed to write byte to memory");
|
||||
cpu.memory.write_byte(addr, 0xFF);
|
||||
*cpu.reg(Register::Rg1).expect("Failed to get register Rg1") = addr;
|
||||
|
||||
let load_byte_signed_instr = Instruction::LoadByteSigned(ITypeArgs::new(
|
||||
@@ -189,7 +185,7 @@ fn test_store_byte_instruction() {
|
||||
store_byte_instr.execute(&mut cpu).expect(
|
||||
"Emulator was slain by losing the game while attempting to execute instruction",
|
||||
);
|
||||
assert_eq!(cpu.memory.read_byte(addr).expect("Emulator was slain by losing the game while attempting to execute instruction"), 0xAB);
|
||||
assert_eq!(cpu.memory.read_byte(addr), 0xAB);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -468,7 +464,7 @@ fn test_shift_left_with_shamt() {
|
||||
let shl_instr = Instruction::ShiftLeft(RTypeArgs::new(
|
||||
Some(Register::Rg1),
|
||||
Some(Register::Zero),
|
||||
None,
|
||||
Some(Register::Rg1),
|
||||
Some(2),
|
||||
));
|
||||
|
||||
@@ -489,7 +485,7 @@ fn test_shift_right_with_shamt() {
|
||||
let shr_instr = Instruction::ShiftRight(RTypeArgs::new(
|
||||
Some(Register::Rg1),
|
||||
Some(Register::Zero),
|
||||
None,
|
||||
Some(Register::Rg1),
|
||||
Some(2),
|
||||
));
|
||||
|
||||
|
||||
@@ -117,10 +117,7 @@ impl Editor {
|
||||
.file_name()
|
||||
.unwrap_or_else(|| OsStr::new("Unnamed!"))
|
||||
.to_str()
|
||||
.map_or_else(
|
||||
|| unreachable!("File name should be valid UTF-8."),
|
||||
|ext| ext,
|
||||
);
|
||||
.unwrap_or_else(|| unreachable!("File name should be valid UTF-8."));
|
||||
}
|
||||
"Unnamed!"
|
||||
}
|
||||
@@ -129,12 +126,9 @@ impl Editor {
|
||||
if let Some(path) = &self.path {
|
||||
return path
|
||||
.extension()
|
||||
.map_or_else(|| OsStr::new("Unknown!"), |ext| ext)
|
||||
.unwrap_or_else(|| OsStr::new("Unknown!"))
|
||||
.to_str()
|
||||
.map_or_else(
|
||||
|| unreachable!("File name should be valid UTF-8."),
|
||||
|ext| ext,
|
||||
);
|
||||
.unwrap_or_else(|| unreachable!("File name should be valid UTF-8."));
|
||||
}
|
||||
"Unknown!"
|
||||
}
|
||||
@@ -393,25 +387,20 @@ impl Editor {
|
||||
fn render_editor(&mut self, _state: &mut State, ui: &mut Ui, _ctx: &Context) {
|
||||
let available_width = ui.available_width();
|
||||
let syntax = match self.extension() {
|
||||
"dsa" => Some(Syntax::new("dsa")),
|
||||
_ => None,
|
||||
"dsa" => Syntax::dsa(),
|
||||
"dsc" => Syntax::dsc(),
|
||||
_ => Syntax::default(),
|
||||
};
|
||||
|
||||
let ed = CodeEditor::default()
|
||||
let mut editor = CodeEditor::default()
|
||||
.id_source("editor")
|
||||
.with_fontsize(12.0)
|
||||
.with_rows(0)
|
||||
.with_theme(ColorTheme::default())
|
||||
.with_syntax(Syntax::dsa())
|
||||
.with_syntax(syntax)
|
||||
.with_numlines(true)
|
||||
.desired_width(available_width - 500.0);
|
||||
|
||||
let mut editor = ed.clone();
|
||||
|
||||
if let Some(syntax) = syntax {
|
||||
editor = ed.with_syntax(syntax);
|
||||
}
|
||||
|
||||
editor.show(ui, &mut self.text);
|
||||
}
|
||||
|
||||
@@ -454,7 +443,7 @@ impl Editor {
|
||||
Some("dsc") => {
|
||||
let output_path = Path::new(path).with_extension("dsa");
|
||||
if let Err(e) = compiler::compile_file(path, &output_path) {
|
||||
self.error = Some(format!("Compiler error: {}", e));
|
||||
self.error = Some(format!("Compiler error: {e}"));
|
||||
}
|
||||
|
||||
let mut compiler = CompilerEngine::new();
|
||||
@@ -464,7 +453,7 @@ impl Editor {
|
||||
let instructions = match compiler.wait_for_result() {
|
||||
Ok(instructions) => instructions,
|
||||
Err(e) => {
|
||||
self.error = Some(format!("Assembler error: {}", e));
|
||||
self.error = Some(format!("Assembler error: {e}"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use common::prelude::Instruction;
|
||||
use egui::{Context, Ui};
|
||||
|
||||
use crate::emulator::{
|
||||
@@ -57,8 +58,11 @@ impl Component for History {
|
||||
.color(egui::Color32::from_rgb(255, 200, 200)),
|
||||
);
|
||||
|
||||
let decoded = Instruction::decode(instruction.1)
|
||||
.unwrap_or(Instruction::Nop);
|
||||
|
||||
ui.label(
|
||||
egui::RichText::new(instruction.1.to_string())
|
||||
egui::RichText::new(decoded.to_string())
|
||||
.font(egui::FontId::monospace(12.0))
|
||||
.color(egui::Color32::from_rgb(200, 255, 200)),
|
||||
);
|
||||
|
||||
@@ -79,10 +79,7 @@ impl Loader {
|
||||
.file_name()
|
||||
.unwrap_or_else(|| OsStr::new("Unnamed!"))
|
||||
.to_str()
|
||||
.map_or_else(
|
||||
|| unreachable!("File name should be valid UTF-8."),
|
||||
|ext| ext,
|
||||
);
|
||||
.unwrap_or_else(|| unreachable!("File name should be valid UTF-8."));
|
||||
}
|
||||
"Unnamed!"
|
||||
}
|
||||
|
||||
@@ -1,121 +0,0 @@
|
||||
|
||||
// GENERATED BY DSC COMPILER
|
||||
// Generated at 2026-02-04 01:55:11
|
||||
|
||||
// Imports
|
||||
include arena: "./lib/memory/arena_alloc.dsa"
|
||||
include print: "./lib/io/print.dsa"
|
||||
|
||||
// Globals & Reserved Memory
|
||||
|
||||
|
||||
// Entry Point
|
||||
dw stack: 0x10000
|
||||
db message: "Process Exited with code:"
|
||||
_init:
|
||||
ldw stack, bpr
|
||||
mov bpr, spr
|
||||
push zero
|
||||
call main
|
||||
call print::print_newline
|
||||
lwi message, rg0
|
||||
push rg0
|
||||
call print::print
|
||||
pop zero
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
hlt
|
||||
|
||||
|
||||
// Return
|
||||
_ret:
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
|
||||
// Compiled Code Starts...
|
||||
main:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
lli 0, rg0
|
||||
push rg0 // bpr-4: x
|
||||
subi bpr 4 rg1
|
||||
lli 512, rg0
|
||||
push rg1 // bpr-8: y
|
||||
push rg0 // push arg 0
|
||||
call arena::new
|
||||
pop rg2
|
||||
lli 32, rg0
|
||||
push rg2 // bpr-12: alloc
|
||||
push rg0 // push arg 1
|
||||
push rg2 // push arg 0
|
||||
call arena::alloc
|
||||
pop rg3
|
||||
pop zero
|
||||
lli 32, rg0
|
||||
subi bpr 12 rg2
|
||||
ldw rg2, rg2 // bpr-20: alloc
|
||||
push rg2 // bpr-16: alloc
|
||||
push rg3 // bpr-20: ptr1
|
||||
push rg0 // push arg 1
|
||||
push rg2 // push arg 0
|
||||
call arena::alloc
|
||||
pop rg4
|
||||
pop zero
|
||||
subi bpr 16 rg0
|
||||
ldw rg0, rg0 // bpr-24: alloc
|
||||
push rg0 // bpr-24: alloc
|
||||
push rg4 // bpr-28: ptr2
|
||||
push rg0 // push arg 0
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
call print::print_newline
|
||||
subi bpr 20 rg0
|
||||
ldw rg0, rg0 // bpr-28: ptr1
|
||||
push rg0 // bpr-32: ptr1
|
||||
push rg0 // push arg 0
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
call print::print_newline
|
||||
subi bpr 28 rg0
|
||||
ldw rg0, rg0 // bpr-36: ptr2
|
||||
push rg0 // bpr-36: ptr2
|
||||
push rg0 // push arg 0
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
call print::print_newline
|
||||
subi bpr 36 rg0
|
||||
ldw rg0, rg0 // bpr-44: ptr2
|
||||
ldw rg0, rg2
|
||||
push rg0 // bpr-40: ptr2
|
||||
push rg2 // push arg 0
|
||||
call print::print_num
|
||||
pop zero
|
||||
call print::print_newline
|
||||
lli 42, rg2
|
||||
subi bpr 40 rg5
|
||||
ldw rg5, rg5 // bpr-48: ptr2
|
||||
stw rg2, rg5
|
||||
push rg5 // bpr-44: ptr2
|
||||
push rg5 // push arg 0
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
call print::print_newline
|
||||
subi bpr 44 rg2
|
||||
ldw rg2, rg2 // bpr-52: ptr2
|
||||
ldw rg2, rg5
|
||||
push rg2 // bpr-48: ptr2
|
||||
push rg5 // push arg 0
|
||||
call print::print_num
|
||||
pop zero
|
||||
call print::print_newline
|
||||
db str_12: "end"
|
||||
lwi str_12, rg5
|
||||
push rg5 // push arg 0
|
||||
call print::println
|
||||
pop zero
|
||||
lli 0, rg5
|
||||
stw rg5, bpr, 8
|
||||
jmp _ret
|
||||
|
||||
@@ -28,5 +28,3 @@ fn main() -> u32 {
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -32,3 +32,13 @@ handle_hard_fault:
|
||||
call print::print
|
||||
pop zero
|
||||
hlt
|
||||
|
||||
trigger:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
int 0x01
|
||||
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
|
||||
@@ -0,0 +1,216 @@
|
||||
// block_alloc.dsa
|
||||
// Fixed-size block allocator
|
||||
//
|
||||
// Memory layout:
|
||||
// [base + 0]: free list head pointer (pointer to first free block, or 0 if none)
|
||||
// [base + 4]: block size
|
||||
// [base + 8]: total blocks
|
||||
// [base + 12]: base address of block pool
|
||||
// [base + 16+]: block pool (each block starts with a 4-byte next pointer)
|
||||
//
|
||||
// Usage:
|
||||
// include block_alloc "./lib/memory/block_alloc.dsa"
|
||||
//
|
||||
// For init:
|
||||
// push num_blocks (e.g., 32)
|
||||
// push block_size (e.g., 64 bytes)
|
||||
// call block_alloc::init
|
||||
// pop block_size
|
||||
// pop num_blocks
|
||||
// ; result in spr+8 (allocator handle)
|
||||
//
|
||||
// For alloc:
|
||||
// push allocator_handle
|
||||
// call block_alloc::alloc
|
||||
// pop allocator_handle
|
||||
// ; result in spr+8 (pointer to block, or 0 if out of memory)
|
||||
//
|
||||
// For free:
|
||||
// push block_pointer
|
||||
// push allocator_handle
|
||||
// call block_alloc::free
|
||||
// pop allocator_handle
|
||||
// pop block_pointer
|
||||
|
||||
dw heap_start: 0x30000 // Start of our heap area
|
||||
|
||||
// Initialize the allocator
|
||||
// Args: block_size, num_blocks
|
||||
// Returns: allocator handle (pointer to metadata)
|
||||
init:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8 // block_size
|
||||
ldw bpr, rg1, 12 // num_blocks
|
||||
|
||||
// Allocate metadata (16 bytes) + pool space
|
||||
ldw heap_start, rg2 // base address for this allocator
|
||||
mov rg2, rg3 // save base in rg3
|
||||
|
||||
// Calculate total size needed: 16 + (block_size * num_blocks)
|
||||
// We'll use a simple multiplication by repeated addition
|
||||
mov rg0, rg4 // block_size to rg4
|
||||
mov rg1, rg5 // num_blocks to rg5
|
||||
lli 0, acc // accumulator for multiplication
|
||||
|
||||
_multiply_loop:
|
||||
cmp rg5, zero
|
||||
jeq _multiply_done
|
||||
add acc, rg4, acc
|
||||
dec rg5
|
||||
jmp _multiply_loop
|
||||
|
||||
_multiply_done:
|
||||
// acc now contains block_size * num_blocks
|
||||
addi acc, 16 // add metadata size
|
||||
|
||||
// Update heap_start for next allocation
|
||||
add rg2, acc, acc
|
||||
stw acc, heap_start
|
||||
|
||||
// Now set up metadata at rg3 (base)
|
||||
// [base + 0]: free list head (will point to first block)
|
||||
// [base + 4]: block_size
|
||||
// [base + 8]: total blocks
|
||||
// [base + 12]: pool base address
|
||||
|
||||
addi rg3, 16, rg6 // rg6 = pool base
|
||||
|
||||
stw rg6, rg3 // store pool base as free list head initially
|
||||
stw rg0, rg3, 4 // store block_size
|
||||
stw rg1, rg3, 8 // store total blocks
|
||||
stw rg6, rg3, 12 // store pool base address
|
||||
|
||||
// Now initialize the free list
|
||||
// Each block's first 4 bytes point to the next block
|
||||
// rg6 = current block pointer
|
||||
// rg0 = block_size
|
||||
// rg1 = num_blocks (counter)
|
||||
|
||||
dec rg1 // we'll count down from num_blocks-1
|
||||
|
||||
_init_loop:
|
||||
cmp rg1, zero
|
||||
jeq _init_loop_done
|
||||
|
||||
// Calculate next block address: current + block_size
|
||||
add rg6, rg0, rg7 // rg7 = next block address
|
||||
|
||||
// Store next pointer at current block
|
||||
stw rg7, rg6
|
||||
|
||||
// Move to next block
|
||||
mov rg7, rg6
|
||||
dec rg1
|
||||
jmp _init_loop
|
||||
|
||||
_init_loop_done:
|
||||
// Last block points to null (0)
|
||||
lli 0, acc
|
||||
stw acc, rg6
|
||||
|
||||
// Return allocator handle: rg3 already holds the metadata base address, so no adjustment is needed
|
||||
stw rg3, bpr, 8
|
||||
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
|
||||
|
||||
// Allocate a block
|
||||
// Args: allocator_handle
|
||||
// Returns: pointer to block (or 0 if out of memory)
|
||||
alloc:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8 // allocator handle (metadata base)
|
||||
|
||||
// Load free list head
|
||||
ldw rg0, rg1 // rg1 = free list head
|
||||
|
||||
// Check if free list is empty
|
||||
cmp rg1, zero
|
||||
jeq _alloc_fail
|
||||
|
||||
// Free list is not empty, pop the first block
|
||||
// Load the next pointer from the block we're allocating
|
||||
ldw rg1, rg2 // rg2 = next free block
|
||||
|
||||
// Update free list head to point to next block
|
||||
stw rg2, rg0
|
||||
|
||||
// Return the allocated block (rg1)
|
||||
stw rg1, bpr, 8
|
||||
jmp _alloc_done
|
||||
|
||||
_alloc_fail:
|
||||
// No free blocks, return 0
|
||||
lli 0, acc
|
||||
stw acc, bpr, 8
|
||||
|
||||
_alloc_done:
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
|
||||
|
||||
// Free a block
|
||||
// Args: allocator_handle, address of the variable holding the block pointer
|
||||
// Returns: nothing (but could return error code if block is invalid)
|
||||
free:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8 // allocator handle
|
||||
ldw bpr, rg6, 12 // pointer to the block pointer to free
|
||||
ldw rg6, rg1 // rg1 = block pointer to free
|
||||
|
||||
// Load current free list head
|
||||
ldw rg0, rg2 // rg2 = current head
|
||||
|
||||
// Set the freed block's next pointer to current head
|
||||
stw rg2, rg1
|
||||
|
||||
// Update free list head to point to freed block
|
||||
stw rg1, rg0
|
||||
|
||||
// Null out the caller's pointer variable (rg6 holds its address) so it no longer refers to the freed block
|
||||
lli 0, rg1
|
||||
stw rg1, rg6
|
||||
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
|
||||
|
||||
// Debug function: get stats
|
||||
// Args: allocator_handle
|
||||
// Returns: nothing (but could populate a stats structure)
|
||||
get_stats:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8 // allocator handle
|
||||
|
||||
// Count free blocks by traversing the free list
|
||||
ldw rg0, rg1 // rg1 = free list head
|
||||
lli 0, rg2 // rg2 = counter
|
||||
|
||||
count_loop:
|
||||
cmp rg1, zero
|
||||
jeq count_done
|
||||
|
||||
inc rg2
|
||||
ldw rg1, rg1 // move to next block
|
||||
jmp count_loop
|
||||
|
||||
count_done:
|
||||
// rg2 now contains number of free blocks
|
||||
// Could store this somewhere or return it
|
||||
stw rg2, bpr, 8
|
||||
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
+10
-49
@@ -1,51 +1,12 @@
|
||||
// program to just test compute power
|
||||
|
||||
// GENERATED BY DSC COMPILER
|
||||
// Generated at 2026-02-04 01:44:06
|
||||
|
||||
// Imports
|
||||
include print: "./lib/io/print.dsa"
|
||||
include fib: "./lib/maths/fib.dsa"
|
||||
|
||||
// Globals & Reserved Memory
|
||||
|
||||
|
||||
// Entry Point
|
||||
dw stack: 0x10000
|
||||
db message: "Process Exited with code:"
|
||||
_init:
|
||||
ldw stack, bpr
|
||||
mov bpr, spr
|
||||
push zero
|
||||
call main
|
||||
call print::print_newline
|
||||
lwi message, rg0
|
||||
push rg0
|
||||
call print::print
|
||||
pop zero
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
hlt
|
||||
|
||||
|
||||
// Return
|
||||
_ret:
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
|
||||
// Compiled Code Starts...
|
||||
main:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
lli 6, rg0
|
||||
push rg0 // bpr-4: x
|
||||
push rg0 // push arg 0
|
||||
call fib::fib_n
|
||||
pop rg1
|
||||
push rg1 // bpr-8: y
|
||||
push rg1 // push arg 0
|
||||
call print::print_num
|
||||
pop zero
|
||||
jmp _ret
|
||||
dw large_num: 0x333333 // NOTE: 0x333333 is 3,355,443 iterations; use decimal 333333 if ~333,333 iterations were intended
|
||||
start:
|
||||
ldw large_num, rg0
|
||||
|
||||
// run approx 1m instructions
|
||||
loop:
|
||||
dec rg0
|
||||
cmp rg0, zero
|
||||
jgt loop
|
||||
hlt
|
||||
|
||||
+32
-4
@@ -1,9 +1,37 @@
|
||||
include print: "./lib/io/print.dsa";
|
||||
include fib: "./lib/maths/fib.dsa";
|
||||
include alloc: "./lib/memory/block_alloc.dsa";
|
||||
|
||||
fn main() -> u32 {
|
||||
let x: u32 = 6;
|
||||
let allocator: u32 = alloc::init(64, 32);
|
||||
|
||||
let y: u32 = fib::fib_n(x);
|
||||
print::print_num(y);
|
||||
print::print_hex_word(allocator);
|
||||
|
||||
print::print_newline();
|
||||
|
||||
let ptr: u32 = alloc::alloc(allocator);
|
||||
print::print_hex_word(ptr);
|
||||
*ptr = 200;
|
||||
|
||||
print::print_newline();
|
||||
|
||||
let p2: u32 = alloc::alloc(allocator);
|
||||
print::print_hex_word(p2);
|
||||
print::print_newline();
|
||||
print::print_num(*ptr);
|
||||
|
||||
alloc::free(allocator, &ptr);
|
||||
let ptr3: u32 = alloc::alloc(allocator);
|
||||
|
||||
print::print_newline();
|
||||
|
||||
print::print_hex_word(ptr3);
|
||||
|
||||
print::print_newline();
|
||||
print::print_hex_word(ptr);
|
||||
|
||||
if ptr == 0 {
|
||||
print::print("successful free of ptr");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,214 +0,0 @@
|
||||
|
||||
// GENERATED BY DSC COMPILER
|
||||
// Generated at 2026-02-03 23:37:16
|
||||
|
||||
// Imports
|
||||
include print: "./lib/io/print.dsa"
|
||||
|
||||
// Globals & Reserved Memory
|
||||
dw heap_start: 196608
|
||||
dw heap_end: 262144
|
||||
dw heap_current: 196608
|
||||
|
||||
// Entry Point
|
||||
dw stack: 0x10000
|
||||
db message: "Process Exited with code:"
|
||||
_init:
|
||||
ldw stack, bpr
|
||||
mov bpr, spr
|
||||
push zero
|
||||
call main
|
||||
call print::print_newline
|
||||
lwi message, rg0
|
||||
push rg0
|
||||
call print::print
|
||||
pop zero
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
hlt
|
||||
|
||||
|
||||
// Return
|
||||
_ret:
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
|
||||
// Compiled Code Starts...
|
||||
main:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
lli 0, rg0
|
||||
push rg0 // bpr-4: x
|
||||
subi bpr 4 rg1
|
||||
lli 512, rg0
|
||||
push rg1 // bpr-8: y
|
||||
push rg0 // push arg 0
|
||||
call arena_create
|
||||
pop rg2
|
||||
lli 32, rg0
|
||||
push rg2 // bpr-12: alloc
|
||||
push rg0 // push arg 1
|
||||
push rg2 // push arg 0
|
||||
call arena_alloc
|
||||
pop rg3
|
||||
pop zero
|
||||
lli 32, rg0
|
||||
subi bpr 12 rg2
|
||||
ldw rg2, rg2 // bpr-20: alloc
|
||||
push rg3 // bpr-16: ptr1
|
||||
push rg2 // bpr-20: alloc
|
||||
push rg0 // push arg 1
|
||||
push rg2 // push arg 0
|
||||
call arena_alloc
|
||||
pop rg4
|
||||
pop zero
|
||||
subi bpr 20 rg0
|
||||
ldw rg0, rg0 // bpr-28: alloc
|
||||
push rg4 // bpr-24: ptr2
|
||||
push rg0 // bpr-28: alloc
|
||||
push rg0 // push arg 0
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
call print::print_newline
|
||||
subi bpr 16 rg0
|
||||
ldw rg0, rg0 // bpr-24: ptr1
|
||||
push rg0 // bpr-32: ptr1
|
||||
push rg0 // push arg 0
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
call print::print_newline
|
||||
subi bpr 24 rg0
|
||||
ldw rg0, rg0 // bpr-32: ptr2
|
||||
push rg0 // bpr-36: ptr2
|
||||
push rg0 // push arg 0
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
call print::print_newline
|
||||
subi bpr 36 rg0
|
||||
ldw rg0, rg0 // bpr-44: ptr2
|
||||
ldw rg0, rg2
|
||||
push rg0 // bpr-40: ptr2
|
||||
push rg2 // push arg 0
|
||||
call print::print_num
|
||||
pop zero
|
||||
call print::print_newline
|
||||
lli 42, rg2
|
||||
subi bpr 40 rg5
|
||||
ldw rg5, rg5 // bpr-48: ptr2
|
||||
stw rg2, rg5
|
||||
push rg5 // bpr-44: ptr2
|
||||
push rg5 // push arg 0
|
||||
call print::print_hex_word
|
||||
pop zero
|
||||
call print::print_newline
|
||||
subi bpr 44 rg2
|
||||
ldw rg2, rg2 // bpr-52: ptr2
|
||||
ldw rg2, rg5
|
||||
push rg2 // bpr-48: ptr2
|
||||
push rg5 // push arg 0
|
||||
call print::print_num
|
||||
pop zero
|
||||
call print::print_newline
|
||||
db str_1: "end"
|
||||
lwi str_1, rg5
|
||||
push rg5 // push arg 0
|
||||
call print::println
|
||||
pop zero
|
||||
lli 0, rg5
|
||||
stw rg5, bpr, 8
|
||||
jmp _ret
|
||||
|
||||
arena_create:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8
|
||||
lli 12, rg1
|
||||
add rg0, rg1, rg2
|
||||
ldw heap_current, rg1
|
||||
add rg1, rg2, rg3
|
||||
ldw heap_end, rg4
|
||||
cmp rg3, rg4
|
||||
lli 0, rg5
|
||||
jle _cmp_end_2
|
||||
lli 1, rg5
|
||||
_cmp_end_2:
|
||||
cmp rg5, zero
|
||||
jeq _else_4
|
||||
_then_3:
|
||||
lli 0, rg4
|
||||
stw rg4, bpr, 8
|
||||
jmp _ret
|
||||
jmp _end_5
|
||||
_else_4:
|
||||
nop
|
||||
_end_5:
|
||||
lli 12, rg4
|
||||
add rg1, rg4, rg5
|
||||
add rg1, rg2, rg4
|
||||
stw rg5, rg1
|
||||
lli 4, rg6
|
||||
add rg1, rg6, rg7
|
||||
stw rg5, rg7
|
||||
lli 8, rg6
|
||||
add rg1, rg6, rg7
|
||||
stw rg4, rg7
|
||||
stw rg3, heap_current
|
||||
stw rg1, bpr, 8
|
||||
jmp _ret
|
||||
|
||||
arena_alloc:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8
|
||||
ldw bpr, rg1, 12
|
||||
lli 4, rg2
|
||||
add rg0, rg2, rg3
|
||||
ldw rg3, rg2
|
||||
lli 8, rg3
|
||||
add rg0, rg3, rg4
|
||||
ldw rg4, rg3
|
||||
add rg2, rg1, rg4
|
||||
cmp rg4, rg3
|
||||
lli 0, rg5
|
||||
jle _cmp_end_6
|
||||
lli 1, rg5
|
||||
_cmp_end_6:
|
||||
cmp rg5, zero
|
||||
jeq _else_8
|
||||
_then_7:
|
||||
lli 0, rg5
|
||||
stw rg5, bpr, 8
|
||||
jmp _ret
|
||||
jmp _end_9
|
||||
_else_8:
|
||||
nop
|
||||
_end_9:
|
||||
lli 4, rg5
|
||||
add rg0, rg5, rg6
|
||||
stw rg4, rg6
|
||||
stw rg2, bpr, 8
|
||||
jmp _ret
|
||||
|
||||
arena_destroy:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8
|
||||
lli 0, rg1
|
||||
stw rg1, bpr, 8
|
||||
jmp _ret
|
||||
|
||||
reset_all:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw heap_start, rg0
|
||||
stw rg0, heap_current
|
||||
lli 0, rg0
|
||||
stw rg0, bpr, 8
|
||||
jmp _ret
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
include print: "./lib/io/print.dsa";
|
||||
|
||||
fn main() -> u32 {
|
||||
let x: u32 = 30;
|
||||
print::print_num(x);
|
||||
|
||||
200 + 5;
|
||||
|
||||
let p: Point = Point {
|
||||
x: 10,
|
||||
y: 20,
|
||||
test: [2, 3, 4]
|
||||
};
|
||||
}
|
||||
|
||||
struct Point {
|
||||
x: u32,
|
||||
y: u32,
|
||||
test: [u32; 3],
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
include print "./lib/io/print.dsa"
|
||||
include print: "./lib/io/print.dsa"
|
||||
|
||||
dw idt: 0xFFFF0000
|
||||
dw stack: 0x10000
|
||||
@@ -57,7 +57,7 @@ start:
|
||||
|
||||
// test reset cursor pos
|
||||
call print::reset
|
||||
|
||||
|
||||
// test print string at reset pos
|
||||
lwi replace, rg0
|
||||
push rg0
|
||||
|
||||
@@ -7,6 +7,5 @@ int factorial(int n) {
|
||||
|
||||
int main() {
|
||||
int res = factorial(3);
|
||||
printnum(res);
|
||||
return 0;
|
||||
return res;
|
||||
}
|
||||
@@ -1,427 +0,0 @@
|
||||
# DSA Assembly Language Instruction Reference
|
||||
|
||||
## Overview
|
||||
|
||||
This document provides a comprehensive reference for the DSA (Damn Simple Architecture) assembly language, including all hardware instructions and pseudo-instructions with their syntax variations and usage examples.
|
||||
|
||||
## Calling Convention
|
||||
|
||||
| Step | Responsibility | Action | Description |
|
||||
|------|----------------|--------|-------------|
|
||||
| 1 | **Caller** | Push arguments | Push exactly n arguments to the stack in reverse order (the last argument is pushed first, so argument 0 is pushed last) |
|
||||
| 2 | **Caller** | Call function | Execute `call namespace::function` - this automatically pushes the return address (pcx) and jumps to the function |
|
||||
| 3 | **Function** | Set up stack frame | Execute `push bpr; mov spr, bpr` to establish new stack frame |
|
||||
| 4 | **Function** | Access arguments | Read arguments starting at `bpr+8` (the first three arguments are at offsets 8, 12, and 16) |
|
||||
| 5 | **Function** | Execute function | Perform the function's operations using the arguments |
|
||||
| 6 | **Function** | Store return value | Write the return value (if any) to `bpr+8` |
|
||||
| 7 | **Function** | Restore stack frame | Execute `mov bpr, spr; pop bpr` to restore previous stack frame |
|
||||
| 8 | **Function** | Return | Execute `return` pseudo-instruction to return to caller |
|
||||
| 9 | **Caller** | Clean up stack | Pop exactly n arguments from the stack to clean up |
|
||||
| 10 | **Caller** | Handle unused values | Use `pop zero` to discard any unused stack values if needed |
|
||||
|
||||
**Notes:**
|
||||
- The namespace in step 2 is the name assigned in the `include` statement
|
||||
- The `call` pseudo-instruction automatically handles return address management, as long as the callee leaves the stack balanced (every push matched by a pop) before it returns
|
||||
- Arguments are accessed by the callee using offsets from the base pointer (bpr)
|
||||
|
||||
## Registers
|
||||
|
||||
| Register | Type | Description |
|
||||
|----------|------|---------------------------------------------------------------------------------------------------|
|
||||
| `rg0-rgf` | General Purpose | General-purpose registers. |
|
||||
| `acc` | Special | Accumulator for calculations and temporary storage. Do not keep variables in it, as pseudo-instructions may overwrite it implicitly! |
|
||||
| `spr` | Special | Stack pointer |
|
||||
| `bpr` | Special | Base pointer for stack frames |
|
||||
| `ret` | Special | Return address register |
|
||||
| `idr` | Privileged | Interrupt descriptor table address<br/>**on-read/write: protection fault (unless in kernel mode)** |
|
||||
| `mmr` | Privileged | Hardware memory map table address<br/>**on-read/write: protection fault (unless in kernel mode)** |
|
||||
| `zero` | Read-only | Always contains zero<br/>**on-read: always returns zero**<br/>**on-write: value is voided** |
|
||||
| `pcx` | Read-only | Program counter<br/>**on-write: protection fault** |
|
||||
| `noreg` | Placeholder | Indicates absence of register argument<br/>**on-read/write: illegal instruction fault** |
|
||||
|
||||
## Hardware Instructions
|
||||
|
||||
### Data Movement Instructions
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **MOV** | `src_reg, dest_reg` | Copy value from source to destination register |
|
||||
| **MOVS** | `src_reg, dest_reg` | Copy with sign extension |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
mov rg0, rg1 ; Copy rg0 to rg1
|
||||
movs rg0, rg1 ; Copy rg0 to rg1 with sign extension
|
||||
```
|
||||
### Memory Access Instructions
|
||||
|
||||
#### Load Instructions
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **LDB** | `base_reg, dest_reg [, offset]`<br>`label, dest_reg [, offset]` | Load byte from memory |
|
||||
| **LDBS** | `base_reg, dest_reg [, offset]`<br>`label, dest_reg [, offset]` | Load byte with sign extension |
|
||||
| **LDH** | `base_reg, dest_reg [, offset]`<br>`label, dest_reg [, offset]` | Load half-word (16-bit) |
|
||||
| **LDHS** | `base_reg, dest_reg [, offset]`<br>`label, dest_reg [, offset]` | Load half-word with sign extension |
|
||||
| **LDW** | `base_reg, dest_reg [, offset]`<br>`label, dest_reg [, offset]` | Load word (32-bit) |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
; Direct register addressing
|
||||
ldb rg0, rg1 ; Load byte from address in rg0
|
||||
ldw rg0, rg1, 8 ; Load word from (rg0 + 8)
|
||||
|
||||
; Label addressing
|
||||
ldb buffer, rg2 ; Load byte from label 'buffer'
|
||||
ldw stack, bpr ; Load the word stored at label 'stack' into the base pointer
|
||||
```
|
||||
**Label Expansions:**
|
||||
```asm
|
||||
; ldb buffer, rg2 expands to:
|
||||
lli buffer, rg2 ; Load lower 16 bits of buffer address
|
||||
lui buffer, rg2 ; Load upper 16 bits of buffer address
|
||||
ldb rg2, rg2 ; Load byte from address in rg2
|
||||
|
||||
; ldw stack, bpr expands to:
|
||||
lli stack, bpr ; Load lower 16 bits of stack address
|
||||
lui stack, bpr ; Load upper 16 bits of stack address
|
||||
ldw bpr, bpr ; Load word from address in bpr
|
||||
```
|
||||
#### Store Instructions
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **STB** | `src_reg, base_reg [, offset]`<br>`src_reg, label [, offset]` | Store byte to memory |
|
||||
| **STH** | `src_reg, base_reg [, offset]`<br>`src_reg, label [, offset]` | Store half-word to memory |
|
||||
| **STW** | `src_reg, base_reg [, offset]`<br>`src_reg, label [, offset]` | Store word to memory |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
; Direct register addressing
|
||||
stb rg0, rg1 ; Store byte from rg0 to address in rg1
|
||||
stw rg0, rg1, 12 ; Store word to (rg1 + 12)
|
||||
|
||||
; Label addressing
|
||||
stb acc, buffer ; Store byte from accumulator to 'buffer'
|
||||
stw rg1, current ; Store word to 'current' variable
|
||||
```
|
||||
**Label Expansions:**
|
||||
```asm
|
||||
; stb acc, buffer expands to:
|
||||
lli buffer, rgf ; Load lower 16 bits of buffer address
|
||||
lui buffer, rgf ; Load upper 16 bits of buffer address
|
||||
stb acc, rgf ; Store byte from acc to address in rgf
|
||||
|
||||
; stw rg1, current expands to:
|
||||
lli current, rgf ; Load lower 16 bits of current address
|
||||
lui current, rgf ; Load upper 16 bits of current address
|
||||
stw rg1, rgf ; Store word from rg1 to address in rgf
|
||||
```
|
||||
### Immediate Load Instructions
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|------------------------------------------------------------------------|
|
||||
| **LLI** | `imm, dest_reg` | Load 16-bit immediate into lower 16 bits<br/>**Clears upper 16 bits!** |
|
||||
| **LUI** | `imm, dest_reg` | Load 16-bit immediate into upper 16 bits |
|
||||
|
||||
**Usage**
|
||||
|
||||
Always execute **LLI** before **LUI** when loading a full 32-bit value, because **LLI** clears the upper 16 bits.
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
lli 0x1234, rg0 ; Load 0x1234 into lower 16 bits of rg0
|
||||
lui 0xABCD, rg0 ; Load 0xABCD into upper 16 bits of rg0
|
||||
```
|
||||
### Jump Instructions
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **JMP** | `addr [, offset_reg]`<br>`imm, offset_reg` | Unconditional jump |
|
||||
| **JEQ** | `addr [, offset_reg]` | Jump if equal flag set |
|
||||
| **JNE** | `addr [, offset_reg]` | Jump if not equal flag set |
|
||||
| **JGT** | `addr [, offset_reg]` | Jump if greater than flag set |
|
||||
| **JGE** | `addr [, offset_reg]` | Jump if greater or equal flags set |
|
||||
| **JLT** | `addr [, offset_reg]` | Jump if less than flag set |
|
||||
| **JLE** | `addr [, offset_reg]` | Jump if less or equal flags set |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
jmp start ; Jump to label 'start'
|
||||
jmp 4, ret ; Jump to address (4 + ret register)
|
||||
jeq end ; Jump to 'end' if equal flag set
|
||||
jgt loop ; Jump to 'loop' if greater than flag set
|
||||
```
|
||||
### Arithmetic Instructions
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **ADD** | `src1_reg, src2_reg, dest_reg` | Addition |
|
||||
| **SUB** | `src1_reg, src2_reg, dest_reg` | Subtraction |
|
||||
| **IADD** | `src_reg, imm [, dest_reg]` | Immediate addition |
|
||||
| **ISUB** | `src_reg, imm [, dest_reg]` | Immediate subtraction |
|
||||
| **INC** | `reg` | Increment register by 1 |
|
||||
| **DEC** | `reg` | Decrement register by 1 |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
add rg0, rg1, rg2 ; rg2 = rg0 + rg1
|
||||
sub rg0, rg1, rg2 ; rg2 = rg0 - rg1
|
||||
iadd rg0, 10 ; rg0 = rg0 + 10
|
||||
// or using alternate syntax
|
||||
addi rg0, 1 ; rg0 = rg0 + 1
|
||||
inc rg0 ; rg0 = rg0 + 1
|
||||
```
|
||||
### Bitwise Operations
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **AND** | `src1_reg, src2_reg, dest_reg` | Bitwise AND |
|
||||
| **OR** | `src1_reg, src2_reg, dest_reg` | Bitwise OR |
|
||||
| **XOR** | `src1_reg, src2_reg, dest_reg` | Bitwise XOR |
|
||||
| **NOT** | `src_reg, dest_reg` | Bitwise NOT |
|
||||
| **NAND** | `src1_reg, src2_reg, dest_reg` | Bitwise NAND |
|
||||
| **NOR** | `src1_reg, src2_reg, dest_reg` | Bitwise NOR |
|
||||
| **XNOR** | `src1_reg, src2_reg, dest_reg` | Bitwise XNOR |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
and rg0, rg1, rg2 ; rg2 = rg0 & rg1
|
||||
not rg0, rg1 ; rg1 = ~rg0
|
||||
```
|
||||
### Shift Operations
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **SHL** | `reg, shift_amount` | Shift left |
|
||||
| **SHR** | `reg, shift_amount` | Shift right |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
shl rg0, 2 ; Shift rg0 left by 2 bits
|
||||
shr rg0, 3 ; Shift rg0 right by 3 bits
|
||||
```
|
||||
### Comparison and Control
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **CMP** | `reg1, reg2` | Compare registers and set flags |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
cmp rg0, zero ; Compare rg0 with zero register
|
||||
cmp rg1, rg2 ; Compare rg1 with rg2
|
||||
```
|
||||
### System Instructions
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **HLT** | - | Halt processor execution |
|
||||
| **NOP** | - | No operation |
|
||||
| **INT** | `interrupt_code` | Trigger interrupt |
|
||||
| **IRT** | - | Return from interrupt |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
hlt ; Stop processor execution
|
||||
int 0x21 ; Trigger interrupt 0x21
|
||||
```
|
||||
## Pseudo-Instructions
|
||||
|
||||
### Data Definition
|
||||
|
||||
| Mnemonic | Syntax | Description |
|
||||
|----------|--------|-------------|
|
||||
| **DB** | `name: value1 [, value2, ...]` | Define bytes |
|
||||
| **DH** | `name: value1 [, value2, ...]` | Define half-words |
|
||||
| **DW** | `name: value1 [, value2, ...]` | Define words |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
db message: "Hello World", 0
|
||||
dh numbers: 1000, 2000, 3000
|
||||
dw stack: 0x10000
|
||||
```
|
||||
### Memory Reservation
|
||||
|
||||
| Mnemonic | Syntax | Description |
|
||||
|----------|--------|-------------|
|
||||
| **RESB** | `name: size` | Reserve bytes |
|
||||
| **RESH** | `name: size` | Reserve half-words |
|
||||
| **RESW** | `name: size` | Reserve words |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
resb buffer: 256 ; Reserve 256 bytes
|
||||
resh array: 100 ; Reserve space for 100 half-words
|
||||
resw heap: 1024 ; Reserve space for 1024 words
|
||||
```
|
||||
### Stack Operations
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **PUSH** | `reg` | Push register value onto stack |
|
||||
| **POP** | `reg` | Pop stack value into register |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
push rg0 ; Push rg0 value onto stack
|
||||
pop ret ; Pop return address
|
||||
```
|
||||
### Memory Access Shortcuts
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **LWI** | `name, reg` | Load address into register |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
lwi string, rg1 ; Load address of 'string' into rg1
|
||||
```
|
||||
|
||||
### Function Control
|
||||
|
||||
| Mnemonic | Operands | Description |
|
||||
|----------|----------|-------------|
|
||||
| **CALL** | `namespace::function` | Call a function with automatic return address management |
|
||||
| **RETURN** | - | Return from a function to the caller |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
call print::print ; Call the print function from the print namespace
|
||||
return ; Return from the current function
|
||||
```
|
||||
|
||||
### Module System
|
||||
|
||||
| Mnemonic | Syntax | Description |
|
||||
|----------|--------|-------------|
|
||||
| **INCLUDE** | `module_name "path"` | Include module |
|
||||
|
||||
**Examples:**
|
||||
```asm
|
||||
include print "print.dsa"
|
||||
include fib "fib.dsa"
|
||||
```
|
||||
## Library Examples
|
||||
|
||||
### Multiplication Library (multiply.dsa)
|
||||
|
||||
```asm
|
||||
// multiply.dsa
|
||||
// usage:
|
||||
//
|
||||
// include multiply "<relative path>"
|
||||
//
|
||||
// usage for multiply:
|
||||
// push (arg1)
|
||||
// push (arg0)
|
||||
// call multiply::multiply
|
||||
// pop (arg0)
|
||||
// pop (arg1)
|
||||
|
||||
multiply:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8 // load op 1
|
||||
ldw bpr, rg1, 12 // load op 2
|
||||
|
||||
lli 0, acc // initialize accumulator
|
||||
|
||||
start:
|
||||
add acc, rg0, acc
|
||||
dec rg1
|
||||
|
||||
cmp rg1, zero
|
||||
jgt start
|
||||
|
||||
end:
|
||||
stw acc, bpr, 8 // store result for caller
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
```
|
||||
|
||||
### Print Library (print.dsa)
|
||||
|
||||
```asm
|
||||
// print.dsa
|
||||
// usage:
|
||||
//
|
||||
// include print "<relative path>"
|
||||
//
|
||||
// usage for print:
|
||||
// push (register containing address of string)
|
||||
// call print::print
|
||||
// pop zero
|
||||
//
|
||||
// usage for reset:
|
||||
// call print::reset
|
||||
|
||||
dw display: 0x20000
|
||||
dw current: 0x20000
|
||||
|
||||
// prints the given text to the screen.
|
||||
print:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
|
||||
ldw bpr, rg0, 8 // get string address argument
|
||||
ldw current, rg1 // get current display position
|
||||
|
||||
print_loop:
|
||||
ldb rg0, acc
|
||||
stb acc, rg1
|
||||
|
||||
iadd rg0, 1
|
||||
iadd rg1, 1
|
||||
|
||||
cmp acc, zero
|
||||
jne print_loop
|
||||
jmp end
|
||||
|
||||
// return
|
||||
end:
|
||||
stw rg1, current
|
||||
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
|
||||
// resets the cursor position on the screen
|
||||
reset:
|
||||
push bpr
|
||||
mov spr, bpr
|
||||
ldw display, rg1
|
||||
stw rg1, current
|
||||
mov bpr, spr
|
||||
pop bpr
|
||||
return
|
||||
```
|
||||
|
||||
### Example Program (main.dsa)
|
||||
|
||||
```asm
|
||||
include print "./print.dsa"
|
||||
|
||||
dw stack: 0x10000
|
||||
db string: "'To confuse your enemy, you must first confuse yourself' - Probably Sun Tzu."
|
||||
|
||||
init:
|
||||
// set up a stack.
|
||||
ldw stack, bpr
|
||||
mov bpr, spr
|
||||
|
||||
start:
|
||||
lwi string, rg1
|
||||
|
||||
// push string address argument
|
||||
push rg1
|
||||
// call print function
|
||||
call print::print
|
||||
// clean up stack
|
||||
pop rg1
|
||||
|
||||
hlt
|
||||
```
|
||||
@@ -1,10 +0,0 @@
|
||||
# DSA File formatting specification.
|
||||
|
||||
First, a clarification of the file formats this document refers to:
|
||||
|
||||
- .dsb: DSA Binary object, similar to a .o object file
|
||||
- .dse: DSA Executable file, similar to a .exe/ELF binary
|
||||
|
||||
## Format Specification
|
||||
|
||||
### DSB binary format
|
||||
-53
@@ -1,53 +0,0 @@
|
||||
use std::{
|
||||
sync::{Arc, Mutex},
|
||||
thread,
|
||||
};
|
||||
|
||||
use dsa_rs::emulator::{
|
||||
system::{emulator::run_emulator, memory::MainStore, processor::Processor},
|
||||
ui::{
|
||||
control_unit::ControlPanel, interface::EmulatorUI, memory_inspector::MemoryInspector,
|
||||
stack_inspector::StackInspector,
|
||||
},
|
||||
};
|
||||
|
||||
fn main() -> Result<(), eframe::Error> {
|
||||
// Initialize Channels
|
||||
let (cmd_sender, cmd_receiver) = std::sync::mpsc::channel();
|
||||
let (state_sender, state_receiver) = std::sync::mpsc::channel();
|
||||
|
||||
let mainstore = MainStore::new();
|
||||
let processor = Processor::new(Box::new(mainstore), vec![]);
|
||||
|
||||
thread::spawn(move || {
|
||||
run_emulator(&cmd_receiver, &state_sender, processor);
|
||||
});
|
||||
|
||||
// Create UI
|
||||
let mut ui = EmulatorUI::new(cmd_sender.clone(), state_receiver);
|
||||
|
||||
// Create UI modules
|
||||
let control_unit = ControlPanel::new(cmd_sender.clone());
|
||||
ui.add_component(Box::new(control_unit));
|
||||
|
||||
let mem_inspector = MemoryInspector::new(cmd_sender.clone());
|
||||
ui.add_component(Box::new(mem_inspector));
|
||||
|
||||
let stack_inspector = StackInspector::new();
|
||||
ui.add_component(Box::new(stack_inspector));
|
||||
|
||||
// Run UI
|
||||
let options = eframe::NativeOptions {
|
||||
viewport: egui::ViewportBuilder::default().with_inner_size([800.0, 600.0]),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
eframe::run_native(
|
||||
"DSA Simulator (Damn Simple Architecture 🔥)",
|
||||
options,
|
||||
Box::new(move |cc| {
|
||||
cc.egui_ctx.set_visuals(egui::Visuals::default());
|
||||
Ok(Box::new(ui))
|
||||
}),
|
||||
)
|
||||
}
|
||||
Reference in New Issue
Block a user