Compare commits
6 Commits
a1099249e9
...
b8abbfd02f
| Author | SHA1 | Date | |
|---|---|---|---|
| b8abbfd02f | |||
| c2bf9f6667 | |||
| 2f91c4127c | |||
| 89762b54e3 | |||
| a35cfbe864 | |||
| 8d130a870c |
+1
-1
@@ -1,7 +1,7 @@
|
|||||||
cargo-features = ["codegen-backend"]
|
cargo-features = ["codegen-backend"]
|
||||||
|
|
||||||
[workspace]
|
[workspace]
|
||||||
members = ["emulator", "common", "assembler", "dsa_editor", "compiler", "c_compiler"]
|
members = ["emulator", "common", "assembler", "dsa_editor", "compiler"]
|
||||||
resolver = "3"
|
resolver = "3"
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
[package]
|
|
||||||
name = "c_compiler"
|
|
||||||
version.workspace = true
|
|
||||||
edition.workspace = true
|
|
||||||
authors.workspace = true
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
chrono = "0.4.42"
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
int var_x = 5;
|
|
||||||
|
|
||||||
// Recursive factorial: n * (n-1) * ... * 1, and 1 for any n <= 1.
int factorial(int n) {
    if (n > 1) {
        return n * factorial(n - 1);
    }
    return 1;
}
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
int result = var_x + factorial(5);
|
|
||||||
print(result);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
@@ -1,926 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
Simple C to DSA Assembly Compiler
|
|
||||||
Supports a subset of C including:
|
|
||||||
- int variables and functions
|
|
||||||
- Arithmetic operations (+, -, *, /)
|
|
||||||
- Comparisons (==, !=, <, >, <=, >=)
|
|
||||||
- If/else statements
|
|
||||||
- While loops
|
|
||||||
- Function calls
|
|
||||||
- Return statements
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
from typing import List, Dict, Optional, Tuple
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from enum import Enum
|
|
||||||
from pprint import pprint
|
|
||||||
import json
|
|
||||||
|
|
||||||
|
|
||||||
class TokenType(Enum):
    """Every kind of token the C-subset lexer can emit.

    For keywords, operators and delimiters the member value is the literal
    source spelling; IDENTIFIER, NUMBER and EOF carry a descriptive tag
    instead.
    """

    # --- keywords ---
    INT = "int"
    IF = "if"
    ELSE = "else"
    WHILE = "while"
    RETURN = "return"

    # --- names and integer literals ---
    IDENTIFIER = "IDENTIFIER"
    NUMBER = "NUMBER"

    # --- arithmetic, assignment and comparison operators ---
    PLUS = "+"
    MINUS = "-"
    STAR = "*"
    SLASH = "/"
    ASSIGN = "="
    EQ = "=="
    NE = "!="
    LT = "<"
    GT = ">"
    LE = "<="
    GE = ">="

    # --- punctuation ---
    LPAREN = "("
    RPAREN = ")"
    LBRACE = "{"
    RBRACE = "}"
    SEMICOLON = ";"
    COMMA = ","

    # --- end-of-input marker ---
    EOF = "EOF"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class Token:
    """One lexeme together with the source position where it starts."""

    type: TokenType  # category of the lexeme
    value: str  # text of the lexeme (the lexer uses "" for EOF)
    line: int  # line number; the lexer starts counting at 1
    col: int  # column number; the lexer starts counting at 1
|
|
||||||
|
|
||||||
|
|
||||||
class Lexer:
    """Convert C-subset source text into a list of Token objects.

    The scanner keeps a cursor (``pos``) plus the ``line`` and ``col`` of that
    cursor (both starting at 1), so every token and every error message can
    report where it begins.
    """

    def __init__(self, source: str):
        self.source = source
        self.pos = 0  # index of the next unread character
        self.line = 1
        self.col = 1
        self.tokens = []

    def error(self, msg: str):
        """Raise SyntaxError tagged with the current cursor position."""
        raise SyntaxError(f"Lexer error at line {self.line}, col {self.col}: {msg}")

    def peek(self, offset: int = 0) -> Optional[str]:
        """Return the character *offset* places ahead, or None past the end."""
        pos = self.pos + offset
        return self.source[pos] if pos < len(self.source) else None

    def advance(self) -> Optional[str]:
        """Consume and return one character; return None at end of input."""
        if self.pos >= len(self.source):
            return None
        char = self.source[self.pos]
        self.pos += 1
        if char == "\n":
            self.line += 1
            self.col = 1
        else:
            self.col += 1
        return char

    def skip_whitespace(self):
        """Consume one run of spaces, tabs, carriage returns and newlines."""
        while self.peek() and self.peek() in " \t\n\r":
            self.advance()

    def skip_comment(self):
        """Consume one ``//`` comment, including its newline, if one starts here."""
        if self.peek() == "/" and self.peek(1) == "/":
            while self.peek() and self.peek() != "\n":
                self.advance()
            self.advance()  # the newline itself (a no-op at end of input)

    def skip_trivia(self):
        """Consume every consecutive run of whitespace and ``//`` comments.

        A single whitespace pass followed by a single comment pass is not
        enough: a comment is normally followed by indentation, blank lines or
        another comment.  Repeat until a full pass consumes nothing.
        """
        while True:
            before = self.pos
            self.skip_whitespace()
            self.skip_comment()
            if self.pos == before:
                return

    def read_number(self) -> str:
        """Consume a run of digits and return it as text."""
        start = self.pos
        while self.peek() and self.peek().isdigit():
            self.advance()
        return self.source[start:self.pos]

    def read_identifier(self) -> str:
        """Consume a run of alphanumerics/underscores and return it as text."""
        start = self.pos
        while self.peek() and (self.peek().isalnum() or self.peek() == "_"):
            self.advance()
        return self.source[start:self.pos]

    def tokenize(self) -> "List[Token]":
        """Scan the whole source and return the token list, ending with EOF.

        Raises:
            SyntaxError: on a character that starts no known token.
        """
        keywords = {
            "int": TokenType.INT,
            "if": TokenType.IF,
            "else": TokenType.ELSE,
            "while": TokenType.WHILE,
            "return": TokenType.RETURN,
        }
        # Two-character operators are tried before single characters so that
        # "==" is never split into two "=" tokens.
        two_char = {
            "==": TokenType.EQ,
            "!=": TokenType.NE,
            "<=": TokenType.LE,
            ">=": TokenType.GE,
        }
        one_char = {
            "+": TokenType.PLUS,
            "-": TokenType.MINUS,
            "*": TokenType.STAR,
            "/": TokenType.SLASH,
            "=": TokenType.ASSIGN,
            "<": TokenType.LT,
            ">": TokenType.GT,
            "(": TokenType.LPAREN,
            ")": TokenType.RPAREN,
            "{": TokenType.LBRACE,
            "}": TokenType.RBRACE,
            ";": TokenType.SEMICOLON,
            ",": TokenType.COMMA,
        }

        while True:
            self.skip_trivia()
            if self.pos >= len(self.source):
                break

            # Remember where the token starts before consuming any of it.
            line, col = self.line, self.col
            char = self.peek()
            pair = self.source[self.pos:self.pos + 2]

            if char.isdigit():
                kind, text = TokenType.NUMBER, self.read_number()
            elif char.isalpha() or char == "_":
                text = self.read_identifier()
                kind = keywords.get(text, TokenType.IDENTIFIER)
            elif pair in two_char:
                self.advance()
                self.advance()
                kind, text = two_char[pair], pair
            elif char in one_char:
                self.advance()
                kind, text = one_char[char], char
            else:
                self.error(f"Unexpected character: {char}")

            self.tokens.append(Token(kind, text, line, col))

        self.tokens.append(Token(TokenType.EOF, "", self.line, self.col))
        return self.tokens
|
|
||||||
|
|
||||||
|
|
||||||
# AST node classes.
#
# Three families hang off ASTNode: Declaration (top level), Statement and
# Expression.  They are plain dataclasses, so nodes compare by value.
@dataclass
class ASTNode:
    """Common root of every syntax-tree node."""


@dataclass
class Program(ASTNode):
    """A whole translation unit: its top-level declarations in source order."""

    declarations: List["Declaration"]


@dataclass
class Declaration(ASTNode):
    """Base class for top-level declarations."""


@dataclass
class FunctionDecl(Declaration):
    """A function definition."""

    name: str
    params: List[str]  # parameter names, in declaration order
    body: "CompoundStmt"


@dataclass
class VarDecl(Declaration):
    """A top-level variable declaration with an optional initialiser."""

    name: str
    init: Optional["Expression"] = None


@dataclass
class Statement(ASTNode):
    """Base class for statements."""


@dataclass
class CompoundStmt(Statement):
    """A brace-delimited sequence of statements."""

    statements: List[Statement]


@dataclass
class ExprStmt(Statement):
    """An expression used as a statement; ``expr`` may be None."""

    expr: Optional["Expression"]


@dataclass
class IfStmt(Statement):
    """An ``if`` with an optional ``else`` branch."""

    condition: "Expression"
    then_stmt: Statement
    else_stmt: Optional[Statement] = None


@dataclass
class WhileStmt(Statement):
    """A ``while`` loop."""

    condition: "Expression"
    body: Statement


@dataclass
class ReturnStmt(Statement):
    """A ``return``; ``expr`` is None when no value is given."""

    expr: Optional["Expression"]


@dataclass
class Expression(ASTNode):
    """Base class for expressions."""


@dataclass
class BinaryOp(Expression):
    """A two-operand operation; ``op`` is the operator's source spelling."""

    op: str
    left: Expression
    right: Expression


@dataclass
class UnaryOp(Expression):
    """A prefix operation; ``op`` is the operator's source spelling."""

    op: str
    operand: Expression


@dataclass
class AssignExpr(Expression):
    """Assignment of ``value`` to the variable called ``name``."""

    name: str
    value: Expression


@dataclass
class VarExpr(Expression):
    """A read of the variable called ``name``."""

    name: str


@dataclass
class NumberExpr(Expression):
    """An integer literal."""

    value: int


@dataclass
class CallExpr(Expression):
    """A call of the function ``name`` with positional arguments."""

    name: str
    args: List[Expression]
|
|
||||||
|
|
||||||
|
|
||||||
class Parser:
    """Recursive-descent parser that builds a Program AST from lexer tokens.

    Expression precedence, loosest to tightest: assignment (right-associative),
    comparison, additive, multiplicative, unary +/-, primary.  ``parse`` stops
    at the EOF token; reads past the end of the list keep returning the last
    token.
    """

    def __init__(self, tokens: List[Token]):
        self.tokens = tokens
        self.pos = 0  # index of the token currently being examined

    def error(self, msg: str):
        """Raise SyntaxError tagged with the current token's position."""
        here = self.current()
        raise SyntaxError(f"Parser error at line {here.line}, col {here.col}: {msg}")

    def current(self) -> Token:
        """Return the token under the cursor (the last token once past the end)."""
        if self.pos < len(self.tokens):
            return self.tokens[self.pos]
        return self.tokens[-1]

    def peek(self, offset: int = 0) -> Token:
        """Return the token *offset* places ahead, clamped to the last token."""
        index = self.pos + offset
        if index < len(self.tokens):
            return self.tokens[index]
        return self.tokens[-1]

    def advance(self) -> Token:
        """Return the current token and move the cursor to the next one."""
        consumed = self.current()
        # Never step past the final token, so current() stays on it.
        if self.pos < len(self.tokens) - 1:
            self.pos += 1
        return consumed

    def expect(self, token_type: TokenType) -> Token:
        """Consume a token of the given type, or raise SyntaxError."""
        found = self.current()
        if found.type != token_type:
            self.error(f"Expected {token_type.value}, got {found.type.value}")
        return self.advance()

    def parse(self) -> Program:
        """Parse top-level declarations until EOF."""
        declarations = []
        while self.current().type != TokenType.EOF:
            declarations.append(self.parse_declaration())
        return Program(declarations)

    def parse_declaration(self) -> Declaration:
        """Parse ``int name(...) {...}`` or ``int name [= expr];``."""
        self.expect(TokenType.INT)
        name = self.expect(TokenType.IDENTIFIER).value

        if self.current().type != TokenType.LPAREN:
            # Variable declaration.
            init = self._parse_initializer()
            self.expect(TokenType.SEMICOLON)
            return VarDecl(name, init)

        # Function declaration.
        self.advance()
        params = self._parse_params()
        self.expect(TokenType.RPAREN)
        body = self.parse_compound_stmt()
        return FunctionDecl(name, params, body)

    def _parse_params(self):
        """Parse a possibly empty ``int a, int b`` list, stopping before ')'."""
        params = []
        if self.current().type == TokenType.RPAREN:
            return params
        while True:
            self.expect(TokenType.INT)
            params.append(self.expect(TokenType.IDENTIFIER).value)
            if self.current().type != TokenType.COMMA:
                return params
            self.advance()

    def _parse_initializer(self):
        """Parse an optional ``= expr``; return the expression or None."""
        if self.current().type != TokenType.ASSIGN:
            return None
        self.advance()
        return self.parse_expression()

    def parse_compound_stmt(self) -> CompoundStmt:
        """Parse ``{ statement* }``."""
        self.expect(TokenType.LBRACE)
        statements = []
        while self.current().type != TokenType.RBRACE:
            statements.append(self.parse_statement())
        self.expect(TokenType.RBRACE)
        return CompoundStmt(statements)

    def parse_statement(self) -> Statement:
        """Parse one statement, dispatching on its first token."""
        kind = self.current().type
        if kind == TokenType.LBRACE:
            return self.parse_compound_stmt()
        if kind == TokenType.IF:
            return self.parse_if_stmt()
        if kind == TokenType.WHILE:
            return self.parse_while_stmt()
        if kind == TokenType.RETURN:
            return self.parse_return_stmt()
        if kind == TokenType.INT:
            return self._parse_local_decl()

        # Expression statement; a bare ';' yields ExprStmt(None).
        expr = None
        if self.current().type != TokenType.SEMICOLON:
            expr = self.parse_expression()
        self.expect(TokenType.SEMICOLON)
        return ExprStmt(expr)

    def _parse_local_decl(self) -> Statement:
        """Parse ``int name [= expr];`` inside a function body.

        A local declaration is represented as an assignment statement; with no
        initialiser it becomes an empty ExprStmt.
        """
        self.advance()  # the 'int' keyword
        name = self.expect(TokenType.IDENTIFIER).value
        init = self._parse_initializer()
        self.expect(TokenType.SEMICOLON)
        return ExprStmt(AssignExpr(name, init) if init else None)

    def parse_if_stmt(self) -> IfStmt:
        """Parse ``if (cond) stmt [else stmt]``."""
        self.expect(TokenType.IF)
        self.expect(TokenType.LPAREN)
        condition = self.parse_expression()
        self.expect(TokenType.RPAREN)
        then_stmt = self.parse_statement()

        if self.current().type != TokenType.ELSE:
            return IfStmt(condition, then_stmt, None)
        self.advance()
        return IfStmt(condition, then_stmt, self.parse_statement())

    def parse_while_stmt(self) -> WhileStmt:
        """Parse ``while (cond) stmt``."""
        self.expect(TokenType.WHILE)
        self.expect(TokenType.LPAREN)
        condition = self.parse_expression()
        self.expect(TokenType.RPAREN)
        return WhileStmt(condition, self.parse_statement())

    def parse_return_stmt(self) -> ReturnStmt:
        """Parse ``return [expr];``."""
        self.expect(TokenType.RETURN)
        value = None
        if self.current().type != TokenType.SEMICOLON:
            value = self.parse_expression()
        self.expect(TokenType.SEMICOLON)
        return ReturnStmt(value)

    def parse_expression(self) -> Expression:
        """Parse a full expression (entry point of the precedence chain)."""
        return self.parse_assignment()

    def parse_assignment(self) -> Expression:
        """Parse ``name = value`` (right-associative) or a comparison."""
        target = self.parse_comparison()
        if self.current().type != TokenType.ASSIGN:
            return target
        if not isinstance(target, VarExpr):
            self.error("Invalid assignment target")
        self.advance()
        return AssignExpr(target.name, self.parse_assignment())

    def _parse_left_assoc(self, parse_operand, operator_types) -> Expression:
        """Parse ``operand (op operand)*`` into a left-leaning BinaryOp tree."""
        expr = parse_operand()
        while self.current().type in operator_types:
            op = self.advance().value
            expr = BinaryOp(op, expr, parse_operand())
        return expr

    def parse_comparison(self) -> Expression:
        """Parse a chain of ==, !=, <, >, <=, >= over additive operands."""
        return self._parse_left_assoc(
            self.parse_additive,
            (
                TokenType.EQ,
                TokenType.NE,
                TokenType.LT,
                TokenType.GT,
                TokenType.LE,
                TokenType.GE,
            ),
        )

    def parse_additive(self) -> Expression:
        """Parse a chain of + and - over multiplicative operands."""
        return self._parse_left_assoc(
            self.parse_multiplicative, (TokenType.PLUS, TokenType.MINUS)
        )

    def parse_multiplicative(self) -> Expression:
        """Parse a chain of * and / over unary operands."""
        return self._parse_left_assoc(
            self.parse_unary, (TokenType.STAR, TokenType.SLASH)
        )

    def parse_unary(self) -> Expression:
        """Parse any number of prefix + / - followed by a primary."""
        if self.current().type not in (TokenType.PLUS, TokenType.MINUS):
            return self.parse_primary()
        op = self.advance().value
        return UnaryOp(op, self.parse_unary())

    def parse_primary(self) -> Expression:
        """Parse a number, a variable, a call, or a parenthesised expression."""
        token = self.current()

        if token.type == TokenType.NUMBER:
            self.advance()
            return NumberExpr(int(token.value))

        if token.type == TokenType.IDENTIFIER:
            name = self.advance().value
            if self.current().type != TokenType.LPAREN:
                return VarExpr(name)
            # Function call: name '(' [expr (',' expr)*] ')'.
            self.advance()
            args = []
            if self.current().type != TokenType.RPAREN:
                args.append(self.parse_expression())
                while self.current().type == TokenType.COMMA:
                    self.advance()
                    args.append(self.parse_expression())
            self.expect(TokenType.RPAREN)
            return CallExpr(name, args)

        if token.type == TokenType.LPAREN:
            self.advance()
            inner = self.parse_expression()
            self.expect(TokenType.RPAREN)
            return inner

        self.error(f"Unexpected token: {token.type.value}")
|
|
||||||
|
|
||||||
|
|
||||||
class CodeGenerator:
    """Lower a parsed Program into DSA assembly text.

    Expressions are evaluated into registers handed out by a simple
    first-free allocator over ``rg0``..``rgf``.  Every ``generate_*``
    expression method returns the register holding its result; the caller is
    responsible for freeing it.

    Stack conventions, as encoded by the emitted code: parameter *i* is read
    from ``bpr + 8 + 4*i``, a function stores its return value with
    ``stw reg, spr, 8``, and the caller pops that value after the call.

    NOTE(review): locals are addressed at negative offsets from ``bpr`` but no
    instruction reserves that stack space, so a later ``push`` may overwrite
    them -- confirm against the DSA stack semantics.
    """

    def __init__(self):
        self.output = []  # emitted assembly lines, joined by generate()
        self.label_counter = 0
        self.string_counter = 0
        self.functions = {}  # function name -> FunctionDecl
        self.current_function = None  # name of the function being emitted
        self.local_vars = {}  # variable name -> offset from bpr
        self.global_vars = {}  # variable name -> data label
        self.register_pool = [f"rg{i:x}" for i in range(16)]
        self.used_registers = set()

    def new_label(self, prefix: str = "L") -> str:
        """Return a fresh label made of *prefix* and a running counter."""
        label = f"{prefix}{self.label_counter}"
        self.label_counter += 1
        return label

    def allocate_register(self) -> str:
        """Reserve and return the first free register.

        Raises:
            RuntimeError: when all registers of the pool are in use.
        """
        for reg in self.register_pool:
            if reg not in self.used_registers:
                self.used_registers.add(reg)
                return reg
        raise RuntimeError("Out of registers")

    def free_register(self, reg: str):
        """Return *reg* to the pool (a no-op if it was not reserved)."""
        self.used_registers.discard(reg)

    def emit(self, code: str):
        """Append one line of assembly to the output."""
        self.output.append(code)

    def generate(self, program: Program) -> str:
        """Emit globals, the ``init`` entry stub and all functions.

        Returns:
            The complete assembly text, one emitted line per text line.
        """
        self.emit("// Global variables")
        for decl in program.declarations:
            if isinstance(decl, VarDecl):
                self.global_vars[decl.name] = f"var_{decl.name}"
                # Only literal initialisers are honoured; anything else
                # (including no initialiser) starts at 0.
                initial = decl.init.value if isinstance(decl.init, NumberExpr) else 0
                self.emit(f"dw var_{decl.name}: {initial}")

        self.emit("")
        self.emit("// Entry point")
        self.emit("dw stack_bottom: 0x10000")
        self.emit("")
        self.emit("init:")
        self.emit(" ldw stack_bottom, spr")
        self.emit(" mov spr, bpr")

        # Reserve the return-value slot, run main, fetch its result, stop.
        self.emit(" push zero")
        self.emit(" call main")
        self.emit(" pop rg0")
        self.emit(" hlt")
        self.emit("")

        for decl in program.declarations:
            if isinstance(decl, FunctionDecl):
                self.generate_function(decl)

        return "\n".join(self.output)

    def generate_function(self, func: FunctionDecl):
        """Emit prologue, body and the shared ``<name>_end`` epilogue."""
        self.current_function = func.name
        self.functions[func.name] = func

        # Parameters start at bpr+8 (after the return address at bpr+4).
        self.local_vars = {
            param: 8 + (i * 4) for i, param in enumerate(func.params)
        }

        self.emit(f"{func.name}:")
        self.emit(" push bpr")
        self.emit(" mov spr, bpr")
        self.emit("")

        self.generate_compound_stmt(func.body)

        # Every `return` jumps here; falling off the end lands here too.
        self.emit("// default return")
        self.emit(f"{func.name}_end:")
        self.emit(" mov bpr, spr")
        self.emit(" pop bpr")
        self.emit(" return")
        self.emit("")

    def generate_compound_stmt(self, stmt: CompoundStmt):
        """Emit each statement of a block in order."""
        for s in stmt.statements:
            self.generate_statement(s)

    def generate_statement(self, stmt: Statement):
        """Emit one statement, dispatching on its node type."""
        if isinstance(stmt, CompoundStmt):
            self.generate_compound_stmt(stmt)
        elif isinstance(stmt, ExprStmt):
            if stmt.expr:
                # The value of an expression statement is discarded.
                self.free_register(self.generate_expression(stmt.expr))
        elif isinstance(stmt, IfStmt):
            self.generate_if_stmt(stmt)
        elif isinstance(stmt, WhileStmt):
            self.generate_while_stmt(stmt)
        elif isinstance(stmt, ReturnStmt):
            self.generate_return_stmt(stmt)

    def generate_if_stmt(self, stmt: IfStmt):
        """Emit an if/else: a zero condition skips the then-branch."""
        else_label = self.new_label("else")
        end_label = self.new_label("endif")

        cond_reg = self.generate_expression(stmt.condition)
        self.emit(f" cmp {cond_reg}, zero")
        self.free_register(cond_reg)

        if stmt.else_stmt:
            self.emit(f" jeq {else_label}")
        else:
            self.emit(f" jeq {end_label}")

        self.generate_statement(stmt.then_stmt)

        if stmt.else_stmt:
            self.emit(f" jmp {end_label}")
            self.emit(f"{else_label}:")
            self.generate_statement(stmt.else_stmt)

        self.emit(f"{end_label}:")

    def generate_while_stmt(self, stmt: WhileStmt):
        """Emit a loop that re-tests the condition before every iteration."""
        start_label = self.new_label("while_start")
        end_label = self.new_label("while_end")

        self.emit(f"{start_label}:")

        cond_reg = self.generate_expression(stmt.condition)
        self.emit(f" cmp {cond_reg}, zero")
        self.free_register(cond_reg)
        self.emit(f" jeq {end_label}")

        self.generate_statement(stmt.body)
        self.emit(f" jmp {start_label}")

        self.emit(f"{end_label}:")

    def generate_return_stmt(self, stmt: ReturnStmt):
        """Store the optional return value and jump to the epilogue."""
        if stmt.expr:
            reg = self.generate_expression(stmt.expr)
            # Store return value at spr+8 according to calling convention.
            self.emit(f" stw {reg}, spr, 8")
            self.free_register(reg)
        self.emit(f" jmp {self.current_function}_end")

    def generate_expression(self, expr: Expression) -> str:
        """Emit code for *expr* and return the register holding its value.

        Raises:
            RuntimeError: for an undefined variable, an unknown node type, or
                when the register pool is exhausted.
        """
        if isinstance(expr, NumberExpr):
            return self._generate_number(expr)
        if isinstance(expr, VarExpr):
            return self._generate_var(expr)
        if isinstance(expr, AssignExpr):
            return self._generate_assign(expr)
        if isinstance(expr, BinaryOp):
            return self.generate_binary_op(expr)
        if isinstance(expr, UnaryOp):
            return self._generate_unary(expr)
        if isinstance(expr, CallExpr):
            return self._generate_call(expr)
        raise RuntimeError(f"Unknown expression type: {type(expr)}")

    def _generate_number(self, expr: NumberExpr) -> str:
        """Load an integer literal with lli (low half) / lui (high half)."""
        reg = self.allocate_register()
        value = expr.value
        if 0 <= value <= 0xFFFF:
            self.emit(f" lli {value}, {reg}")
            if value > 0xFF:
                # NOTE(review): value >> 16 is always 0 here; presumably this
                # clears the upper half after lli -- confirm against the ISA.
                self.emit(f" lui {value >> 16}, {reg}")
        else:
            # Negative or wider than 16 bits: load both halves explicitly.
            self.emit(f" lli {value & 0xFFFF}, {reg}")
            self.emit(f" lui {(value >> 16) & 0xFFFF}, {reg}")
        return reg

    def _generate_var(self, expr: VarExpr) -> str:
        """Load a local (bpr-relative) or global (by label) variable."""
        reg = self.allocate_register()
        if expr.name in self.local_vars:
            offset = self.local_vars[expr.name]
            self.emit(f" ldw bpr, {reg}, {offset}")
        elif expr.name in self.global_vars:
            label = self.global_vars[expr.name]
            self.emit(f" ldw {label}, {reg}")
        else:
            raise RuntimeError(f"Undefined variable: {expr.name}")
        return reg

    def _generate_assign(self, expr: AssignExpr) -> str:
        """Store a value into a variable; the value stays in the result register.

        A name that is neither a known local nor a global becomes a new local.
        """
        value_reg = self.generate_expression(expr.value)

        if expr.name in self.local_vars:
            offset = self.local_vars[expr.name]
            self.emit(f" stw {value_reg}, bpr, {offset}")
        elif expr.name in self.global_vars:
            label = self.global_vars[expr.name]
            self.emit(f" stw {value_reg}, {label}")
        else:
            # New local: parameters have positive offsets, so locals take the
            # next free negative slot (-4, -8, ...).
            existing = sum(1 for v in self.local_vars.values() if v < 0)
            offset = -(existing + 1) * 4
            self.local_vars[expr.name] = offset
            self.emit(f" stw {value_reg}, bpr, {offset}")

        return value_reg

    def _generate_unary(self, expr: UnaryOp) -> str:
        """Emit unary minus as ``0 - operand``; unary plus is a plain copy."""
        operand_reg = self.generate_expression(expr.operand)
        result_reg = self.allocate_register()

        if expr.op == "-":
            # NOTE(review): every other immediate load in this file uses
            # `lli`; confirm that `lwi` is a valid DSA mnemonic.
            self.emit(f" lwi 0, {result_reg}")
            self.emit(f" sub {result_reg}, {operand_reg}, {result_reg}")
        else:  # +
            self.emit(f" mov {operand_reg}, {result_reg}")

        self.free_register(operand_reg)
        return result_reg

    def _generate_call(self, expr: CallExpr) -> str:
        """Emit a call and return the register holding its return value.

        Every function draws from the same register pool and saves nothing,
        so registers that hold live temporaries of the enclosing expression
        are pushed before the call and popped afterwards.
        """
        # Walk the pool rather than the set so the emitted order is
        # deterministic from run to run.
        live = [reg for reg in self.register_pool if reg in self.used_registers]
        for reg in live:
            self.emit(f" push {reg}")

        if expr.args:
            # Push arguments in reverse order so the first argument is on top.
            for arg in reversed(expr.args):
                reg = self.generate_expression(arg)
                self.emit(f" push {reg}")
                # The value now lives on the stack; release the register.
                self.free_register(reg)
        else:
            # No argument slot exists to carry the return value back, so
            # reserve one -- exactly what `init` does before `call main`.
            self.emit(" push zero")

        self.emit(f" call {expr.name}")

        # The return value is now on top of the stack.
        result_reg = self.allocate_register()
        self.emit(f" pop {result_reg}")

        # Discard the remaining argument slots.
        for _ in range(len(expr.args) - 1):
            self.emit(" pop zero")

        for reg in reversed(live):
            self.emit(f" pop {reg}")

        return result_reg

    def generate_binary_op(self, expr: BinaryOp) -> str:
        """Emit a binary operation and return the register holding the result.

        The left operand's register stays reserved while the right operand is
        generated, so the two can never share a register; both are released
        once the result has been computed.

        Raises:
            RuntimeError: for an operator this generator does not know.
        """
        left_reg = self.generate_expression(expr.left)
        right_reg = self.generate_expression(expr.right)
        result_reg = self.allocate_register()

        if expr.op == "+":
            self.emit(f" add {left_reg}, {right_reg}, {result_reg}")
        elif expr.op == "-":
            self.emit(f" sub {left_reg}, {right_reg}, {result_reg}")
        elif expr.op == "*":
            # Repeated addition: add left to the result `right` times.
            temp_label = self.new_label("mult")
            end_label = self.new_label("mult_end")
            self.emit(f" lli 0, {result_reg}")
            self.emit(f"{temp_label}:")
            self.emit(f" cmp {right_reg}, zero")
            self.emit(f" jeq {end_label}")
            self.emit(f" add {result_reg}, {left_reg}, {result_reg}")
            self.emit(f" dec {right_reg}")
            self.emit(f" jmp {temp_label}")
            self.emit(f"{end_label}:")
        elif expr.op == "/":
            # Repeated subtraction: count how often right fits into left.
            temp_label = self.new_label("div")
            end_label = self.new_label("div_end")
            self.emit(f" lli 0, {result_reg}")
            self.emit(f"{temp_label}:")
            self.emit(f" cmp {left_reg}, {right_reg}")
            self.emit(f" jlt {end_label}")
            self.emit(f" sub {left_reg}, {right_reg}, {left_reg}")
            self.emit(f" inc {result_reg}")
            self.emit(f" jmp {temp_label}")
            self.emit(f"{end_label}:")
        elif expr.op in ("==", "!=", "<", ">", "<=", ">="):
            jump_for = {
                "==": "jeq",
                "!=": "jne",
                "<": "jlt",
                ">": "jgt",
                "<=": "jle",
                ">=": "jge",
            }
            self.emit(f" cmp {left_reg}, {right_reg}")

            # Result is 1 if the condition holds, 0 otherwise.
            self.emit(f" lli 0, {result_reg}")
            true_label = self.new_label("cmp_true")
            end_label = self.new_label("cmp_end")

            self.emit(f" {jump_for[expr.op]} {true_label}")
            self.emit(f" jmp {end_label}")
            self.emit(f"{true_label}:")
            self.emit(f" lli 1, {result_reg}")
            self.emit(f"{end_label}:")
        else:
            raise RuntimeError(f"Unknown binary operator: {expr.op}")

        self.free_register(left_reg)
        self.free_register(right_reg)
        return result_reg
|
|
||||||
|
|
||||||
|
|
||||||
def compile_c_to_asm(source: str) -> str:
    """Translate C-subset source text into DSA assembly text.

    Runs the three stages in sequence: Lexer -> Parser -> CodeGenerator.
    """
    token_stream = Lexer(source).tokenize()
    program = Parser(token_stream).parse()
    return CodeGenerator().generate(program)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Command-line entry point: compile a C file to DSA assembly.

    Usage: ``python compiler.py <input.c> [output.dsa]``.

    When no output path is given, only the final extension of the input
    path is swapped for ``.dsa``. Exits with status 1 on bad usage or when
    compilation raises ``SyntaxError`` / ``RuntimeError``.
    """
    import os  # local import: only this entry point needs path handling

    if len(sys.argv) < 2:
        print("Usage: python compiler.py <input.c> [output.dsa]")
        sys.exit(1)

    input_file = sys.argv[1]
    if len(sys.argv) > 2:
        output_file = sys.argv[2]
    else:
        # Swap only the trailing extension. A plain str.replace(".c", ".dsa")
        # also rewrites ".c" inside directory names, and leaves a path
        # without ".c" unchanged so the output would overwrite the input.
        stem, _extension = os.path.splitext(input_file)
        output_file = stem + ".dsa"
        if output_file == input_file:
            # The input already ends in ".dsa"; never clobber the source.
            output_file = input_file + ".dsa"

    with open(input_file, "r") as f:
        source = f.read()

    try:
        assembly = compile_c_to_asm(source)

        with open(output_file, "w") as f:
            f.write(assembly)

        print(f"Successfully compiled {input_file} to {output_file}")
    except (SyntaxError, RuntimeError) as e:
        print(f"Compilation error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
|
||||||
# # Example usage
|
|
||||||
# if len(sys.argv) > 1:
|
|
||||||
# example_c = sys.argv[1]
|
|
||||||
|
|
||||||
# else:
|
|
||||||
# example_c = """
|
|
||||||
# int factorial(int n) {
|
|
||||||
# if (n <= 1) {
|
|
||||||
# return 1;
|
|
||||||
# }
|
|
||||||
# return n * factorial(n - 1);
|
|
||||||
# }
|
|
||||||
|
|
||||||
# int main() {
|
|
||||||
# int result;
|
|
||||||
# result = factorial(5);
|
|
||||||
# return result;
|
|
||||||
# }
|
|
||||||
# """
|
|
||||||
|
|
||||||
# print("Example C program:")
|
|
||||||
# print(example_c)
|
|
||||||
# print("\n" + "="*60 + "\n")
|
|
||||||
# print("Generated DSA assembly:")
|
|
||||||
# print(compile_c_to_asm(example_c))
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
include print: "lib/io/print.dsa"
|
|
||||||
|
|
||||||
int factorial(int n) {
|
|
||||||
if (n <= 1) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return n * factorial(n - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int add_(int a, int b) {
|
|
||||||
return a + b;
|
|
||||||
}
|
|
||||||
|
|
||||||
int greater(int a, int b) {
|
|
||||||
if (a + a > b + b) {
|
|
||||||
return a;
|
|
||||||
} else {
|
|
||||||
return b + a;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
printnum(-5);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
// Imports
|
|
||||||
include maths: "./lib/maths/core.dsa"
|
|
||||||
|
|
||||||
// Reserved Memory
|
|
||||||
|
|
||||||
@@ -1,106 +0,0 @@
|
|||||||
/// Every register name known to the DSA toolchain.
///
/// The variants fall into three groups: sixteen general-purpose registers,
/// the special-purpose registers, and the system registers that
/// instructions cannot write to.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Register {
    // ---- general purpose registers ----
    Rg0,
    Rg1,
    Rg2,
    Rg3,
    Rg4,
    Rg5,
    Rg6,
    Rg7,
    Rg8,
    Rg9,
    Rga,
    Rgb,
    Rgc,
    Rgd,
    Rge,
    Rgf,

    // ---- special purpose registers ----
    Acc,
    Spr,
    Bpr,
    Ret,
    Idr,
    Mmr,
    Zero,
    NoReg,

    // ---- system registers: can't be written to by instructions ----
    Mar,
    Mdr,
    Sts,
    Cir,
    Pcx,
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
||||||
#[repr(u8)]
|
|
||||||
#[non_exhaustive]
|
|
||||||
/// A list of all current instructions in the DSA Assembly language.
|
|
||||||
pub enum Instruction {
|
|
||||||
// No-op
|
|
||||||
Nop = 0x0,
|
|
||||||
|
|
||||||
// Data transfer instructions
|
|
||||||
Mov(Register, Register) = 0x1,
|
|
||||||
Movs(Register, Register) = 0x2,
|
|
||||||
|
|
||||||
Ldb(Register, Register, Option<u32>) = 0x3,
|
|
||||||
Ldbs(Register, Register, Option<u32>) = 0x4,
|
|
||||||
Ldh(Register, Register, Option<u32>) = 0x5,
|
|
||||||
Ldhs(Register, Register, Option<u32>) = 0x6,
|
|
||||||
Ldw(Register, Register, Option<u32>) = 0x7,
|
|
||||||
|
|
||||||
Stb(Register, Register, Option<u32>) = 0x8,
|
|
||||||
Sth(Register, Register, Option<u32>) = 0x9,
|
|
||||||
Stw(Register, Register, Option<u32>) = 0xA,
|
|
||||||
|
|
||||||
Lli(u16, Register) = 0xB,
|
|
||||||
Lui(u16, Register) = 0xC,
|
|
||||||
|
|
||||||
// Jump Instructions
|
|
||||||
Jump(u16, Register) = 0xD,
|
|
||||||
JumpEq(u16, Register) = 0xE,
|
|
||||||
JumpNeq(u16, Register) = 0xF,
|
|
||||||
JumpGt(u16, Register) = 0x10,
|
|
||||||
JumpGe(u16, Register) = 0x11,
|
|
||||||
JumpLt(u16, Register) = 0x12,
|
|
||||||
JumpLe(u16, Register) = 0x13,
|
|
||||||
|
|
||||||
// Comparison
|
|
||||||
Compare(Register, Register) = 0x14,
|
|
||||||
|
|
||||||
// // Arithmetic
|
|
||||||
// Add(args::RTypeArgs) = 0x19,
|
|
||||||
// Sub(args::RTypeArgs) = 0x1A,
|
|
||||||
// Increment(args::RTypeArgs) = 0x15,
|
|
||||||
// Decrement(args::RTypeArgs) = 0x16,
|
|
||||||
// ShiftLeft(args::RTypeArgs) = 0x17,
|
|
||||||
// ShiftRight(args::RTypeArgs) = 0x18,
|
|
||||||
|
|
||||||
// // Logical
|
|
||||||
// And(args::RTypeArgs) = 0x1B,
|
|
||||||
// Or(args::RTypeArgs) = 0x1C,
|
|
||||||
// Not(args::RTypeArgs) = 0x1D,
|
|
||||||
// Xor(args::RTypeArgs) = 0x1E,
|
|
||||||
// Nand(args::RTypeArgs) = 0x1F,
|
|
||||||
// Nor(args::RTypeArgs) = 0x20,
|
|
||||||
// Xnor(args::RTypeArgs) = 0x21,
|
|
||||||
|
|
||||||
// // Misc
|
|
||||||
// Interrupt(Interrupt) = 0x22,
|
|
||||||
// IntReturn = 0x23,
|
|
||||||
// Halt = 0x24,
|
|
||||||
|
|
||||||
// // Immediate Arithmetic
|
|
||||||
// AddImmediate(args::ITypeArgs) = 0x25,
|
|
||||||
// SubImmediate(args::ITypeArgs) = 0x26,
|
|
||||||
|
|
||||||
// Fake Instructions
|
|
||||||
Data(u32) = 0x3E,
|
|
||||||
Segment(u32) = 0x3F,
|
|
||||||
}
|
|
||||||
@@ -1,599 +0,0 @@
|
|||||||
use std::collections::HashMap;
|
|
||||||
use std::hash::Hash;
|
|
||||||
use std::sync::LazyLock;
|
|
||||||
use std::sync::atomic::AtomicU32;
|
|
||||||
use std::time::SystemTime;
|
|
||||||
|
|
||||||
use chrono::{DateTime, Local};
|
|
||||||
|
|
||||||
use crate::registers::RegisterAllocator;
|
|
||||||
use crate::{block, cmd, comment, dsa};
|
|
||||||
|
|
||||||
use crate::parser::{
|
|
||||||
BinaryOperator, ConstExpr, Declaration, Expression, Parameter, Program, Statement,
|
|
||||||
UnaryOperator,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub struct CodeGenerator {
|
|
||||||
ast: Program,
|
|
||||||
imports: HashMap<String, String>,
|
|
||||||
globals: Vec<String>,
|
|
||||||
functions: Vec<String>,
|
|
||||||
symbols: Vec<String>,
|
|
||||||
allocator: RegisterAllocator,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Built-in function names callable from source code, mapped to the
/// library routine that a call to them is compiled into.
static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    let builtins = [("print", "print::print"), ("printnum", "print::print_num")];
    builtins.into_iter().collect()
});
|
|
||||||
|
|
||||||
/// Format one `include <name>: "<path>"` line for the assembly output.
fn import(name: &str, path: &str) -> String {
    format!("include {name}: \"{path}\"")
}
|
|
||||||
|
|
||||||
impl CodeGenerator {
|
|
||||||
const RET: &'static str = "\tjmp _ret";
|
|
||||||
|
|
||||||
pub fn new(ast: Program) -> Self {
|
|
||||||
CodeGenerator {
|
|
||||||
ast,
|
|
||||||
imports: HashMap::new(),
|
|
||||||
globals: Vec::new(),
|
|
||||||
functions: Vec::new(),
|
|
||||||
symbols: Vec::new(),
|
|
||||||
allocator: RegisterAllocator::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn include(&mut self, name: &str, path: &str) {
|
|
||||||
self.imports.insert(name.to_string(), path.to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn generate(&mut self) -> Result<String, String> {
|
|
||||||
// always include the print library for debugging!
|
|
||||||
self.include("print", "./lib/io/print.dsa");
|
|
||||||
|
|
||||||
for block in self.ast.clone().declarations {
|
|
||||||
match block {
|
|
||||||
Declaration::Variable { name, .. } => self.symbols.push(name),
|
|
||||||
Declaration::Function { name, .. } => self.symbols.push(name),
|
|
||||||
Declaration::Import { name, .. } => self.symbols.push(name),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for block in self.ast.clone().declarations {
|
|
||||||
self.generate_block(block.clone())?;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.generate_layout()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn generate_layout(&mut self) -> Result<String, String> {
|
|
||||||
let datetime: DateTime<Local> = SystemTime::now().into();
|
|
||||||
Ok(dsa![
|
|
||||||
"",
|
|
||||||
comment!("GENERATED BY DSA-C COMPILER"),
|
|
||||||
comment!(format!(
|
|
||||||
"Generated at {}",
|
|
||||||
datetime.format("%Y-%m-%d %H:%M:%S")
|
|
||||||
)),
|
|
||||||
"",
|
|
||||||
// imports
|
|
||||||
comment!("Imports"),
|
|
||||||
self.imports
|
|
||||||
.iter()
|
|
||||||
.map(|(k, v)| import(k, v))
|
|
||||||
.collect::<Vec<String>>()
|
|
||||||
.join("\n"),
|
|
||||||
"",
|
|
||||||
// reserved memory
|
|
||||||
comment!("Globals & Reserved Memory"),
|
|
||||||
self.globals.join("\n"),
|
|
||||||
"",
|
|
||||||
// entry point
|
|
||||||
comment!("Entry Point"),
|
|
||||||
"dw stack: 0x10000",
|
|
||||||
"db message: \"Process Exited with code:\"",
|
|
||||||
block! [ "_init"
|
|
||||||
dsa![ldw stack, bpr],
|
|
||||||
dsa![mov bpr, spr],
|
|
||||||
dsa![push zero],
|
|
||||||
dsa![call main],
|
|
||||||
dsa![call print::print_newline],
|
|
||||||
dsa![lwi message, rg0],
|
|
||||||
dsa![push rg0],
|
|
||||||
dsa![call print::print],
|
|
||||||
dsa![pop zero],
|
|
||||||
dsa![call print::print_hex_word],
|
|
||||||
dsa![pop zero],
|
|
||||||
dsa![hlt]
|
|
||||||
],
|
|
||||||
"",
|
|
||||||
comment!("Function return boilerplate"),
|
|
||||||
block! [ "_ret"
|
|
||||||
dsa![mov bpr, spr],
|
|
||||||
dsa![pop bpr],
|
|
||||||
dsa![return]
|
|
||||||
],
|
|
||||||
// block! [ "main"
|
|
||||||
// dsa![push bpr],
|
|
||||||
// dsa![mov spr, bpr],
|
|
||||||
// dsa![lwi 67, rg1],
|
|
||||||
// dsa![stw rg1, spr, 8],
|
|
||||||
// dsa![mov bpr, spr],
|
|
||||||
// dsa![pop bpr],
|
|
||||||
// dsa![return]
|
|
||||||
// ],
|
|
||||||
"",
|
|
||||||
self.functions.join("\n"),
|
|
||||||
])
|
|
||||||
}
|
|
||||||
|
|
||||||
fn generate_global(&mut self, name: &str, init: Option<ConstExpr>) {
|
|
||||||
self.globals.push(format!(
|
|
||||||
"dw {}: {}",
|
|
||||||
name,
|
|
||||||
init.unwrap_or(ConstExpr::Number(0))
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn generate_block(&mut self, block: Declaration) -> Result<(), String> {
|
|
||||||
match block {
|
|
||||||
Declaration::Variable { name, init } => self.generate_global(&name, init),
|
|
||||||
Declaration::Function {
|
|
||||||
name,
|
|
||||||
return_type,
|
|
||||||
params,
|
|
||||||
body,
|
|
||||||
} => {
|
|
||||||
let func = self.generate_function(&name, ¶ms, &body).join("\n");
|
|
||||||
|
|
||||||
self.functions.push(format!("{func}\n"));
|
|
||||||
}
|
|
||||||
Declaration::Import { name, path } => {
|
|
||||||
self.imports.insert(name, path);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Example: Generate code for a function
|
|
||||||
fn generate_function(
|
|
||||||
&mut self,
|
|
||||||
name: &str,
|
|
||||||
params: &[Parameter],
|
|
||||||
body: &[Statement],
|
|
||||||
) -> Vec<String> {
|
|
||||||
let mut code = Vec::new();
|
|
||||||
|
|
||||||
// Reset allocator for new function
|
|
||||||
self.allocator.reset();
|
|
||||||
|
|
||||||
// Function prologue
|
|
||||||
code.push(format!("{}:", name));
|
|
||||||
code.push("\tpush bpr".to_string());
|
|
||||||
code.push("\tmov spr, bpr".to_string());
|
|
||||||
code.push(String::new());
|
|
||||||
|
|
||||||
// Allocate parameters to registers or stack locations
|
|
||||||
for (i, param) in params.iter().enumerate() {
|
|
||||||
let offset = 8 + (i as i32 * 4); // Parameters start at bpr+8
|
|
||||||
// Track that this parameter is at a stack location
|
|
||||||
let (reg, load_code) = self.allocator.alloc_var(¶m.name).unwrap();
|
|
||||||
code.extend(load_code);
|
|
||||||
code.push(format!("\tldw bpr, {}, {}", reg, offset));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Generate code for function body
|
|
||||||
for stmt in body {
|
|
||||||
let stmt_code = self.generate_statement(stmt).unwrap();
|
|
||||||
code.extend(stmt_code);
|
|
||||||
}
|
|
||||||
|
|
||||||
// automatically return at function end
|
|
||||||
if let Some(x) = code.last()
|
|
||||||
&& x == Self::RET
|
|
||||||
{
|
|
||||||
} else {
|
|
||||||
code.push(Self::RET.to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
code
|
|
||||||
}
|
|
||||||
|
|
||||||
// Example: Generate code for a statement
|
|
||||||
fn generate_statement(&mut self, stmt: &Statement) -> Result<Vec<String>, String> {
|
|
||||||
let mut code = Vec::new();
|
|
||||||
|
|
||||||
match stmt {
|
|
||||||
Statement::Assign {
|
|
||||||
name,
|
|
||||||
declare_type,
|
|
||||||
value,
|
|
||||||
} => {
|
|
||||||
if let Some(expr) = value {
|
|
||||||
// Evaluate expression
|
|
||||||
let (result_reg, expr_code) = self.generate_expression(expr)?;
|
|
||||||
code.extend(expr_code);
|
|
||||||
|
|
||||||
// Store result in variable
|
|
||||||
let store_code = self.allocator.store_var(name, &result_reg);
|
|
||||||
code.extend(store_code);
|
|
||||||
|
|
||||||
// Free temporary register
|
|
||||||
self.allocator.free_temp(&result_reg);
|
|
||||||
} else {
|
|
||||||
// Just declaring variable without initialization
|
|
||||||
self.allocator.alloc_var(name)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Statement::Return { expr } => {
|
|
||||||
if let Some(e) = expr {
|
|
||||||
let (result_reg, expr_code) = self.generate_expression(e)?;
|
|
||||||
code.extend(expr_code);
|
|
||||||
code.push(format!("\tstw {}, bpr, 8", result_reg));
|
|
||||||
code.push(format!("\tjmp _ret"));
|
|
||||||
self.allocator.free_temp(&result_reg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Statement::If {
|
|
||||||
condition,
|
|
||||||
then_stmt,
|
|
||||||
else_stmt,
|
|
||||||
} => {
|
|
||||||
// Generate condition
|
|
||||||
let (cond_reg, cond_code) = self.generate_expression(condition)?;
|
|
||||||
code.extend(cond_code);
|
|
||||||
|
|
||||||
// Compare with zero
|
|
||||||
code.push(format!("\tcmp {}, zero", cond_reg));
|
|
||||||
self.allocator.free_temp(&cond_reg);
|
|
||||||
|
|
||||||
// Generate unique labels
|
|
||||||
let then_label = format!("_then_{}", self.get_unique_label());
|
|
||||||
let else_label = format!("_else_{}", self.get_unique_label());
|
|
||||||
let end_label = format!("_end_{}", self.get_unique_label());
|
|
||||||
|
|
||||||
// Jump to else if condition is false (equal to zero)
|
|
||||||
code.push(format!("\tjeq {}", else_label));
|
|
||||||
|
|
||||||
// Then block
|
|
||||||
code.push(format!("{}:", then_label));
|
|
||||||
for s in then_stmt {
|
|
||||||
code.extend(self.generate_statement(s)?);
|
|
||||||
}
|
|
||||||
|
|
||||||
if then_stmt.len() == 0 {
|
|
||||||
code.push("\tnop".to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
code.push(format!("\tjmp {}", end_label));
|
|
||||||
|
|
||||||
// Else block
|
|
||||||
code.push(format!("{}:", else_label));
|
|
||||||
for s in else_stmt {
|
|
||||||
code.extend(self.generate_statement(s)?);
|
|
||||||
}
|
|
||||||
|
|
||||||
if else_stmt.len() == 0 {
|
|
||||||
code.push("\tnop".to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
code.push(format!("{}:", end_label));
|
|
||||||
}
|
|
||||||
|
|
||||||
Statement::While { condition, body } => {
|
|
||||||
let loop_start = format!("_while_start_{}", self.get_unique_label());
|
|
||||||
let loop_end = format!("_while_end_{}", self.get_unique_label());
|
|
||||||
|
|
||||||
code.push(format!("{}:", loop_start));
|
|
||||||
|
|
||||||
// Generate condition
|
|
||||||
let (cond_reg, cond_code) = self.generate_expression(condition)?;
|
|
||||||
code.extend(cond_code);
|
|
||||||
|
|
||||||
code.push(format!("\tcmp {}, zero", cond_reg));
|
|
||||||
self.allocator.free_temp(&cond_reg);
|
|
||||||
|
|
||||||
code.push(format!("\tjeq {}", loop_end));
|
|
||||||
|
|
||||||
// Loop body
|
|
||||||
for s in body {
|
|
||||||
code.extend(self.generate_statement(s)?);
|
|
||||||
}
|
|
||||||
|
|
||||||
code.push(format!("\tjmp {}", loop_start));
|
|
||||||
code.push(format!("{}:", loop_end));
|
|
||||||
}
|
|
||||||
|
|
||||||
Statement::Expression { expr } => {
|
|
||||||
let (result_reg, expr_code) = self.generate_expression(expr)?;
|
|
||||||
code.extend(expr_code);
|
|
||||||
self.allocator.free_temp(&result_reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
Statement::Block(statements) => {
|
|
||||||
for s in statements {
|
|
||||||
code.extend(self.generate_statement(s)?);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(code)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Example: Generate code for an expression
|
|
||||||
// Returns (register containing result, assembly code)
|
|
||||||
fn generate_expression(
|
|
||||||
&mut self,
|
|
||||||
expr: &Expression,
|
|
||||||
) -> Result<(String, Vec<String>), String> {
|
|
||||||
let mut code = Vec::new();
|
|
||||||
|
|
||||||
match expr {
|
|
||||||
Expression::Number { value } => {
|
|
||||||
let (reg, alloc_code) = self.allocator.alloc_temp()?;
|
|
||||||
code.extend(alloc_code);
|
|
||||||
|
|
||||||
// Load immediate value
|
|
||||||
code.push(format!("\tlli {}, {}", value & 0xFFFF, reg));
|
|
||||||
if *value > 0xFFFF || *value < 0 {
|
|
||||||
code.push(format!("\tlui {}, {}", (value >> 16) & 0xFFFF, reg));
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok((reg, code))
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression::Variable { name, .. } => {
|
|
||||||
let (reg, load_code) = self.allocator.load_var(name)?;
|
|
||||||
code.extend(load_code);
|
|
||||||
Ok((reg, code))
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression::Binary { op, left, right } => {
|
|
||||||
// Evaluate left operand
|
|
||||||
let (left_reg, left_code) = self.generate_expression(left)?;
|
|
||||||
code.extend(left_code);
|
|
||||||
|
|
||||||
// Evaluate right operand
|
|
||||||
let (right_reg, right_code) = self.generate_expression(right)?;
|
|
||||||
code.extend(right_code);
|
|
||||||
|
|
||||||
// Allocate result register
|
|
||||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
|
||||||
code.extend(result_alloc);
|
|
||||||
|
|
||||||
// Generate operation
|
|
||||||
match op {
|
|
||||||
BinaryOperator::Add => {
|
|
||||||
code.push(format!(
|
|
||||||
"\tadd {}, {}, {}",
|
|
||||||
left_reg, right_reg, result_reg
|
|
||||||
));
|
|
||||||
}
|
|
||||||
BinaryOperator::Sub => {
|
|
||||||
code.push(format!(
|
|
||||||
"\tsub {}, {}, {}",
|
|
||||||
left_reg, right_reg, result_reg
|
|
||||||
));
|
|
||||||
}
|
|
||||||
BinaryOperator::Mul => {
|
|
||||||
self.include("maths", "./lib/maths/core.dsa");
|
|
||||||
// Call multiply function
|
|
||||||
code.push(format!("\tpush {}", right_reg));
|
|
||||||
code.push(format!("\tpush {}", left_reg));
|
|
||||||
code.push("\tcall maths::multiply".to_string());
|
|
||||||
code.push(format!("\tpop {}", result_reg));
|
|
||||||
code.push("\tpop zero".to_string());
|
|
||||||
}
|
|
||||||
// Comparison operators - return 1 (true) or 0 (false)
|
|
||||||
BinaryOperator::Eq => {
|
|
||||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
|
||||||
code.push(format!("\tlli 0, {}", result_reg));
|
|
||||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
|
||||||
code.push(format!("\tjne {}", end_label)); // If not equal, skip setting to 1
|
|
||||||
code.push(format!("\tlli 1, {}", result_reg));
|
|
||||||
code.push(format!("{}:", end_label));
|
|
||||||
}
|
|
||||||
BinaryOperator::Ne => {
|
|
||||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
|
||||||
code.push(format!("\tlli 0, {}", result_reg));
|
|
||||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
|
||||||
code.push(format!("\tjeq {}", end_label)); // If equal, skip setting to 1
|
|
||||||
code.push(format!("\tlli 1, {}", result_reg));
|
|
||||||
code.push(format!("{}:", end_label));
|
|
||||||
}
|
|
||||||
BinaryOperator::Lt => {
|
|
||||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
|
||||||
code.push(format!("\tlli 0, {}", result_reg));
|
|
||||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
|
||||||
code.push(format!("\tjge {}", end_label)); // If greater or equal, skip setting to 1
|
|
||||||
code.push(format!("\tlli 1, {}", result_reg));
|
|
||||||
code.push(format!("{}:", end_label));
|
|
||||||
}
|
|
||||||
BinaryOperator::Le => {
|
|
||||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
|
||||||
code.push(format!("\tlli 0, {}", result_reg));
|
|
||||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
|
||||||
code.push(format!("\tjgt {}", end_label)); // If greater than, skip setting to 1
|
|
||||||
code.push(format!("\tlli 1, {}", result_reg));
|
|
||||||
code.push(format!("{}:", end_label));
|
|
||||||
}
|
|
||||||
BinaryOperator::Gt => {
|
|
||||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
|
||||||
code.push(format!("\tlli 0, {}", result_reg));
|
|
||||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
|
||||||
code.push(format!("\tjle {}", end_label)); // If less or equal, skip setting to 1
|
|
||||||
code.push(format!("\tlli 1, {}", result_reg));
|
|
||||||
code.push(format!("{}:", end_label));
|
|
||||||
}
|
|
||||||
BinaryOperator::Ge => {
|
|
||||||
code.push(format!("\tcmp {}, {}", left_reg, right_reg));
|
|
||||||
code.push(format!("\tlli 0, {}", result_reg));
|
|
||||||
let end_label = format!("_cmp_end_{}", self.get_unique_label());
|
|
||||||
code.push(format!("\tjlt {}", end_label)); // If less than, skip setting to 1
|
|
||||||
code.push(format!("\tlli 1, {}", result_reg));
|
|
||||||
code.push(format!("{}:", end_label));
|
|
||||||
}
|
|
||||||
_ => return Err(format!("Unsupported binary operator: {:?}", op)),
|
|
||||||
}
|
|
||||||
|
|
||||||
// Free operand registers (allocator will protect variables)
|
|
||||||
self.allocator.free_temp(&left_reg);
|
|
||||||
self.allocator.free_temp(&right_reg);
|
|
||||||
|
|
||||||
Ok((result_reg, code))
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression::Call { name, args } => {
|
|
||||||
// Save caller-saved registers and track which ones we saved
|
|
||||||
let saved_regs = self.allocator.get_caller_saved_registers();
|
|
||||||
for reg in &saved_regs {
|
|
||||||
code.push(format!("\tpush {}", reg));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Evaluate and push arguments in reverse order
|
|
||||||
let mut arg_regs = Vec::new();
|
|
||||||
for arg in args.iter().rev() {
|
|
||||||
let (arg_reg, arg_code) = self.generate_expression(arg)?;
|
|
||||||
code.extend(arg_code);
|
|
||||||
code.push(format!("\tpush {}", arg_reg));
|
|
||||||
arg_regs.push(arg_reg);
|
|
||||||
}
|
|
||||||
|
|
||||||
if GLOBAL_METHODS.contains_key(name.as_str()) {
|
|
||||||
code.push(format!("\tcall {}", GLOBAL_METHODS[name.as_str()]));
|
|
||||||
} else if self.symbols.contains(name) {
|
|
||||||
// Call local function
|
|
||||||
code.push(format!("\tcall {}", name));
|
|
||||||
} else {
|
|
||||||
return Err(format!("undefined function {name}"));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Result is in rg0, allocate a register and move it
|
|
||||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
|
||||||
|
|
||||||
code.extend(result_alloc);
|
|
||||||
code.push(format!("\tpop {}", result_reg));
|
|
||||||
|
|
||||||
// Clean up arguments
|
|
||||||
if args.len() > 1 {
|
|
||||||
for _ in 0..(args.len() - 1) {
|
|
||||||
code.push("\tpop zero".to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Restore caller-saved registers in reverse order (LIFO)
|
|
||||||
for reg in saved_regs.iter().rev() {
|
|
||||||
code.push(format!("\tpop {}", reg));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Free argument registers
|
|
||||||
for reg in arg_regs {
|
|
||||||
self.allocator.free_temp(®);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok((result_reg, code))
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression::Unary { op, operand } => {
|
|
||||||
let (operand_reg, operand_code) = self.generate_expression(operand)?;
|
|
||||||
code.extend(operand_code);
|
|
||||||
|
|
||||||
let (result_reg, result_alloc) = self.allocator.alloc_temp()?;
|
|
||||||
code.extend(result_alloc);
|
|
||||||
|
|
||||||
match op {
|
|
||||||
UnaryOperator::Minus => {
|
|
||||||
// Negate: result = 0 - operand
|
|
||||||
code.push(format!("\tsub zero, {}, {}", operand_reg, result_reg));
|
|
||||||
}
|
|
||||||
UnaryOperator::Plus => {
|
|
||||||
// Just move
|
|
||||||
code.push(format!("\tmov {}, {}", operand_reg, result_reg));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
self.allocator.free_temp(&operand_reg);
|
|
||||||
Ok((result_reg, code))
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression::Empty => Ok(("zero".to_string(), code)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper for generating unique labels
|
|
||||||
fn get_unique_label(&mut self) -> String {
|
|
||||||
// You'd implement a counter here
|
|
||||||
static COUNTER: AtomicU32 = AtomicU32::new(0);
|
|
||||||
|
|
||||||
let val = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
|
|
||||||
(val + 1).to_string()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Join any number of arguments into one `String`, one argument per line.
/// Every argument must implement `Display`; each is followed by `\n`.
#[macro_export]
macro_rules! dsa {
    ($($arg:expr),* $(,)?) => {{
        use std::fmt::Write;
        // All arguments are appended to one growing buffer instead of
        // allocating a separate string per argument.
        let mut buffer = ::std::string::String::new();
        $(
            writeln!(buffer, "{}", $arg).expect("write to String failed");
        )*
        buffer
    }};
}
|
|
||||||
|
|
||||||
// ──────────────────────── cmd! ────────────────────────
/// Turn its token stream into a `String` holding one instruction line,
/// terminated by a newline: `cmd!(mov rg0, rg1)` gives `"mov rg0, rg1\n"`.
#[macro_export]
macro_rules! cmd {
    ($($tokens:tt)*) => {{
        // `$tokens` is bound inside a repetition, so it must be expanded
        // inside one too; naming it bare makes every invocation fail.
        format!("{}", concat!(stringify!($($tokens)*), "\n"))
    }};
}
|
|
||||||
|
|
||||||
// ──────────────────────── block! ────────────────────────
/// Build a labelled block of instructions as a `&'static str`.
///
/// Usage:
///
/// ```text
/// let asm = block![ "name"
///     dsa![mov rg0, rg1],
///     dsa![add rg1, rg1]
/// ];
/// ```
///
/// `asm` then contains the label line followed by one tab-indented line
/// per instruction:
///
/// ```text
/// name:
///     mov rg0, rg1
///     add rg1, rg1
/// ```
///
/// The `dsa![...]` entries are matched as plain tokens and stringified;
/// they are not expanded as `dsa!` invocations.
#[macro_export]
macro_rules! block {
    // The first token must be a string literal – that is the label.
    ($label:literal $(dsa![$($ins:tt)*]),* ) => {
        // Everything is assembled at compile time.
        concat!(
            $label, ":\n",
            $(concat!("\t", stringify!($($ins)*), "\n")),*
        )
    };
}
|
|
||||||
|
|
||||||
/// Format `$text` as a `// ...` assembly comment line (no trailing newline).
#[macro_export]
macro_rules! comment {
    ($text:expr) => {
        format!("// {}", $text)
    };
}
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
use std::fmt;
|
|
||||||
|
|
||||||
use crate::{codegen::CodeGenerator, lexer::Lexer, parser::Parser};
|
|
||||||
|
|
||||||
// mod assembly;
|
|
||||||
pub mod codegen;
|
|
||||||
pub mod lexer;
|
|
||||||
pub mod parser;
|
|
||||||
mod registers;
|
|
||||||
|
|
||||||
// ============================================================================
|
|
||||||
// Main & Tests
|
|
||||||
// ============================================================================
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
// read from input file: syntax "c_compiler <src.c> [output.dsa]"
|
|
||||||
let args: Vec<String> = std::env::args().collect();
|
|
||||||
if args.len() < 2 {
|
|
||||||
eprintln!("Usage: c_compiler <src.c> [output.dsa]");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let input_file = &args[1];
|
|
||||||
let output_file = if args.len() > 2 {
|
|
||||||
&args[2]
|
|
||||||
} else {
|
|
||||||
"output.dsa"
|
|
||||||
};
|
|
||||||
|
|
||||||
// read input
|
|
||||||
let input = std::fs::read_to_string(input_file).expect("Failed to read input file");
|
|
||||||
|
|
||||||
// Lexing
|
|
||||||
let mut lexer = Lexer::new(&input);
|
|
||||||
let tokens = match lexer.tokenize() {
|
|
||||||
Ok(tokens) => tokens,
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("Lexing error: {}", e);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
println!("Tokens:");
|
|
||||||
for token in &tokens {
|
|
||||||
println!(" {:?}", token.token_type);
|
|
||||||
}
|
|
||||||
println!();
|
|
||||||
|
|
||||||
// Parsing
|
|
||||||
let mut parser = Parser::new(tokens);
|
|
||||||
let ast = match parser.parse() {
|
|
||||||
Ok(ast) => ast,
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("Parsing error: {}", e);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
println!("AST:");
|
|
||||||
println!("{:#?}", ast);
|
|
||||||
|
|
||||||
// Code Gen
|
|
||||||
let mut generator = CodeGenerator::new(ast);
|
|
||||||
let result = match generator.generate() {
|
|
||||||
Ok(code) => code,
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("Parsing error: {}", e);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
std::fs::write(output_file, &result).expect("Failed to write output");
|
|
||||||
println!("Result written to {}", output_file);
|
|
||||||
}
|
|
||||||
@@ -1,344 +0,0 @@
|
|||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
/// Register allocator for DSA assembly generation
|
|
||||||
/// Manages general-purpose registers (rg0-rgf) and handles stack spilling
|
|
||||||
pub struct RegisterAllocator {
|
|
||||||
/// Available general-purpose registers
|
|
||||||
available_registers: Vec<String>,
|
|
||||||
|
|
||||||
/// Maps variable names to their current location (register or stack offset)
|
|
||||||
variable_locations: HashMap<String, Location>,
|
|
||||||
|
|
||||||
/// Maps registers to the variables they currently hold
|
|
||||||
register_contents: HashMap<String, String>,
|
|
||||||
|
|
||||||
/// Current stack offset for local variables (relative to bpr)
|
|
||||||
/// Starts at -4 (going downward from base pointer)
|
|
||||||
stack_offset: i32,
|
|
||||||
|
|
||||||
/// Track which registers are currently in use
|
|
||||||
in_use: HashMap<String, bool>,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Where a variable's value is currently stored.
#[derive(Clone, Debug)]
pub enum Location {
    /// Held in the named register.
    Register(String),
    /// Stored on the stack at this offset from bpr.
    Stack(i32),
}
|
|
||||||
|
|
||||||
impl RegisterAllocator {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
// Initialize with available GP registers (rg0-rgf = 16 registers)
|
|
||||||
let registers = vec![
|
|
||||||
"rg0", "rg1", "rg2", "rg3", "rg4", "rg5", "rg6", "rg7", "rg8", "rg9", "rga",
|
|
||||||
"rgb", "rgc", "rgd", "rge", "rgf",
|
|
||||||
]
|
|
||||||
.into_iter()
|
|
||||||
.map(String::from)
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
RegisterAllocator {
|
|
||||||
available_registers: registers,
|
|
||||||
variable_locations: HashMap::new(),
|
|
||||||
register_contents: HashMap::new(),
|
|
||||||
stack_offset: -4, // Start at -4 (first local below saved bpr)
|
|
||||||
in_use: HashMap::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Allocate a temporary register for expression evaluation
|
|
||||||
/// Returns the register name and optionally assembly code to save it
|
|
||||||
pub fn alloc_temp(&mut self) -> Result<(String, Vec<String>), String> {
|
|
||||||
let mut code = Vec::new();
|
|
||||||
|
|
||||||
// Try to find an unused register
|
|
||||||
for reg in &self.available_registers {
|
|
||||||
if !self.in_use.get(reg).unwrap_or(&false) {
|
|
||||||
self.in_use.insert(reg.clone(), true);
|
|
||||||
return Ok((reg.clone(), code));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// All registers in use - need to spill one
|
|
||||||
// Choose the first register with a variable we can spill
|
|
||||||
// Find a register to spill
|
|
||||||
let reg_to_spill = self
|
|
||||||
.available_registers
|
|
||||||
.iter()
|
|
||||||
.find(|reg| self.register_contents.contains_key(*reg))
|
|
||||||
.cloned();
|
|
||||||
|
|
||||||
if let Some(reg) = reg_to_spill {
|
|
||||||
// Spill this variable to stack
|
|
||||||
let spill_code = self.spill_register(®)?;
|
|
||||||
code.extend(spill_code);
|
|
||||||
|
|
||||||
self.in_use.insert(reg.clone(), true);
|
|
||||||
return Ok((reg, code));
|
|
||||||
}
|
|
||||||
|
|
||||||
Err("No registers available and nothing to spill".to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Free a temporary register after use
|
|
||||||
/// NOTE: This will NOT free registers that contain variables!
|
|
||||||
/// Variables persist throughout their scope and must not be freed
|
|
||||||
pub fn free_temp(&mut self, reg: &str) {
|
|
||||||
// Check if this register contains a variable
|
|
||||||
if self.register_contents.contains_key(reg) {
|
|
||||||
// This register holds a variable - don't free it!
|
|
||||||
// Variables are only freed when they go out of scope via free_var()
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This is a true temporary - safe to free
|
|
||||||
self.in_use.insert(reg.to_string(), false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Allocate a register for a named variable
|
|
||||||
/// Returns the register and any necessary assembly code
|
|
||||||
pub fn alloc_var(&mut self, var_name: &str) -> Result<(String, Vec<String>), String> {
|
|
||||||
// Check if variable already has a location
|
|
||||||
if let Some(location) = self.variable_locations.get(var_name).cloned() {
|
|
||||||
match location {
|
|
||||||
Location::Register(reg) => {
|
|
||||||
return Ok((reg.clone(), Vec::new()));
|
|
||||||
}
|
|
||||||
Location::Stack(offset) => {
|
|
||||||
// Variable is on stack, load it into a register
|
|
||||||
let (reg, mut code) = self.alloc_temp()?;
|
|
||||||
code.push(format!("\tldw bpr, {}, {}", reg, offset));
|
|
||||||
|
|
||||||
// Update location to register
|
|
||||||
self.variable_locations
|
|
||||||
.insert(var_name.to_string(), Location::Register(reg.clone()));
|
|
||||||
self.register_contents
|
|
||||||
.insert(reg.clone(), var_name.to_string());
|
|
||||||
|
|
||||||
return Ok((reg, code));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Variable doesn't have a location yet, allocate a new register
|
|
||||||
let (reg, code) = self.alloc_temp()?;
|
|
||||||
self.variable_locations
|
|
||||||
.insert(var_name.to_string(), Location::Register(reg.clone()));
|
|
||||||
self.register_contents
|
|
||||||
.insert(reg.clone(), var_name.to_string());
|
|
||||||
|
|
||||||
Ok((reg, code))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get the current location of a variable
|
|
||||||
pub fn get_var_location(&self, var_name: &str) -> Option<&Location> {
|
|
||||||
self.variable_locations.get(var_name)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Load a variable into a register (allocating if necessary)
|
|
||||||
/// Returns the register and assembly code to load it
|
|
||||||
pub fn load_var(&mut self, var_name: &str) -> Result<(String, Vec<String>), String> {
|
|
||||||
self.alloc_var(var_name)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Store a value from a register into a variable
|
|
||||||
/// Updates tracking and returns any necessary assembly code
|
|
||||||
pub fn store_var(&mut self, var_name: &str, source_reg: &str) -> Vec<String> {
|
|
||||||
let mut code = Vec::new();
|
|
||||||
|
|
||||||
// Check if variable already has a location
|
|
||||||
if let Some(location) = self.variable_locations.get(var_name) {
|
|
||||||
match location {
|
|
||||||
Location::Register(dest_reg) => {
|
|
||||||
if dest_reg != source_reg {
|
|
||||||
code.push(format!("\tmov {}, {}", source_reg, dest_reg));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Location::Stack(offset) => {
|
|
||||||
code.push(format!("\tstw {}, bpr, {}", source_reg, offset));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Variable doesn't exist yet - try to allocate a register
|
|
||||||
if let Some(free_reg) = self.find_free_register() {
|
|
||||||
if &free_reg != source_reg {
|
|
||||||
code.push(format!("\tmov {}, {}", source_reg, free_reg));
|
|
||||||
}
|
|
||||||
self.variable_locations
|
|
||||||
.insert(var_name.to_string(), Location::Register(free_reg.clone()));
|
|
||||||
self.register_contents
|
|
||||||
.insert(free_reg.clone(), var_name.to_string());
|
|
||||||
self.in_use.insert(free_reg, true);
|
|
||||||
} else {
|
|
||||||
// No free registers - allocate on stack
|
|
||||||
code.push(format!("\tstw {}, bpr, {}", source_reg, self.stack_offset));
|
|
||||||
self.variable_locations
|
|
||||||
.insert(var_name.to_string(), Location::Stack(self.stack_offset));
|
|
||||||
self.stack_offset -= 4; // Move to next stack slot
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
code
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Spill a register to the stack
|
|
||||||
/// Returns assembly code to perform the spill
|
|
||||||
fn spill_register(&mut self, reg: &str) -> Result<Vec<String>, String> {
|
|
||||||
let mut code = Vec::new();
|
|
||||||
|
|
||||||
if let Some(var_name) = self.register_contents.get(reg).cloned() {
|
|
||||||
// Store register content to stack
|
|
||||||
code.push(format!("\tstw {}, bpr, {}", reg, self.stack_offset));
|
|
||||||
|
|
||||||
// Update variable location
|
|
||||||
self.variable_locations
|
|
||||||
.insert(var_name.clone(), Location::Stack(self.stack_offset));
|
|
||||||
|
|
||||||
// Remove from register tracking
|
|
||||||
self.register_contents.remove(reg);
|
|
||||||
|
|
||||||
// Move to next stack slot
|
|
||||||
self.stack_offset -= 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(code)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Find a free register (not currently in use)
|
|
||||||
fn find_free_register(&self) -> Option<String> {
|
|
||||||
for reg in &self.available_registers {
|
|
||||||
if !self.in_use.get(reg).unwrap_or(&false) {
|
|
||||||
return Some(reg.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Spill all registers to stack (useful before function calls)
|
|
||||||
pub fn spill_all(&mut self) -> Vec<String> {
|
|
||||||
let mut code = Vec::new();
|
|
||||||
|
|
||||||
let regs_to_spill: Vec<String> = self.register_contents.keys().cloned().collect();
|
|
||||||
|
|
||||||
for reg in regs_to_spill {
|
|
||||||
if let Ok(spill_code) = self.spill_register(®) {
|
|
||||||
code.extend(spill_code);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
code
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get the total stack space needed for local variables
|
|
||||||
pub fn get_stack_size(&self) -> i32 {
|
|
||||||
-self.stack_offset // Convert negative offset to positive size
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Reset allocator for a new function
|
|
||||||
pub fn reset(&mut self) {
|
|
||||||
self.variable_locations.clear();
|
|
||||||
self.register_contents.clear();
|
|
||||||
self.stack_offset = -4;
|
|
||||||
self.in_use.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Mark a variable as dead (no longer needed)
|
|
||||||
/// Frees its register if it's in one
|
|
||||||
pub fn free_var(&mut self, var_name: &str) {
|
|
||||||
if let Some(Location::Register(reg)) = self.variable_locations.get(var_name) {
|
|
||||||
let reg = reg.clone();
|
|
||||||
self.register_contents.remove(®);
|
|
||||||
self.in_use.insert(reg, false);
|
|
||||||
}
|
|
||||||
self.variable_locations.remove(var_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get list of registers that contain variables and are in use
|
|
||||||
/// These need to be saved before function calls
|
|
||||||
pub fn get_caller_saved_registers(&self) -> Vec<String> {
|
|
||||||
self.register_contents
|
|
||||||
.iter()
|
|
||||||
.filter(|(reg, _)| *self.in_use.get(*reg).unwrap_or(&false))
|
|
||||||
.map(|(reg, _)| reg.clone())
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Save caller-saved registers before a function call
|
|
||||||
/// Returns assembly code to save them
|
|
||||||
pub fn save_caller_saved(&mut self) -> Vec<String> {
|
|
||||||
let mut code = Vec::new();
|
|
||||||
|
|
||||||
// For simplicity, save all currently used registers
|
|
||||||
// In a more sophisticated compiler, you'd only save registers that are live
|
|
||||||
for (reg, var_name) in self.register_contents.clone() {
|
|
||||||
if *self.in_use.get(®).unwrap_or(&false) {
|
|
||||||
code.push(format!("\tpush {}", reg));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
code
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Restore caller-saved registers after a function call
|
|
||||||
/// Returns assembly code to restore them
|
|
||||||
pub fn restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec<String> {
|
|
||||||
let mut code = Vec::new();
|
|
||||||
|
|
||||||
// Restore in reverse order (LIFO)
|
|
||||||
for reg in saved_regs.iter().rev() {
|
|
||||||
code.push(format!("\tpop {}", reg));
|
|
||||||
}
|
|
||||||
|
|
||||||
code
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Temporaries are handed out in register order and a freed temporary is
    /// reused by the next allocation.
    #[test]
    fn test_basic_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg1, code1) = allocator.alloc_temp().unwrap();
        assert!(code1.is_empty()); // No spill needed
        assert_eq!(reg1, "rg0");

        let (reg2, code2) = allocator.alloc_temp().unwrap();
        assert!(code2.is_empty());
        assert_eq!(reg2, "rg1");

        allocator.free_temp(&reg1);

        let (reg3, code3) = allocator.alloc_temp().unwrap();
        assert!(code3.is_empty());
        assert_eq!(reg3, "rg0"); // Reuses freed register
    }

    /// Asking for the same variable twice yields the same register.
    #[test]
    fn test_variable_allocation() {
        let mut allocator = RegisterAllocator::new();

        let (reg, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg, "rg0");

        let (reg2, _) = allocator.alloc_var("x").unwrap();
        assert_eq!(reg2, "rg0");
    }

    /// Once all 16 registers hold variables, the next variable evicts the
    /// first register and the eviction is emitted as a store to the stack.
    #[test]
    fn test_stack_allocation() {
        let mut allocator = RegisterAllocator::new();

        // Allocate all 16 registers
        for i in 0..16 {
            allocator.alloc_var(&format!("var{}", i)).unwrap();
        }

        // Next allocation should spill to stack
        let (reg, code) = allocator.alloc_var("var16").unwrap();
        assert!(!code.is_empty()); // Should have spill code
        assert_eq!(reg, "rg0");
        assert_eq!(code, vec!["\tstw rg0, bpr, -4".to_string()]);
    }
}
|
|
||||||
@@ -1,15 +1,13 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::hash::Hash;
|
|
||||||
use std::sync::LazyLock;
|
|
||||||
use std::sync::atomic::AtomicU32;
|
use std::sync::atomic::AtomicU32;
|
||||||
use std::time::SystemTime;
|
use std::time::SystemTime;
|
||||||
|
|
||||||
use chrono::{DateTime, Local};
|
use chrono::{DateTime, Local};
|
||||||
|
|
||||||
use crate::registers::{Location, RegisterAllocator};
|
use super::registers::RegisterAllocator;
|
||||||
use crate::{block, cmd, comment, dsa};
|
use crate::{block, comment, dsa};
|
||||||
|
|
||||||
use crate::parser::{
|
use crate::model::{
|
||||||
BinaryOperator, CompilerError, ConstExpr, Declaration, Dependency, Expression,
|
BinaryOperator, CompilerError, ConstExpr, Declaration, Dependency, Expression,
|
||||||
Program, Statement, UnaryOperator, Variable,
|
Program, Statement, UnaryOperator, Variable,
|
||||||
};
|
};
|
||||||
@@ -23,19 +21,6 @@ pub struct CodeGenerator {
|
|||||||
allocator: RegisterAllocator,
|
allocator: RegisterAllocator,
|
||||||
}
|
}
|
||||||
|
|
||||||
static GLOBAL_METHODS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
|
|
||||||
HashMap::from([
|
|
||||||
// ("print", "print::print"),
|
|
||||||
// ("println", "print::println"),
|
|
||||||
// ("printnum", "print::print_num"),
|
|
||||||
// ("print_space", "print::print_whitespace"),
|
|
||||||
// ("print_newline", "print::print_newline"),
|
|
||||||
// ("print_char", "print::print_byte"),
|
|
||||||
// ("print_word", "print::print_word"),
|
|
||||||
// ("print_hex", "print::print_hex_word"),
|
|
||||||
])
|
|
||||||
});
|
|
||||||
|
|
||||||
fn import(name: &str, path: &str) -> String {
|
fn import(name: &str, path: &str) -> String {
|
||||||
format!("include {name}: \"{}\"", path)
|
format!("include {name}: \"{}\"", path)
|
||||||
}
|
}
|
||||||
@@ -164,10 +149,7 @@ impl CodeGenerator {
|
|||||||
self.generate_global(&var.name, init)
|
self.generate_global(&var.name, init)
|
||||||
}
|
}
|
||||||
Declaration::Function {
|
Declaration::Function {
|
||||||
name,
|
name, params, body, ..
|
||||||
return_type,
|
|
||||||
params,
|
|
||||||
body,
|
|
||||||
} => {
|
} => {
|
||||||
let func = self.generate_function(&name, &params, &body).join("\n");
|
let func = self.generate_function(&name, &params, &body).join("\n");
|
||||||
|
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
use crate::model::{CompilerError, Program};
|
||||||
|
|
||||||
|
mod codegen;
|
||||||
|
mod registers;
|
||||||
|
|
||||||
|
pub fn generate_code(ast: &Program) -> Result<String, CompilerError> {
|
||||||
|
let mut codegen = codegen::CodeGenerator::new(ast.clone());
|
||||||
|
codegen.generate()
|
||||||
|
}
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use crate::parser::CompilerError;
|
use crate::model::CompilerError;
|
||||||
|
|
||||||
/// Register allocator for DSA assembly generation
|
/// Register allocator for DSA assembly generation
|
||||||
/// Manages general-purpose registers (rg0-rgf) and handles stack spilling
|
/// Manages general-purpose registers (rg0-rgf) and handles stack spilling
|
||||||
@@ -147,7 +147,7 @@ impl RegisterAllocator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Get the current location of a variable
|
/// Get the current location of a variable
|
||||||
pub fn get_var_location(&self, var_name: &str) -> Option<&Location> {
|
pub fn _get_var_location(&self, var_name: &str) -> Option<&Location> {
|
||||||
self.variable_locations.get(var_name)
|
self.variable_locations.get(var_name)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -264,7 +264,7 @@ impl RegisterAllocator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Spill all registers to stack (useful before function calls)
|
/// Spill all registers to stack (useful before function calls)
|
||||||
pub fn spill_all(&mut self) -> Vec<String> {
|
pub fn _spill_all(&mut self) -> Vec<String> {
|
||||||
let mut code = Vec::new();
|
let mut code = Vec::new();
|
||||||
|
|
||||||
let regs_to_spill: Vec<String> = self.register_contents.keys().cloned().collect();
|
let regs_to_spill: Vec<String> = self.register_contents.keys().cloned().collect();
|
||||||
@@ -284,7 +284,7 @@ impl RegisterAllocator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Get the total stack space needed for local variables
|
/// Get the total stack space needed for local variables
|
||||||
pub fn get_stack_size(&self) -> i32 {
|
pub fn _get_stack_size(&self) -> i32 {
|
||||||
-self.stack_offset // Convert negative offset to positive size
|
-self.stack_offset // Convert negative offset to positive size
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -298,7 +298,7 @@ impl RegisterAllocator {
|
|||||||
|
|
||||||
/// Mark a variable as dead (no longer needed)
|
/// Mark a variable as dead (no longer needed)
|
||||||
/// Frees its register if it's in one
|
/// Frees its register if it's in one
|
||||||
pub fn free_var(&mut self, var_name: &str) {
|
pub fn _free_var(&mut self, var_name: &str) {
|
||||||
if let Some(Location::Register(reg)) = self.variable_locations.get(var_name) {
|
if let Some(Location::Register(reg)) = self.variable_locations.get(var_name) {
|
||||||
let reg = reg.clone();
|
let reg = reg.clone();
|
||||||
self.register_contents.remove(&reg);
|
self.register_contents.remove(&reg);
|
||||||
@@ -319,12 +319,12 @@ impl RegisterAllocator {
|
|||||||
|
|
||||||
/// Save caller-saved registers before a function call
|
/// Save caller-saved registers before a function call
|
||||||
/// Returns assembly code to save them
|
/// Returns assembly code to save them
|
||||||
pub fn save_caller_saved(&mut self) -> Vec<String> {
|
pub fn _save_caller_saved(&mut self) -> Vec<String> {
|
||||||
let mut code = Vec::new();
|
let mut code = Vec::new();
|
||||||
|
|
||||||
// For simplicity, save all currently used registers
|
// For simplicity, save all currently used registers
|
||||||
// In a more sophisticated compiler, you'd only save registers that are live
|
// In a more sophisticated compiler, you'd only save registers that are live
|
||||||
for (reg, var_name) in self.register_contents.clone() {
|
for (reg, _) in self.register_contents.clone() {
|
||||||
if *self.in_use.get(&reg).unwrap_or(&false) {
|
if *self.in_use.get(&reg).unwrap_or(&false) {
|
||||||
code.push(format!("\tpush {}", reg));
|
code.push(format!("\tpush {}", reg));
|
||||||
}
|
}
|
||||||
@@ -335,7 +335,7 @@ impl RegisterAllocator {
|
|||||||
|
|
||||||
/// Restore caller-saved registers after a function call
|
/// Restore caller-saved registers after a function call
|
||||||
/// Returns assembly code to restore them
|
/// Returns assembly code to restore them
|
||||||
pub fn restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec<String> {
|
pub fn _restore_caller_saved(&mut self, saved_regs: &[String]) -> Vec<String> {
|
||||||
let mut code = Vec::new();
|
let mut code = Vec::new();
|
||||||
|
|
||||||
// Restore in reverse order (LIFO)
|
// Restore in reverse order (LIFO)
|
||||||
@@ -346,53 +346,3 @@ impl RegisterAllocator {
|
|||||||
code
|
code
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_basic_allocation() {
|
|
||||||
let mut allocator = RegisterAllocator::new();
|
|
||||||
|
|
||||||
let (reg1, code1) = allocator.alloc_temp().unwrap();
|
|
||||||
assert_eq!(code1.len(), 0); // No spill needed
|
|
||||||
assert_eq!(reg1, "rg0");
|
|
||||||
|
|
||||||
let (reg2, code2) = allocator.alloc_temp().unwrap();
|
|
||||||
assert_eq!(code2.len(), 0);
|
|
||||||
assert_eq!(reg2, "rg1");
|
|
||||||
|
|
||||||
allocator.free_temp(&reg1);
|
|
||||||
|
|
||||||
let (reg3, code3) = allocator.alloc_temp().unwrap();
|
|
||||||
assert_eq!(code3.len(), 0);
|
|
||||||
assert_eq!(reg3, "rg0"); // Reuses freed register
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_variable_allocation() {
|
|
||||||
let mut allocator = RegisterAllocator::new();
|
|
||||||
|
|
||||||
let (reg, _) = allocator.alloc_var("x").unwrap();
|
|
||||||
assert_eq!(reg, "rg0");
|
|
||||||
|
|
||||||
// Requesting same variable again should return same register
|
|
||||||
let (reg2, _) = allocator.alloc_var("x").unwrap();
|
|
||||||
assert_eq!(reg2, "rg0");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_stack_allocation() {
|
|
||||||
let mut allocator = RegisterAllocator::new();
|
|
||||||
|
|
||||||
// Allocate all 16 registers
|
|
||||||
for i in 0..16 {
|
|
||||||
allocator.alloc_var(&format!("var{}", i)).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Next allocation should spill to stack
|
|
||||||
let (reg, code) = allocator.alloc_var("var16").unwrap();
|
|
||||||
assert!(code.len() > 0); // Should have spill code
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
use crate::model::{CompilerError, Program};
|
||||||
|
|
||||||
|
mod dsa;
|
||||||
|
|
||||||
|
pub fn compiler_backend(ext: &str, ast: &Program) -> Result<String, CompilerError> {
|
||||||
|
match ext {
|
||||||
|
"dsa" => Ok(dsa::generate_code(ast)?),
|
||||||
|
_ => Err(CompilerError::Generic(format!(
|
||||||
|
"File type {} not supported",
|
||||||
|
ext
|
||||||
|
))),
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -44,6 +44,7 @@ pub enum TokenType {
|
|||||||
Eof,
|
Eof,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
pub enum Type {
|
pub enum Type {
|
||||||
Int32,
|
Int32,
|
||||||
Int16,
|
Int16,
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
use common::logging::log;
|
||||||
|
|
||||||
|
use crate::model::{CompilerError, Program};
|
||||||
|
use parser::Parser;
|
||||||
|
|
||||||
|
pub mod lexer;
|
||||||
|
pub mod parser;
|
||||||
|
|
||||||
|
pub fn generate_ast(input: &str) -> Result<Program, CompilerError> {
|
||||||
|
log("Tokenising Input...");
|
||||||
|
|
||||||
|
let mut lexer = lexer::Lexer::new(&input);
|
||||||
|
let tokens = lexer.tokenize().map_err(|e| CompilerError::Generic(e))?;
|
||||||
|
// println!("{tokens:?}");
|
||||||
|
|
||||||
|
log(&format!("Parsing {} Tokens...", tokens.len()));
|
||||||
|
|
||||||
|
let mut parser = Parser::new(tokens);
|
||||||
|
let ast = match parser.parse() {
|
||||||
|
Ok(ast) => ast,
|
||||||
|
Err(e) => return Err(CompilerError::Generic(e)),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(ast)
|
||||||
|
}
|
||||||
@@ -2,167 +2,12 @@
|
|||||||
// AST Node Types
|
// AST Node Types
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
use std::fmt;
|
use crate::model::{
|
||||||
|
BinaryOperator, Block, ConstExpr, Declaration, Dependency, Expression, Name, Program,
|
||||||
|
Statement, TypeId, UnaryOperator, Variable,
|
||||||
|
};
|
||||||
|
|
||||||
use crate::lexer::{Token, TokenType};
|
use super::lexer::{Token, TokenType};
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct Program {
|
|
||||||
pub declarations: Vec<Declaration>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum Declaration {
|
|
||||||
Function {
|
|
||||||
name: String,
|
|
||||||
return_type: Type,
|
|
||||||
params: Vec<Parameter>,
|
|
||||||
body: Block,
|
|
||||||
},
|
|
||||||
Variable {
|
|
||||||
name: String,
|
|
||||||
init: Option<ConstExpr>,
|
|
||||||
},
|
|
||||||
Import {
|
|
||||||
name: String,
|
|
||||||
path: String,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct Parameter {
|
|
||||||
pub name: String,
|
|
||||||
pub param_type: Type,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum Type {
|
|
||||||
Int,
|
|
||||||
Long,
|
|
||||||
Float,
|
|
||||||
Double,
|
|
||||||
Char,
|
|
||||||
Void,
|
|
||||||
Ptr(Box<Type>),
|
|
||||||
Array(Box<Type>, usize),
|
|
||||||
Struct(String),
|
|
||||||
}
|
|
||||||
|
|
||||||
pub type Block = Vec<Statement>;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum Statement {
|
|
||||||
Block(Block),
|
|
||||||
Assign {
|
|
||||||
// left side
|
|
||||||
name: String,
|
|
||||||
declare_type: Option<Type>,
|
|
||||||
|
|
||||||
// right side
|
|
||||||
value: Option<Box<Expression>>,
|
|
||||||
},
|
|
||||||
Expression {
|
|
||||||
expr: Expression,
|
|
||||||
},
|
|
||||||
If {
|
|
||||||
condition: Expression,
|
|
||||||
then_stmt: Block,
|
|
||||||
else_stmt: Block,
|
|
||||||
},
|
|
||||||
While {
|
|
||||||
condition: Expression,
|
|
||||||
body: Vec<Statement>,
|
|
||||||
},
|
|
||||||
Return {
|
|
||||||
expr: Option<Expression>,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum ConstExpr {
|
|
||||||
Number(i32),
|
|
||||||
String(String),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for ConstExpr {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
ConstExpr::Number(n) => write!(f, "{}", n),
|
|
||||||
ConstExpr::String(s) => write!(f, "\"{}\"", s),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum Expression {
|
|
||||||
Empty,
|
|
||||||
Binary {
|
|
||||||
op: BinaryOperator,
|
|
||||||
left: Box<Expression>,
|
|
||||||
right: Box<Expression>,
|
|
||||||
},
|
|
||||||
Unary {
|
|
||||||
op: UnaryOperator,
|
|
||||||
operand: Box<Expression>,
|
|
||||||
},
|
|
||||||
Variable {
|
|
||||||
name: String,
|
|
||||||
expr_type: Option<Type>,
|
|
||||||
},
|
|
||||||
Number {
|
|
||||||
value: i32,
|
|
||||||
},
|
|
||||||
Call {
|
|
||||||
name: String,
|
|
||||||
args: Vec<Expression>,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
|
||||||
pub enum BinaryOperator {
|
|
||||||
Add,
|
|
||||||
Sub,
|
|
||||||
Mul,
|
|
||||||
Div,
|
|
||||||
Eq,
|
|
||||||
Ne,
|
|
||||||
Lt,
|
|
||||||
Gt,
|
|
||||||
Le,
|
|
||||||
Ge,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for BinaryOperator {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
BinaryOperator::Add => write!(f, "+"),
|
|
||||||
BinaryOperator::Sub => write!(f, "-"),
|
|
||||||
BinaryOperator::Mul => write!(f, "*"),
|
|
||||||
BinaryOperator::Div => write!(f, "/"),
|
|
||||||
BinaryOperator::Eq => write!(f, "=="),
|
|
||||||
BinaryOperator::Ne => write!(f, "!="),
|
|
||||||
BinaryOperator::Lt => write!(f, "<"),
|
|
||||||
BinaryOperator::Gt => write!(f, ">"),
|
|
||||||
BinaryOperator::Le => write!(f, "<="),
|
|
||||||
BinaryOperator::Ge => write!(f, ">="),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
|
||||||
pub enum UnaryOperator {
|
|
||||||
Plus,
|
|
||||||
Minus,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for UnaryOperator {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
UnaryOperator::Plus => write!(f, "+"),
|
|
||||||
UnaryOperator::Minus => write!(f, "-"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Parser
|
// Parser
|
||||||
@@ -252,7 +97,7 @@ impl Parser {
|
|||||||
.ok_or(String::from("Expected string literal"))?;
|
.ok_or(String::from("Expected string literal"))?;
|
||||||
|
|
||||||
self.advance();
|
self.advance();
|
||||||
return Ok(Declaration::Import { name, path });
|
return Ok(Declaration::Dependency(Dependency { name, path }));
|
||||||
}
|
}
|
||||||
|
|
||||||
self.expect(TokenType::Int)?;
|
self.expect(TokenType::Int)?;
|
||||||
@@ -267,16 +112,16 @@ impl Parser {
|
|||||||
TokenType::LParen => {
|
TokenType::LParen => {
|
||||||
// Function declaration
|
// Function declaration
|
||||||
self.advance();
|
self.advance();
|
||||||
let mut params = Vec::<Parameter>::new();
|
let mut params = Vec::<Variable>::new();
|
||||||
|
|
||||||
if !matches!(self.current().token_type, TokenType::RParen) {
|
if !matches!(self.current().token_type, TokenType::RParen) {
|
||||||
self.expect(TokenType::Int)?;
|
self.expect(TokenType::Int)?;
|
||||||
|
|
||||||
match &self.current().token_type {
|
match &self.current().token_type {
|
||||||
TokenType::Identifier(s) => {
|
TokenType::Identifier(s) => {
|
||||||
params.push(Parameter {
|
params.push(Variable {
|
||||||
name: s.clone(),
|
name: s.clone(),
|
||||||
param_type: Type::Int,
|
type_id: TypeId::U32,
|
||||||
});
|
});
|
||||||
self.advance();
|
self.advance();
|
||||||
}
|
}
|
||||||
@@ -289,9 +134,9 @@ impl Parser {
|
|||||||
|
|
||||||
match &self.current().token_type {
|
match &self.current().token_type {
|
||||||
TokenType::Identifier(s) => {
|
TokenType::Identifier(s) => {
|
||||||
params.push(Parameter {
|
params.push(Variable {
|
||||||
name: s.clone(),
|
name: s.clone(),
|
||||||
param_type: Type::Int,
|
type_id: TypeId::U32,
|
||||||
});
|
});
|
||||||
self.advance();
|
self.advance();
|
||||||
}
|
}
|
||||||
@@ -307,7 +152,7 @@ impl Parser {
|
|||||||
name,
|
name,
|
||||||
params,
|
params,
|
||||||
body,
|
body,
|
||||||
return_type: Type::Int,
|
return_type: TypeId::U32,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
@@ -327,7 +172,14 @@ impl Parser {
|
|||||||
};
|
};
|
||||||
|
|
||||||
self.expect(TokenType::Semicolon)?;
|
self.expect(TokenType::Semicolon)?;
|
||||||
Ok(Declaration::Variable { name, init })
|
Ok(Declaration::Variable {
|
||||||
|
var: Variable {
|
||||||
|
name,
|
||||||
|
type_id: TypeId::U32,
|
||||||
|
},
|
||||||
|
init,
|
||||||
|
is_const: false,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -369,9 +221,8 @@ impl Parser {
|
|||||||
|
|
||||||
self.expect(TokenType::Semicolon)?;
|
self.expect(TokenType::Semicolon)?;
|
||||||
Ok(Statement::Assign {
|
Ok(Statement::Assign {
|
||||||
name,
|
varname: name,
|
||||||
value: Some(Box::new(expr)),
|
value: expr,
|
||||||
declare_type: None,
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
// var expression
|
// var expression
|
||||||
@@ -379,7 +230,10 @@ impl Parser {
|
|||||||
self.expect(TokenType::Semicolon)?;
|
self.expect(TokenType::Semicolon)?;
|
||||||
Ok(Statement::Expression {
|
Ok(Statement::Expression {
|
||||||
expr: Expression::Variable {
|
expr: Expression::Variable {
|
||||||
name,
|
name: Name {
|
||||||
|
name,
|
||||||
|
namespace: None,
|
||||||
|
},
|
||||||
expr_type: None,
|
expr_type: None,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
@@ -406,15 +260,13 @@ impl Parser {
|
|||||||
// Convert to assignment expression statement
|
// Convert to assignment expression statement
|
||||||
let expr = if let Some(init_expr) = init {
|
let expr = if let Some(init_expr) = init {
|
||||||
Statement::Assign {
|
Statement::Assign {
|
||||||
name,
|
varname: name,
|
||||||
value: Some(Box::new(init_expr)),
|
value: init_expr,
|
||||||
declare_type: Some(Type::Int),
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Statement::Assign {
|
Statement::Assign {
|
||||||
name,
|
varname: name,
|
||||||
value: None,
|
value: Expression::Empty,
|
||||||
declare_type: Some(Type::Int),
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -474,7 +326,7 @@ impl Parser {
|
|||||||
};
|
};
|
||||||
|
|
||||||
self.expect(TokenType::Semicolon)?;
|
self.expect(TokenType::Semicolon)?;
|
||||||
Ok(Statement::Return { expr })
|
Ok(Statement::Return(expr))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_expression(&mut self) -> Result<Expression, String> {
|
fn parse_expression(&mut self) -> Result<Expression, String> {
|
||||||
@@ -566,7 +418,7 @@ impl Parser {
|
|||||||
TokenType::Number(n) => {
|
TokenType::Number(n) => {
|
||||||
let value = *n;
|
let value = *n;
|
||||||
self.advance();
|
self.advance();
|
||||||
Ok(Expression::Number { value })
|
Ok(Expression::Number(value as isize))
|
||||||
}
|
}
|
||||||
TokenType::Identifier(name) => {
|
TokenType::Identifier(name) => {
|
||||||
let name = name.clone();
|
let name = name.clone();
|
||||||
@@ -587,10 +439,19 @@ impl Parser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
self.expect(TokenType::RParen)?;
|
self.expect(TokenType::RParen)?;
|
||||||
Ok(Expression::Call { name, args })
|
Ok(Expression::Call {
|
||||||
|
name: Name {
|
||||||
|
name,
|
||||||
|
namespace: None,
|
||||||
|
},
|
||||||
|
args,
|
||||||
|
})
|
||||||
} else {
|
} else {
|
||||||
Ok(Expression::Variable {
|
Ok(Expression::Variable {
|
||||||
name,
|
name: Name {
|
||||||
|
name,
|
||||||
|
namespace: None,
|
||||||
|
},
|
||||||
expr_type: None,
|
expr_type: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -52,14 +52,10 @@ pub enum Token {
|
|||||||
Eof,
|
Eof,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Clone)]
|
|
||||||
pub struct Name {
|
|
||||||
pub name: String,
|
|
||||||
pub namespace: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
|
use crate::model::Name;
|
||||||
|
|
||||||
impl fmt::Display for Name {
|
impl fmt::Display for Name {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
if let Some(ref ns) = self.namespace {
|
if let Some(ref ns) = self.namespace {
|
||||||
@@ -289,7 +285,7 @@ impl<'a> Lexer<'a> {
|
|||||||
|
|
||||||
// Create a temporary check
|
// Create a temporary check
|
||||||
let mut temp_chars = self.chars.clone();
|
let mut temp_chars = self.chars.clone();
|
||||||
let first_peek = temp_chars.next(); // This is the ':' we already saw
|
let _ = temp_chars.next(); // This is the ':' we already saw
|
||||||
let second_peek = temp_chars.peek();
|
let second_peek = temp_chars.peek();
|
||||||
|
|
||||||
if let Some(&':') = second_peek {
|
if let Some(&':') = second_peek {
|
||||||
@@ -547,6 +543,21 @@ impl<'a> Lexer<'a> {
|
|||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Char literals
|
||||||
|
if c == '\'' {
|
||||||
|
let mut value = ' ';
|
||||||
|
self.advance();
|
||||||
|
if let Some(ch) = self.current {
|
||||||
|
value = ch;
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
if self.current == Some('\'') {
|
||||||
|
self.advance();
|
||||||
|
return Token::Char(value);
|
||||||
|
}
|
||||||
|
eprintln!("Lexer error on line {}: Invalid char literal", self.line);
|
||||||
|
}
|
||||||
|
|
||||||
// String literals
|
// String literals
|
||||||
if c == '"' {
|
if c == '"' {
|
||||||
let token = match self.read_string() {
|
let token = match self.read_string() {
|
||||||
@@ -614,14 +625,3 @@ impl<'a> Iterator for Lexer<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_basic() {
|
|
||||||
// Placeholder test
|
|
||||||
assert!(true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,38 @@
|
|||||||
|
use common::logging::log;
|
||||||
|
|
||||||
|
use crate::model::{CompilerError, Program};
|
||||||
|
use parser::{ParseResult, Parser};
|
||||||
|
use semantic_analyser::Analyser;
|
||||||
|
|
||||||
|
pub mod lexer;
|
||||||
|
pub mod parser;
|
||||||
|
pub mod semantic_analyser;
|
||||||
|
|
||||||
|
pub fn generate_ast(input: &str) -> Result<Program, CompilerError> {
|
||||||
|
log("Tokenising Input...");
|
||||||
|
|
||||||
|
let lexer = lexer::Lexer::new(&input);
|
||||||
|
let tokens = lexer.collect::<Vec<_>>();
|
||||||
|
// println!("{tokens:?}");
|
||||||
|
|
||||||
|
log(&format!("Parsing {} Tokens...", tokens.len()));
|
||||||
|
|
||||||
|
let mut parser = Parser::new(tokens);
|
||||||
|
let ast = match parser.parse() {
|
||||||
|
ParseResult::Accept(ast) => ast,
|
||||||
|
ParseResult::Reject(e) => return Err(e),
|
||||||
|
ParseResult::Deny => {
|
||||||
|
return Err(CompilerError::Generic("Parser used ::Deny".to_string()));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// println!("{ast:#?}");
|
||||||
|
|
||||||
|
log("Analyzing AST...");
|
||||||
|
log("Checking Type Information...");
|
||||||
|
|
||||||
|
let analyser = Analyser::new();
|
||||||
|
analyser.analyse(ast.clone()).unwrap();
|
||||||
|
|
||||||
|
log("Type Checking Complete...");
|
||||||
|
Ok(ast)
|
||||||
|
}
|
||||||
@@ -1,6 +1,9 @@
|
|||||||
use crate::lexer::{Name, Token};
|
use super::lexer::Token;
|
||||||
|
use crate::model::{
|
||||||
|
BinaryOperator, Block, CompilerError, ConstExpr, Declaration, Dependency, Expression,
|
||||||
|
Program, Statement, TypeId, UnaryOperator, Variable,
|
||||||
|
};
|
||||||
use crate::{expect_tt, expect_value};
|
use crate::{expect_tt, expect_value};
|
||||||
use core::fmt;
|
|
||||||
use std::ops::{ControlFlow, FromResidual, Try};
|
use std::ops::{ControlFlow, FromResidual, Try};
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@@ -10,16 +13,6 @@ pub enum ParseResult<T, E> {
|
|||||||
Reject(E),
|
Reject(E),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum CompilerError {
|
|
||||||
UnexpectedToken(Token),
|
|
||||||
UnexpectedEndOfInput,
|
|
||||||
UnexpectedCharacter(char),
|
|
||||||
Undefined(Name),
|
|
||||||
InvalidSyntax(String),
|
|
||||||
Generic(String),
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct Parser {
|
pub struct Parser {
|
||||||
tokens: Vec<Token>,
|
tokens: Vec<Token>,
|
||||||
idx: usize,
|
idx: usize,
|
||||||
@@ -86,7 +79,11 @@ impl Parser {
|
|||||||
let init = match value {
|
let init = match value {
|
||||||
Token::String(x) => Some(ConstExpr::String(x)),
|
Token::String(x) => Some(ConstExpr::String(x)),
|
||||||
Token::Integer(x) => Some(ConstExpr::Number(x as i32)),
|
Token::Integer(x) => Some(ConstExpr::Number(x as i32)),
|
||||||
_ => return ParseResult::Reject(CompilerError::UnexpectedToken(value)),
|
_ => {
|
||||||
|
return ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||||
|
value.tt().to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let _ = expect_tt!(self.next()?, Semicolon)?;
|
let _ = expect_tt!(self.next()?, Semicolon)?;
|
||||||
@@ -141,7 +138,9 @@ impl Parser {
|
|||||||
body: self.parse_block()?,
|
body: self.parse_block()?,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?))
|
ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||||
|
self.peek_next()?.tt().to_string(),
|
||||||
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -268,7 +267,7 @@ impl Parser {
|
|||||||
expr
|
expr
|
||||||
} else {
|
} else {
|
||||||
return ParseResult::Reject(CompilerError::UnexpectedToken(
|
return ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||||
self.peek_next()?,
|
self.peek_next()?.tt().to_string(),
|
||||||
));
|
));
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -341,7 +340,9 @@ impl Parser {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?))
|
ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||||
|
self.peek_next()?.tt().to_string(),
|
||||||
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_expression(&mut self) -> ParseResult<Expression, CompilerError> {
|
fn parse_expression(&mut self) -> ParseResult<Expression, CompilerError> {
|
||||||
@@ -463,7 +464,9 @@ impl Parser {
|
|||||||
let _ = expect_tt!(self.next()?, RightParen)?;
|
let _ = expect_tt!(self.next()?, RightParen)?;
|
||||||
ParseResult::Accept(expr)
|
ParseResult::Accept(expr)
|
||||||
}
|
}
|
||||||
_ => ParseResult::Reject(CompilerError::UnexpectedToken(self.peek_next()?)),
|
_ => ParseResult::Reject(CompilerError::UnexpectedToken(
|
||||||
|
self.peek_next()?.tt().to_string(),
|
||||||
|
)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -525,197 +528,6 @@ impl Parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct Program {
|
|
||||||
pub declarations: Vec<Declaration>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum Declaration {
|
|
||||||
Function {
|
|
||||||
name: String,
|
|
||||||
return_type: TypeId,
|
|
||||||
params: Vec<Variable>,
|
|
||||||
body: Block,
|
|
||||||
},
|
|
||||||
Variable {
|
|
||||||
var: Variable,
|
|
||||||
init: Option<ConstExpr>,
|
|
||||||
is_const: bool,
|
|
||||||
},
|
|
||||||
Dependency(Dependency),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct Dependency {
|
|
||||||
pub name: String,
|
|
||||||
pub path: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum TypeId {
|
|
||||||
U8,
|
|
||||||
U16,
|
|
||||||
U32,
|
|
||||||
I8,
|
|
||||||
I16,
|
|
||||||
I32,
|
|
||||||
Char,
|
|
||||||
Void,
|
|
||||||
Ptr(Box<TypeId>),
|
|
||||||
Ref(Box<TypeId>),
|
|
||||||
Array(Box<TypeId>, usize),
|
|
||||||
Struct { name: Name, fields: Vec<Variable> },
|
|
||||||
}
|
|
||||||
|
|
||||||
pub type Block = Vec<Statement>;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct Variable {
|
|
||||||
pub name: String,
|
|
||||||
pub type_id: TypeId,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum Statement {
|
|
||||||
Block(Block),
|
|
||||||
Declaration {
|
|
||||||
var: Variable,
|
|
||||||
value: Option<Expression>,
|
|
||||||
},
|
|
||||||
Assign {
|
|
||||||
varname: String,
|
|
||||||
value: Expression,
|
|
||||||
},
|
|
||||||
PtrWrite {
|
|
||||||
ptr: Expression,
|
|
||||||
value: Expression,
|
|
||||||
},
|
|
||||||
Expression {
|
|
||||||
expr: Expression,
|
|
||||||
},
|
|
||||||
If {
|
|
||||||
condition: Expression,
|
|
||||||
then_stmt: Block,
|
|
||||||
else_stmt: Block,
|
|
||||||
},
|
|
||||||
While {
|
|
||||||
condition: Expression,
|
|
||||||
body: Vec<Statement>,
|
|
||||||
},
|
|
||||||
Loop(Block),
|
|
||||||
Break,
|
|
||||||
Continue,
|
|
||||||
Return(Option<Expression>),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum ConstExpr {
|
|
||||||
Number(i32),
|
|
||||||
String(String),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for ConstExpr {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
ConstExpr::Number(n) => write!(f, "{}", n),
|
|
||||||
ConstExpr::String(s) => write!(f, "\"{}\"", s),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub enum Expression {
|
|
||||||
Empty,
|
|
||||||
Binary {
|
|
||||||
op: BinaryOperator,
|
|
||||||
left: Box<Expression>,
|
|
||||||
right: Box<Expression>,
|
|
||||||
},
|
|
||||||
Unary {
|
|
||||||
op: UnaryOperator,
|
|
||||||
operand: Box<Expression>,
|
|
||||||
},
|
|
||||||
Variable {
|
|
||||||
name: Name,
|
|
||||||
expr_type: Option<TypeId>,
|
|
||||||
},
|
|
||||||
Call {
|
|
||||||
name: Name,
|
|
||||||
args: Vec<Expression>,
|
|
||||||
},
|
|
||||||
Number(isize),
|
|
||||||
StringLiteral(String),
|
|
||||||
CharLiteral(char),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Expression {
|
|
||||||
pub fn is_pure(&self) -> bool {
|
|
||||||
match self {
|
|
||||||
Expression::Number(_) => true,
|
|
||||||
Expression::StringLiteral(_) => true,
|
|
||||||
Expression::CharLiteral(_) => true,
|
|
||||||
Expression::Call { name, args } => false, /* TODO: will require checking */
|
|
||||||
// if the associated function
|
|
||||||
// body is pure
|
|
||||||
Expression::Binary { left, right, .. } => left.is_pure() && right.is_pure(),
|
|
||||||
Expression::Unary { op, operand } => operand.is_pure(),
|
|
||||||
Expression::Empty => true,
|
|
||||||
Expression::Variable { name, expr_type } => true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
|
||||||
pub enum BinaryOperator {
|
|
||||||
Add,
|
|
||||||
Sub,
|
|
||||||
Mul,
|
|
||||||
Div,
|
|
||||||
Eq,
|
|
||||||
Ne,
|
|
||||||
Lt,
|
|
||||||
Gt,
|
|
||||||
Le,
|
|
||||||
Ge,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for BinaryOperator {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
BinaryOperator::Add => write!(f, "+"),
|
|
||||||
BinaryOperator::Sub => write!(f, "-"),
|
|
||||||
BinaryOperator::Mul => write!(f, "*"),
|
|
||||||
BinaryOperator::Div => write!(f, "/"),
|
|
||||||
BinaryOperator::Eq => write!(f, "=="),
|
|
||||||
BinaryOperator::Ne => write!(f, "!="),
|
|
||||||
BinaryOperator::Lt => write!(f, "<"),
|
|
||||||
BinaryOperator::Gt => write!(f, ">"),
|
|
||||||
BinaryOperator::Le => write!(f, "<="),
|
|
||||||
BinaryOperator::Ge => write!(f, ">="),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
|
||||||
pub enum UnaryOperator {
|
|
||||||
Plus,
|
|
||||||
Minus,
|
|
||||||
Reference,
|
|
||||||
Dereference,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for UnaryOperator {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
UnaryOperator::Plus => write!(f, "+"),
|
|
||||||
UnaryOperator::Minus => write!(f, "-"),
|
|
||||||
UnaryOperator::Dereference => write!(f, "*"),
|
|
||||||
UnaryOperator::Reference => write!(f, "&"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T, E> ParseResult<T, E> {
|
impl<T, E> ParseResult<T, E> {
|
||||||
pub fn accepted(&self) -> bool {
|
pub fn accepted(&self) -> bool {
|
||||||
matches!(self, ParseResult::Accept(_))
|
matches!(self, ParseResult::Accept(_))
|
||||||
@@ -772,7 +584,7 @@ macro_rules! expect_tt {
|
|||||||
)+
|
)+
|
||||||
_ => {
|
_ => {
|
||||||
// let expected = format!("[{}]", vec![$(stringify!($variant)),+].join(" | "));
|
// let expected = format!("[{}]", vec![$(stringify!($variant)),+].join(" | "));
|
||||||
ParseResult::Reject(CompilerError::UnexpectedToken(token))
|
ParseResult::Reject(CompilerError::UnexpectedToken(tt))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
@@ -784,7 +596,9 @@ macro_rules! expect_value {
|
|||||||
let tok = $expr;
|
let tok = $expr;
|
||||||
match tok.clone() {
|
match tok.clone() {
|
||||||
Token::$variant(value) => ParseResult::Accept(value),
|
Token::$variant(value) => ParseResult::Accept(value),
|
||||||
_ => ParseResult::Reject(CompilerError::UnexpectedToken(tok)),
|
_ => {
|
||||||
|
ParseResult::Reject(CompilerError::UnexpectedToken(tok.tt().to_string()))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
use crate::model::{CompilerError, Program};
|
||||||
|
|
||||||
|
pub struct Analyser;
|
||||||
|
|
||||||
|
impl Analyser {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn analyse(&self, _ast: Program) -> Result<(), CompilerError> {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
use crate::model::{CompilerError, Program};
|
||||||
|
|
||||||
|
mod c;
|
||||||
|
mod dsc;
|
||||||
|
|
||||||
|
pub fn compiler_frontend(ext: &str, data: &str) -> Result<Program, CompilerError> {
|
||||||
|
match ext {
|
||||||
|
"dsc" => Ok(dsc::generate_ast(&data)?),
|
||||||
|
"c" => Ok(c::generate_ast(&data)?),
|
||||||
|
_ => Err(CompilerError::Generic(format!(
|
||||||
|
"File type {} not supported",
|
||||||
|
ext
|
||||||
|
))),
|
||||||
|
}
|
||||||
|
}
|
||||||
+37
-41
@@ -4,17 +4,12 @@ use std::path::Path;
|
|||||||
|
|
||||||
use common::logging::log;
|
use common::logging::log;
|
||||||
|
|
||||||
use crate::{
|
use crate::specialised::build_specialised;
|
||||||
codegen::CodeGenerator,
|
|
||||||
parser::{ParseResult, Parser},
|
|
||||||
semantic_analyser::Analyser,
|
|
||||||
};
|
|
||||||
|
|
||||||
mod codegen;
|
mod backend;
|
||||||
mod lexer;
|
mod frontend;
|
||||||
mod parser;
|
mod model;
|
||||||
mod registers;
|
mod specialised;
|
||||||
mod semantic_analyser;
|
|
||||||
|
|
||||||
pub fn compile_file(
|
pub fn compile_file(
|
||||||
input_path: &Path,
|
input_path: &Path,
|
||||||
@@ -22,43 +17,44 @@ pub fn compile_file(
|
|||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let input = std::fs::read_to_string(input_path).expect("Failed to read input file");
|
let input = std::fs::read_to_string(input_path).expect("Failed to read input file");
|
||||||
|
|
||||||
log("Tokenising Input...");
|
let input_ext = input_path
|
||||||
|
.extension()
|
||||||
|
.and_then(|s| s.to_str())
|
||||||
|
.unwrap_or("");
|
||||||
|
|
||||||
let lexer = lexer::Lexer::new(&input);
|
// check if we're using a specialised compiler
|
||||||
let tokens = lexer.collect::<Vec<_>>();
|
if let Some(output) = build_specialised(input_ext, &input) {
|
||||||
// println!("{tokens:?}");
|
let result = match output {
|
||||||
|
Ok(output) => output,
|
||||||
|
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
|
||||||
|
};
|
||||||
|
|
||||||
log(&format!("Parsing {} Tokens...", tokens.len()));
|
std::fs::write(output_path, &result).expect("Failed to write output");
|
||||||
|
|
||||||
let mut parser = Parser::new(tokens);
|
log(&format!(
|
||||||
let ast = match parser.parse() {
|
"Compilation Successful ✅ \n\tSource: {}\n\tOutput: {}\n",
|
||||||
ParseResult::Accept(ast) => ast,
|
input_path.display(),
|
||||||
ParseResult::Reject(e) => {
|
output_path.display(),
|
||||||
eprintln!("Error: {e:?}");
|
));
|
||||||
return Err("Parsing error".into());
|
|
||||||
}
|
return Ok(());
|
||||||
ParseResult::Deny => {
|
}
|
||||||
panic!("Parser denied parsing")
|
|
||||||
}
|
// Parse the input using the frontend, providing the file extension and data.
|
||||||
|
let ast = match frontend::compiler_frontend(input_ext, &input) {
|
||||||
|
Ok(ast) => ast,
|
||||||
|
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
|
||||||
};
|
};
|
||||||
// println!("{ast:#?}");
|
|
||||||
|
|
||||||
log("Analyzing AST...");
|
let output_ext = output_path
|
||||||
log("Checking Type Information...");
|
.extension()
|
||||||
|
.and_then(|s| s.to_str())
|
||||||
|
.unwrap_or("");
|
||||||
|
|
||||||
let analyser = Analyser::new();
|
// Generate the output using the backend with the parsed result.
|
||||||
analyser.analyse(ast.clone()).unwrap();
|
let result = match backend::compiler_backend(output_ext, &ast) {
|
||||||
|
Ok(result) => result,
|
||||||
log("Generating Code...");
|
Err(err) => return Err(format!("Compilation failed: {err:?}").into()),
|
||||||
|
|
||||||
// Code Gen
|
|
||||||
let mut generator = CodeGenerator::new(ast);
|
|
||||||
let result = match generator.generate() {
|
|
||||||
Ok(code) => code,
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("Parsing error: {:?}", e);
|
|
||||||
return Err("Code generation error".into());
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// println!("{result}");
|
// println!("{result}");
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ fn main() {
|
|||||||
// read from input file: syntax "c_compiler <src.c> [output.dsa]"
|
// read from input file: syntax "c_compiler <src.c> [output.dsa]"
|
||||||
let args: Vec<String> = std::env::args().collect();
|
let args: Vec<String> = std::env::args().collect();
|
||||||
if args.len() < 2 {
|
if args.len() < 2 {
|
||||||
eprintln!("Usage: c_compiler <src.c> [output.dsa]");
|
eprintln!("Usage: c_compiler <src.dsc> [output.dsa]");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,213 @@
|
|||||||
|
use core::fmt;
|
||||||
|
|
||||||
|
/// Errors produced while compiling a source file.
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum CompilerError {
    /// A token that was not acceptable where it appeared. The payload is a
    /// textual description of the token; the DSC parser fills it with
    /// `token.tt().to_string()`.
    UnexpectedToken(String),
    /// NOTE(review): presumably raised when input ends mid-construct; no
    /// construction site is visible in this module.
    UnexpectedEndOfInput,
    /// Carries a single offending character.
    /// NOTE(review): presumably a lexer error; confirm against the lexer.
    UnexpectedCharacter(char),
    /// Carries the [`Name`] concerned.
    /// NOTE(review): presumably an unresolved identifier; confirm at use sites.
    Undefined(Name),
    /// Free-form message describing a syntax problem.
    InvalidSyntax(String),
    /// Free-form message for errors with no dedicated variant, e.g. an
    /// unsupported input file type.
    Generic(String),
}
|
||||||
|
|
||||||
|
/// An identifier together with an optional namespace qualifier.
#[derive(Debug, PartialEq, Clone)]
pub struct Name {
    /// The identifier itself.
    pub name: String,
    /// The qualifying namespace, or `None` for an unqualified name.
    pub namespace: Option<String>,
}
|
||||||
|
|
||||||
|
/// Root of the AST returned by the compiler front ends.
#[derive(Debug, Clone)]
pub struct Program {
    /// The program's top-level declarations.
    pub declarations: Vec<Declaration>,
}
|
||||||
|
|
||||||
|
/// A top-level item of a [`Program`].
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum Declaration {
    /// A function definition: name, return type, parameters and body.
    Function {
        name: String,
        return_type: TypeId,
        params: Vec<Variable>,
        body: Block,
    },
    /// A variable declaration with an optional constant initialiser.
    Variable {
        var: Variable,
        // `None` when the declaration has no initialiser.
        init: Option<ConstExpr>,
        is_const: bool,
    },
    /// A [`Dependency`] record.
    Dependency(Dependency),
}
|
||||||
|
|
||||||
|
/// A named dependency and where to find it.
/// NOTE(review): presumably an imported module; how `path` is resolved is
/// not visible here — confirm against the parser and backend.
#[derive(Debug, Clone)]
pub struct Dependency {
    /// Name of the dependency.
    pub name: String,
    /// Path associated with the dependency.
    pub path: String,
}
|
||||||
|
|
||||||
|
/// Identifies the type of a variable, parameter, return value or expression.
///
/// NOTE(review): the scalar variant names suggest unsigned/signed integers of
/// 8, 16 and 32 bits; the actual widths are decided by the backend — confirm.
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum TypeId {
    U8,
    U16,
    U32,
    I8,
    I16,
    I32,
    Char,
    Void,
    /// Wraps the type being pointed to.
    Ptr(Box<TypeId>),
    /// Wraps the type being referenced.
    Ref(Box<TypeId>),
    /// An element type paired with a `usize` (presumably the element count).
    Array(Box<TypeId>, usize),
    /// A named aggregate with its list of fields.
    Struct { name: Name, fields: Vec<Variable> },
}
|
||||||
|
|
||||||
|
/// A sequence of statements, used for function, branch and loop bodies.
pub type Block = Vec<Statement>;
|
||||||
|
|
||||||
|
/// A name paired with its type; used for declared variables, function
/// parameters and struct fields.
#[allow(unused)]
#[derive(Debug, Clone)]
pub struct Variable {
    /// The variable's name.
    pub name: String,
    /// The variable's type.
    pub type_id: TypeId,
}
|
||||||
|
|
||||||
|
/// A statement inside a [`Block`].
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum Statement {
    /// A nested block of statements.
    Block(Block),
    /// Declares `var`, optionally with an initial `value`.
    Declaration {
        var: Variable,
        value: Option<Expression>,
    },
    /// Assigns `value` to the variable called `varname`.
    Assign {
        varname: String,
        value: Expression,
    },
    /// Pairs a `ptr` expression with the `value` to write.
    /// NOTE(review): presumably a store through `ptr`; confirm in the backend.
    PtrWrite {
        ptr: Expression,
        value: Expression,
    },
    /// An expression used as a statement.
    Expression {
        expr: Expression,
    },
    /// A conditional with a `then_stmt` block and an `else_stmt` block.
    If {
        condition: Expression,
        then_stmt: Block,
        else_stmt: Block,
    },
    /// A loop with a `condition` expression and a `body`.
    While {
        condition: Expression,
        body: Vec<Statement>,
    },
    /// A loop that carries only a body and no condition.
    Loop(Block),
    Break,
    Continue,
    /// A return with an optional value expression.
    Return(Option<Expression>),
}
|
||||||
|
|
||||||
|
/// A constant value usable as the initialiser of a top-level variable.
#[derive(Debug, Clone)]
pub enum ConstExpr {
    /// A 32-bit signed integer constant.
    Number(i32),
    /// A string constant.
    String(String),
}

impl fmt::Display for ConstExpr {
    /// Writes numbers in decimal and strings wrapped in double quotes; the
    /// string contents are written verbatim, with no escaping.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Number(value) => write!(f, "{value}"),
            Self::String(text) => write!(f, "\"{text}\""),
        }
    }
}
|
||||||
|
|
||||||
|
/// An expression node of the AST.
#[allow(unused)]
#[derive(Debug, Clone)]
pub enum Expression {
    /// The absence of an expression.
    Empty,
    /// A binary operation `left op right`.
    Binary {
        op: BinaryOperator,
        left: Box<Expression>,
        right: Box<Expression>,
    },
    /// A unary operation applied to `operand`.
    Unary {
        op: UnaryOperator,
        operand: Box<Expression>,
    },
    /// A reference to a variable by name.
    Variable {
        name: Name,
        // The C parser creates this as `None`.
        // NOTE(review): presumably filled in by a later pass — confirm.
        expr_type: Option<TypeId>,
    },
    /// A call to the function `name` with the argument expressions `args`.
    Call {
        name: Name,
        args: Vec<Expression>,
    },
    /// An integer literal.
    Number(isize),
    /// A string literal.
    StringLiteral(String),
    /// A character literal.
    CharLiteral(char),
}
|
||||||
|
|
||||||
|
impl Expression {
|
||||||
|
pub fn is_pure(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
Expression::Number(_) => true,
|
||||||
|
Expression::StringLiteral(_) => true,
|
||||||
|
Expression::CharLiteral(_) => true,
|
||||||
|
Expression::Call { .. } => false,
|
||||||
|
Expression::Binary { left, right, .. } => left.is_pure() && right.is_pure(),
|
||||||
|
Expression::Unary { operand, .. } => operand.is_pure(),
|
||||||
|
Expression::Empty => true,
|
||||||
|
Expression::Variable { .. } => true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Operators that take two operands: arithmetic and comparison.
#[allow(unused)]
#[derive(Debug, Clone, PartialEq)]
pub enum BinaryOperator {
    Add,
    Sub,
    Mul,
    Div,
    Eq,
    Ne,
    Lt,
    Gt,
    Le,
    Ge,
}

impl fmt::Display for BinaryOperator {
    /// Writes the operator's source-level symbol (for example `+` or `<=`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Add => "+",
            Self::Sub => "-",
            Self::Mul => "*",
            Self::Div => "/",
            Self::Eq => "==",
            Self::Ne => "!=",
            Self::Lt => "<",
            Self::Gt => ">",
            Self::Le => "<=",
            Self::Ge => ">=",
        };
        f.write_str(symbol)
    }
}
|
||||||
|
|
||||||
|
/// Operators that take a single operand.
#[derive(Debug, Clone, PartialEq)]
pub enum UnaryOperator {
    Plus,
    Minus,
    Reference,
    Dereference,
}

impl fmt::Display for UnaryOperator {
    /// Writes the operator's source-level symbol: `+`, `-`, `&` or `*`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            Self::Plus => "+",
            Self::Minus => "-",
            Self::Reference => "&",
            Self::Dereference => "*",
        };
        f.write_str(symbol)
    }
}
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
use crate::parser::{CompilerError, Program};
|
|
||||||
|
|
||||||
pub struct Analyser;
|
|
||||||
|
|
||||||
impl Analyser {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Self
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn analyse(&self, ast: Program) -> Result<(), CompilerError> {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -0,0 +1,135 @@
|
|||||||
|
#[must_use]
|
||||||
|
pub fn build(src: &str) -> String {
|
||||||
|
parse(src).join("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
#[expect(clippy::too_many_lines)]
|
||||||
|
pub fn parse(src: &str) -> Vec<String> {
|
||||||
|
let stack = "0x10000";
|
||||||
|
let acc = "acc";
|
||||||
|
let rga = "rga";
|
||||||
|
|
||||||
|
let bpr = "bpr";
|
||||||
|
let spr = "spr";
|
||||||
|
let mut instrs = Vec::<String>::new();
|
||||||
|
|
||||||
|
// Define symbols
|
||||||
|
let print_start = "print";
|
||||||
|
|
||||||
|
let tokens = lex(src);
|
||||||
|
|
||||||
|
let mut idstack = Vec::<u32>::new();
|
||||||
|
|
||||||
|
// set up a stack
|
||||||
|
instrs.push(format!("\tlwi {}, {}", stack, bpr));
|
||||||
|
instrs.push(format!("\tmov {}, {}", bpr, spr));
|
||||||
|
// set up the data pointer
|
||||||
|
instrs.push(format!("{}: \t lwi 0x30000, {}", "main", rga));
|
||||||
|
|
||||||
|
for (id, tok) in tokens.iter().enumerate() {
|
||||||
|
match tok {
|
||||||
|
BfToken::Inc => {
|
||||||
|
instrs.push(format!("\tinc {}", acc));
|
||||||
|
}
|
||||||
|
BfToken::Dec => {
|
||||||
|
instrs.push(format!("\tdec {}", acc));
|
||||||
|
}
|
||||||
|
BfToken::IncPtr => {
|
||||||
|
instrs.push(format!("\tstw {}, {}, 0", acc, rga));
|
||||||
|
instrs.push(format!("\taddi {}, 4, {}", rga, rga));
|
||||||
|
instrs.push(format!("\tlwd {}, {}, 0", rga, acc));
|
||||||
|
}
|
||||||
|
BfToken::DecPtr => {
|
||||||
|
instrs.push(format!("\tstw {}, {}, 0", acc, rga));
|
||||||
|
instrs.push(format!("\tsubi {}, 4, {}", rga, rga));
|
||||||
|
instrs.push(format!("\tlwd {}, {}, 0", rga, acc));
|
||||||
|
}
|
||||||
|
BfToken::Out => {
|
||||||
|
instrs.push(format!("\tpush {}", acc));
|
||||||
|
instrs.push(format!("\tcall {}", print_start));
|
||||||
|
instrs.push(format!("\tpop zero"));
|
||||||
|
}
|
||||||
|
BfToken::In => {
|
||||||
|
instrs.push(format!("\tlwd 0x40000, {}, 0", acc));
|
||||||
|
}
|
||||||
|
BfToken::Forward => {
|
||||||
|
let loop_start = format!("loop_start_{}", id);
|
||||||
|
let loop_end = format!("loop_end_{}", id);
|
||||||
|
idstack.push(id as u32);
|
||||||
|
instrs.push(format!("\tcmp {}, zero", acc));
|
||||||
|
instrs.push(format!("\tjeq {}, zero", loop_end));
|
||||||
|
instrs.push(format!("{}: \tnop", loop_start));
|
||||||
|
}
|
||||||
|
BfToken::Back => {
|
||||||
|
if let Some(start_id) = idstack.pop() {
|
||||||
|
let loop_start = format!("loop_start_{}", start_id);
|
||||||
|
let loop_end = format!("loop_end_{}", start_id);
|
||||||
|
instrs.push(format!("\tcmp {}, zero", acc));
|
||||||
|
instrs.push(format!("\tjne {}, zero", loop_start));
|
||||||
|
instrs.push(format!("{}: \tnop", loop_end));
|
||||||
|
} else {
|
||||||
|
eprintln!("Warning: Unmatched ']' at position {}", id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
instrs.push("\thlt".to_string());
|
||||||
|
|
||||||
|
insert_lib(&mut instrs);
|
||||||
|
|
||||||
|
instrs
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Appends the runtime support code to `instrs`: the `current` data word
/// followed by the `print` routine that `call print` targets.
///
/// Eleven lines are appended; existing entries are left untouched.
fn insert_lib(instrs: &mut Vec<String>) {
    let bpr = "bpr";
    let spr = "spr";
    let rg0 = "rg0";
    let rg1 = "rg1";
    let print_start = "print";
    let current = "current";

    let routine = [
        format!("\tdw {current}, 0x20000"),
        // Entry: save the caller's base pointer and open a new frame.
        format!("{print_start}: \tpush {bpr}"),
        format!("\tmov {spr}, {bpr}"),
        // Fetch the argument and the value held in `current`, store the
        // argument byte there, then advance `current` by one.
        format!("\tlwd {bpr}, {rg0}, 8"),
        format!("\tlwd {current}, {rg1}, 0"),
        format!("\tstb {rg0}, {rg1}, 0"),
        format!("\taddi {rg1}, 1, {rg1}"),
        format!("\tstw {rg1}, {current}, 0"),
        // Exit: restore the caller's frame and return.
        format!("\tmov {bpr}, {spr}"),
        format!("\tpop {bpr}"),
        "\treturn".to_string(),
    ];

    instrs.extend(routine);
}
|
||||||
|
|
||||||
|
/// The eight Brainf commands, as produced by `lex`.
enum BfToken {
    /// `+`
    Inc,
    /// `-`
    Dec,
    /// `>`
    IncPtr,
    /// `<`
    DecPtr,
    /// `.`
    Out,
    /// `,`
    In,
    /// `[`
    Forward,
    /// `]`
    Back,
}
|
||||||
|
|
||||||
|
fn lex(src: &str) -> Vec<BfToken> {
|
||||||
|
src.chars()
|
||||||
|
.filter_map(|c| match c {
|
||||||
|
'+' => Some(BfToken::Inc),
|
||||||
|
'-' => Some(BfToken::Dec),
|
||||||
|
'>' => Some(BfToken::IncPtr),
|
||||||
|
'<' => Some(BfToken::DecPtr),
|
||||||
|
'.' => Some(BfToken::Out),
|
||||||
|
',' => Some(BfToken::In),
|
||||||
|
'[' => Some(BfToken::Forward),
|
||||||
|
']' => Some(BfToken::Back),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds the label name `label_<id>`.
fn _create_symbol(id: u32) -> String {
    let mut symbol = String::from("label_");
    symbol.push_str(&id.to_string());
    symbol
}
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
use crate::model::CompilerError;
|
||||||
|
|
||||||
|
pub mod brainf;
|
||||||
|
|
||||||
|
pub fn build_specialised(ext: &str, data: &str) -> Option<Result<String, CompilerError>> {
|
||||||
|
match ext {
|
||||||
|
"bf" => {
|
||||||
|
let res = brainf::build(data);
|
||||||
|
Some(Ok(res))
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,944 @@
|
|||||||
|
# DSA Assembly Language Reference
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This document is the comprehensive reference for writing DSA assembly programs. It covers assembly syntax, pseudo-instructions, directives, the module system, calling conventions, and provides complete examples.
|
||||||
|
|
||||||
|
**Related Documents:**
|
||||||
|
- For hardware instruction details and encoding: See *DSA ISA Specification*
|
||||||
|
- For build system and toolchain: See project documentation
|
||||||
|
|
||||||
|
## Assembly Syntax
|
||||||
|
|
||||||
|
### General Rules
|
||||||
|
|
||||||
|
- **Case Insensitive:** Mnemonics can be uppercase or lowercase (`mov` = `MOV`)
|
||||||
|
- **Comments:** Use `//` for line comments or `/* */` for block comments
|
||||||
|
- **Labels:** Identifier followed by colon (e.g., `main:`, `loop:`)
|
||||||
|
- **Whitespace:** Flexible spacing between operands
|
||||||
|
- **Numbers:**
|
||||||
|
- Decimal: `100`, `255`
|
||||||
|
- Hexadecimal: `0x10`, `0xFFFF`
|
||||||
|
- Binary: `0b1010` (if supported by assembler)
|
||||||
|
|
||||||
|
### Operand Order Convention
|
||||||
|
|
||||||
|
DSA assembly uses **GAS-style syntax** (source → destination):
|
||||||
|
|
||||||
|
```asm
|
||||||
|
mov rg0, rg1 ; Copy rg0 TO rg1 (destination is last)
|
||||||
|
add rg0, rg1, rg2 ; rg2 = rg0 + rg1 (destination is last)
|
||||||
|
```
|
||||||
|
|
||||||
|
For load/store with immediates:
|
||||||
|
```asm
|
||||||
|
lli 0x1234, rg0 ; Load immediate 0x1234 INTO rg0
|
||||||
|
ldw rg0, rg1, 8 ; Load from (rg0+8) INTO rg1
|
||||||
|
stw rg0, rg1, 8 ; Store rg0 TO address (rg1+8)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Registers
|
||||||
|
|
||||||
|
| Register(s) | Type | Description | Usage Notes |
|
||||||
|
|-------------|------|-------------|-------------|
|
||||||
|
| **rg0-rgf** | General | 16 general-purpose registers | Use for variables, temporaries |
|
||||||
|
| **acc** | Special | Accumulator | ⚠️ Volatile - pseudo-instructions may overwrite |
|
||||||
|
| **spr** | Special | Stack pointer | Points to top of stack |
|
||||||
|
| **bpr** | Special | Base pointer | Used for stack frames |
|
||||||
|
| **ret** | Special | Return address | Holds return address for functions |
|
||||||
|
| **zero** | Read-only | Always zero | Reads return 0, writes discarded |
|
||||||
|
| **pcx** | Read-only | Program counter | Cannot be written directly |
|
||||||
|
| **idr** | Privileged | Interrupt descriptor table | Kernel mode only |
|
||||||
|
| **mmr** | Privileged | Memory map register | Kernel mode only |
|
||||||
|
| **noreg** | Placeholder | No register | Used in encoding, triggers fault if accessed |
|
||||||
|
|
||||||
|
**Register Conventions:**
|
||||||
|
- **acc**: Used by pseudo-instructions for temporary values - do not rely on it being preserved
|
||||||
|
- **rgf**: Used by label-addressing pseudo-instructions as a scratch register
|
||||||
|
- **rg0-rge**: Available for general use; calling convention defines which are preserved
|
||||||
|
|
||||||
|
## Hardware Instructions
|
||||||
|
|
||||||
|
This section shows assembly syntax. For encoding details, see the ISA Specification.
|
||||||
|
|
||||||
|
### Data Movement
|
||||||
|
|
||||||
|
```asm
|
||||||
|
mov src_reg, dest_reg ; Copy value from src_reg to dest_reg
|
||||||
|
movs src_reg, dest_reg ; Copy with sign extension
|
||||||
|
```
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
```asm
|
||||||
|
mov rg0, rg1 ; rg1 = rg0
|
||||||
|
movs acc, rg2 ; rg2 = sign_extend(acc)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Memory Load Instructions
|
||||||
|
|
||||||
|
```asm
|
||||||
|
ldb base_reg, dest_reg [, offset] ; Load byte (zero-extend)
|
||||||
|
ldbs base_reg, dest_reg [, offset] ; Load byte (sign-extend)
|
||||||
|
ldh base_reg, dest_reg [, offset] ; Load halfword (zero-extend)
|
||||||
|
ldhs base_reg, dest_reg [, offset] ; Load halfword (sign-extend)
|
||||||
|
ldw base_reg, dest_reg [, offset] ; Load word
|
||||||
|
```
|
||||||
|
|
||||||
|
**Offset:** Optional signed 16-bit offset (defaults to 0)
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
```asm
|
||||||
|
ldb rg0, rg1 ; Load byte from address in rg0
|
||||||
|
ldw rg0, rg1, 8 ; Load word from (rg0 + 8)
|
||||||
|
ldhs rg2, rg3, -4 ; Load signed halfword from (rg2 - 4)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Alignment Requirements:**
|
||||||
|
- `ldb/ldbs`: No alignment required
|
||||||
|
- `ldh/ldhs`: Must be 2-byte aligned
|
||||||
|
- `ldw`: Must be 4-byte aligned
|
||||||
|
|
||||||
|
### Memory Store Instructions
|
||||||
|
|
||||||
|
```asm
|
||||||
|
stb src_reg, base_reg [, offset] ; Store byte
|
||||||
|
sth src_reg, base_reg [, offset] ; Store halfword
|
||||||
|
stw src_reg, base_reg [, offset] ; Store word
|
||||||
|
```
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
```asm
|
||||||
|
stb rg0, rg1 ; Store byte to address in rg1
|
||||||
|
stw rg0, rg1, 12 ; Store word to (rg1 + 12)
|
||||||
|
sth acc, spr, -2 ; Store halfword to (spr - 2)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Alignment Requirements:** Same as loads
|
||||||
|
|
||||||
|
### Immediate Load Instructions
|
||||||
|
|
||||||
|
```asm
|
||||||
|
lli immediate, dest_reg ; Load lower 16 bits (CLEARS upper 16!)
|
||||||
|
lui immediate, dest_reg ; Load upper 16 bits (preserves lower 16)
|
||||||
|
```
|
||||||
|
|
||||||
|
**⚠️ CRITICAL:** `lli` clears the upper 16 bits! Always use `lli` before `lui`.
|
||||||
|
|
||||||
|
**Loading 32-bit Constants:**
|
||||||
|
```asm
|
||||||
|
lli 0x1234, rg0 ; rg0 = 0x00001234
|
||||||
|
lui 0xABCD, rg0 ; rg0 = 0xABCD1234
|
||||||
|
```
|
||||||
|
|
||||||
|
**Loading Addresses:** See `lwi` pseudo-instruction
|
||||||
|
|
||||||
|
### Jump and Branch Instructions
|
||||||
|
|
||||||
|
```asm
|
||||||
|
jmp addr [, offset_reg] ; Unconditional jump
|
||||||
|
jeq addr [, offset_reg] ; Jump if equal
|
||||||
|
jne addr [, offset_reg] ; Jump if not equal
|
||||||
|
jgt addr [, offset_reg] ; Jump if greater than
|
||||||
|
jge addr [, offset_reg] ; Jump if greater or equal
|
||||||
|
jlt addr [, offset_reg] ; Jump if less than
|
||||||
|
jle addr [, offset_reg] ; Jump if less or equal
|
||||||
|
```
|
||||||
|
|
||||||
|
**Jump Modes:**
|
||||||
|
```asm
|
||||||
|
; Absolute jump (using zero register)
|
||||||
|
jmp label, zero ; Jump to label address
|
||||||
|
jmp 0x4000, zero ; Jump to absolute address 0x4000
|
||||||
|
|
||||||
|
; Register-based jump
|
||||||
|
jmp 0, ret ; Jump to address in ret register
|
||||||
|
jmp 4, ret ; Jump to (ret + 4)
|
||||||
|
|
||||||
|
; PC-relative (if assembler supports)
|
||||||
|
jeq loop_start ; Jump to loop_start if equal flag set
|
||||||
|
```
|
||||||
|
|
||||||
|
**Conditional Jumps:** Based on flags set by `cmp` instruction
|
||||||
|
|
||||||
|
### Comparison
|
||||||
|
|
||||||
|
```asm
|
||||||
|
cmp reg1, reg2 ; Compare reg1 with reg2, set flags
|
||||||
|
```
|
||||||
|
|
||||||
|
**Flags Set:**
|
||||||
|
- Equal: `reg1 == reg2`
|
||||||
|
- GreaterThan: `reg1 > reg2`
|
||||||
|
- LessThan: `reg1 < reg2`
|
||||||
|
- GreaterThanOrEqual: `reg1 >= reg2`
|
||||||
|
- LessThanOrEqual: `reg1 <= reg2`
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
```asm
|
||||||
|
cmp rg0, zero ; Compare rg0 with 0
|
||||||
|
jeq is_zero ; Branch if rg0 == 0
|
||||||
|
jgt is_positive ; Branch if rg0 > 0
|
||||||
|
jlt is_negative ; Branch if rg0 < 0
|
||||||
|
```
|
||||||
|
|
||||||
|
### Arithmetic Instructions
|
||||||
|
|
||||||
|
```asm
|
||||||
|
add src1, src2, dest ; dest = src1 + src2
|
||||||
|
sub src1, src2, dest ; dest = src1 - src2
|
||||||
|
iadd src, immediate, dest ; dest = src + immediate
|
||||||
|
isub src, immediate, dest ; dest = src - immediate
|
||||||
|
inc reg ; reg = reg + 1
|
||||||
|
dec reg ; reg = reg - 1
|
||||||
|
```
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
```asm
|
||||||
|
add rg0, rg1, rg2 ; rg2 = rg0 + rg1
|
||||||
|
sub rg0, rg1, rg2 ; rg2 = rg0 - rg1
|
||||||
|
iadd rg0, 10, rg0 ; rg0 = rg0 + 10
|
||||||
|
isub rg1, 5, rg2 ; rg2 = rg1 - 5
|
||||||
|
inc spr ; spr = spr + 1
|
||||||
|
dec spr ; spr = spr - 1
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** For `iadd`/`isub`, destination can be the same as source for in-place operations.
|
||||||
|
|
||||||
|
### Bitwise Logical Operations
|
||||||
|
|
||||||
|
```asm
|
||||||
|
and src1, src2, dest ; dest = src1 & src2
|
||||||
|
or src1, src2, dest ; dest = src1 | src2
|
||||||
|
xor src1, src2, dest ; dest = src1 ^ src2
|
||||||
|
not src, dest ; dest = ~src
|
||||||
|
nand src1, src2, dest ; dest = ~(src1 & src2)
|
||||||
|
nor src1, src2, dest ; dest = ~(src1 | src2)
|
||||||
|
xnor src1, src2, dest ; dest = ~(src1 ^ src2)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
```asm
|
||||||
|
and rg0, rg1, rg2 ; rg2 = rg0 & rg1
|
||||||
|
or rg0, rg1, rg2 ; rg2 = rg0 | rg1
|
||||||
|
not rg0, rg1 ; rg1 = ~rg0
|
||||||
|
xor rg0, rg0, rg0 ; rg0 = 0 (XOR register with itself)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Shift Operations
|
||||||
|
|
||||||
|
```asm
|
||||||
|
shl reg, shift_amount ; Shift left by amount (0-31)
|
||||||
|
shr reg, shift_amount ; Shift right by amount (0-31)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Shift Amount:**
|
||||||
|
- Can be a literal: `shl rg0, 2` (shift by 2)
|
||||||
|
- Can be a register: `shl rg0, rg1` (shift by value in rg1, uses low 5 bits)
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
```asm
|
||||||
|
shl rg0, 2 ; rg0 = rg0 << 2
|
||||||
|
shr rg1, 3 ; rg1 = rg1 >> 3
|
||||||
|
shl rg0, rg1 ; rg0 = rg0 << (rg1 & 0x1F)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** Shift right is logical (zero-fill), not arithmetic
|
||||||
|
|
||||||
|
### System and Control Instructions
|
||||||
|
|
||||||
|
```asm
|
||||||
|
hlt ; Halt processor
|
||||||
|
nop ; No operation
|
||||||
|
int interrupt_code ; Trigger interrupt (8-bit code)
|
||||||
|
irt ; Return from interrupt
|
||||||
|
```
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
```asm
|
||||||
|
hlt ; Stop execution
|
||||||
|
nop ; Do nothing (timing/alignment)
|
||||||
|
int 0x21 ; Trigger interrupt 0x21
|
||||||
|
irt ; Return from interrupt handler
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pseudo-Instructions
|
||||||
|
|
||||||
|
Pseudo-instructions are assembly-level constructs that expand into one or more hardware instructions.
|
||||||
|
|
||||||
|
### Data Definition Directives
|
||||||
|
|
||||||
|
```asm
|
||||||
|
db label: value1 [, value2, ...] ; Define bytes
|
||||||
|
dh label: value1 [, value2, ...] ; Define halfwords (16-bit)
|
||||||
|
dw label: value1 [, value2, ...] ; Define words (32-bit)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
```asm
|
||||||
|
db message: "Hello, World!", 0 ; String with null terminator
|
||||||
|
db bytes: 0x01, 0x02, 0x03 ; Array of bytes
|
||||||
|
dh numbers: 1000, 2000, 3000 ; Array of halfwords
|
||||||
|
dw stack_base: 0x10000 ; Single word value
|
||||||
|
dw table: 0, 0, 0, 0 ; Array of 4 words
|
||||||
|
```
|
||||||
|
|
||||||
|
**String Encoding:** Strings are encoded as byte sequences with escape sequences:
|
||||||
|
- `\n` = newline (0x0A)
|
||||||
|
- `\t` = tab (0x09)
|
||||||
|
- `\r` = carriage return (0x0D)
|
||||||
|
- `\\` = backslash
|
||||||
|
- `\"` = double quote
|
||||||
|
- `\0` = null (0x00)
|
||||||
|
|
||||||
|
### Memory Reservation Directives
|
||||||
|
|
||||||
|
```asm
|
||||||
|
resb label: size ; Reserve 'size' bytes
|
||||||
|
resh label: size ; Reserve 'size' halfwords
|
||||||
|
resw label: size ; Reserve 'size' words
|
||||||
|
```
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
```asm
|
||||||
|
resb buffer: 256 ; Reserve 256 bytes
|
||||||
|
resh array: 100 ; Reserve 100 halfwords (200 bytes)
|
||||||
|
resw heap: 1024 ; Reserve 1024 words (4096 bytes)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** Reserved memory is uninitialized (contents undefined).
|
||||||
|
|
||||||
|
### Stack Operations
|
||||||
|
|
||||||
|
```asm
|
||||||
|
push reg ; Push register onto stack
|
||||||
|
pop reg ; Pop stack into register
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expansion:**
|
||||||
|
```asm
|
||||||
|
; push rg0 expands to:
|
||||||
|
iadd spr, 4, spr ; spr = spr + 4 (stack grows up)
|
||||||
|
stw rg0, spr, 0 ; Store rg0 to [spr]
|
||||||
|
|
||||||
|
; pop rg0 expands to:
|
||||||
|
ldw spr, rg0, 0 ; Load [spr] into rg0
|
||||||
|
isub spr, 4, spr ; spr = spr - 4
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** DSA stack grows upward (toward higher addresses).
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
```asm
|
||||||
|
push rg0 ; Save rg0 on stack
|
||||||
|
push rg1 ; Save rg1 on stack
|
||||||
|
; ... do work ...
|
||||||
|
pop rg1 ; Restore rg1
|
||||||
|
pop rg0 ; Restore rg0
|
||||||
|
```
|
||||||
|
|
||||||
|
### Load Address Pseudo-Instruction
|
||||||
|
|
||||||
|
```asm
|
||||||
|
lwi label, dest_reg ; Load address of label into register
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expansion:**
|
||||||
|
```asm
|
||||||
|
; lwi message, rg0 expands to:
|
||||||
|
lli message, rg0 ; Load lower 16 bits of address
|
||||||
|
lui message, rg0 ; Load upper 16 bits of address
|
||||||
|
```
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
```asm
|
||||||
|
db message: "Hello!", 0
|
||||||
|
|
||||||
|
lwi message, rg0 ; rg0 = address of message
|
||||||
|
ldb rg0, rg1 ; rg1 = first byte of message ('H')
|
||||||
|
```
|
||||||
|
|
||||||
|
### Memory Access with Labels
|
||||||
|
|
||||||
|
Load and store instructions can use labels directly:
|
||||||
|
|
||||||
|
```asm
|
||||||
|
ldb label, dest_reg [, offset]
|
||||||
|
ldh label, dest_reg [, offset]
|
||||||
|
ldw label, dest_reg [, offset]
|
||||||
|
stb src_reg, label [, offset]
|
||||||
|
sth src_reg, label [, offset]
|
||||||
|
stw src_reg, label [, offset]
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expansion (uses rgf as scratch):**
|
||||||
|
```asm
|
||||||
|
; ldb buffer, rg2 expands to:
|
||||||
|
lli buffer, rgf ; Load lower 16 bits of buffer address
|
||||||
|
lui buffer, rgf ; Load upper 16 bits of buffer address
|
||||||
|
ldb rgf, rg2, 0 ; Load byte from address in rgf
|
||||||
|
|
||||||
|
; stw rg1, current expands to:
|
||||||
|
lli current, rgf ; Load lower 16 bits of current address
|
||||||
|
lui current, rgf ; Load upper 16 bits of current address
|
||||||
|
stw rg1, rgf, 0 ; Store word to address in rgf
|
||||||
|
```
|
||||||
|
|
||||||
|
**⚠️ Important:** These pseudo-instructions use `rgf` as a scratch register! Do not use `rgf` for other purposes when using label-based memory access.
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
```asm
|
||||||
|
dw counter: 0
|
||||||
|
|
||||||
|
ldw counter, rg0 ; Load value of counter
|
||||||
|
iadd rg0, 1, rg0 ; Increment
|
||||||
|
stw rg0, counter ; Store back
|
||||||
|
```
|
||||||
|
|
||||||
|
### Function Call Pseudo-Instructions
|
||||||
|
|
||||||
|
```asm
|
||||||
|
call namespace::function ; Call function from included module
|
||||||
|
return ; Return from function
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expansion:**
|
||||||
|
```asm
|
||||||
|
; call print::print expands to:
|
||||||
|
lwi print::print, ret ; Load function address into ret
|
||||||
|
jmp 0, ret ; Jump to function (saves return in pcx)
|
||||||
|
; (The assembler/linker resolves namespace::function to address)
|
||||||
|
|
||||||
|
; return expands to:
|
||||||
|
jmp 0, ret ; Jump to address in ret register
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** The actual return address handling may be more complex depending on the calling convention.
|
||||||
|
|
||||||
|
### Module System
|
||||||
|
|
||||||
|
```asm
|
||||||
|
include namespace "path/to/file.dsa"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
```asm
|
||||||
|
include print "lib/print.dsa"
|
||||||
|
include math "lib/math.dsa"
|
||||||
|
|
||||||
|
; Can now call:
|
||||||
|
call print::print
|
||||||
|
call math::multiply
|
||||||
|
```
|
||||||
|
|
||||||
|
**Namespace Resolution:**
|
||||||
|
- Functions in included modules are accessible via `namespace::label`
|
||||||
|
- Namespace is the identifier before the filename
|
||||||
|
- Labels in included files are prefixed with the namespace
|
||||||
|
|
||||||
|
## Calling Convention
|
||||||
|
|
||||||
|
DSA uses a standard calling convention for function calls.
|
||||||
|
|
||||||
|
### Stack Frame Layout
|
||||||
|
|
||||||
|
```
|
||||||
|
Higher Addresses
|
||||||
|
├─────────────┤
|
||||||
|
│ Arg N │ ← spr + (8 + 4*N)
|
||||||
|
│ ... │
|
||||||
|
│ Arg 2 │ ← spr + 16
|
||||||
|
│ Arg 1 │ ← spr + 12
|
||||||
|
│ Arg 0 │ ← spr + 8 (first argument)
|
||||||
|
├─────────────┤
|
||||||
|
│ Ret Addr │ ← spr + 4 (return address)
|
||||||
|
├─────────────┤
|
||||||
|
│ Old BPR │ ← spr + 0 (saved base pointer)
|
||||||
|
├─────────────┤ ← bpr, spr (current frame)
|
||||||
|
│ Locals │ (local variables, if any)
|
||||||
|
Lower Addresses
|
||||||
|
```
|
||||||
|
|
||||||
|
### Calling Sequence
|
||||||
|
|
||||||
|
**Caller Responsibilities:**
|
||||||
|
|
||||||
|
1. **Push arguments in reverse order** (last argument first):
|
||||||
|
```asm
|
||||||
|
push arg2
|
||||||
|
push arg1
|
||||||
|
push arg0
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Call the function:**
|
||||||
|
```asm
|
||||||
|
call namespace::function
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Clean up arguments** after return:
|
||||||
|
```asm
|
||||||
|
pop zero ; Discard or retrieve arg0
|
||||||
|
pop zero ; Discard arg1
|
||||||
|
pop zero ; Discard arg2
|
||||||
|
```
|
||||||
|
|
||||||
|
**Callee Responsibilities:**
|
||||||
|
|
||||||
|
1. **Set up stack frame:**
|
||||||
|
```asm
|
||||||
|
function:
|
||||||
|
push bpr ; Save old base pointer
|
||||||
|
mov spr, bpr ; Establish new base pointer
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Access arguments:**
|
||||||
|
```asm
|
||||||
|
ldw bpr, rg0, 8 ; Load arg0 from bpr+8
|
||||||
|
ldw bpr, rg1, 12 ; Load arg1 from bpr+12
|
||||||
|
ldw bpr, rg2, 16 ; Load arg2 from bpr+16
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Execute function body:**
|
||||||
|
```asm
|
||||||
|
; Function logic here
|
||||||
|
add rg0, rg1, acc ; Example: acc = arg0 + arg1
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Store return value** (optional, overwrites arg0):
|
||||||
|
```asm
|
||||||
|
stw acc, bpr, 8 ; Store result where arg0 was
|
||||||
|
```
|
||||||
|
|
||||||
|
5. **Restore stack frame:**
|
||||||
|
```asm
|
||||||
|
mov bpr, spr ; Restore stack pointer
|
||||||
|
pop bpr ; Restore old base pointer
|
||||||
|
```
|
||||||
|
|
||||||
|
6. **Return to caller:**
|
||||||
|
```asm
|
||||||
|
return
|
||||||
|
```
|
||||||
|
|
||||||
|
### Complete Example
|
||||||
|
|
||||||
|
```asm
|
||||||
|
; Function: add two numbers
|
||||||
|
; Args: arg0, arg1
|
||||||
|
; Returns: sum in arg0 position
|
||||||
|
|
||||||
|
add_function:
|
||||||
|
push bpr ; Save base pointer
|
||||||
|
mov spr, bpr ; Set up stack frame
|
||||||
|
|
||||||
|
ldw bpr, rg0, 8 ; Load arg0
|
||||||
|
ldw bpr, rg1, 12 ; Load arg1
|
||||||
|
add rg0, rg1, acc ; acc = arg0 + arg1
|
||||||
|
|
||||||
|
stw acc, bpr, 8 ; Store result
|
||||||
|
|
||||||
|
mov bpr, spr ; Restore stack
|
||||||
|
pop bpr ; Restore base pointer
|
||||||
|
return
|
||||||
|
|
||||||
|
; Caller:
|
||||||
|
main:
|
||||||
|
ldw stack_base, bpr ; Load the stack base value (0x10000); lwi would load the label's address instead
|
||||||
|
mov bpr, spr
|
||||||
|
|
||||||
|
lli 5, rg0
|
||||||
|
lli 7, rg1
|
||||||
|
|
||||||
|
push rg1 ; Push arg1 (7)
|
||||||
|
push rg0 ; Push arg0 (5)
|
||||||
|
call local::add_function
|
||||||
|
pop rg2 ; Get result (12)
|
||||||
|
pop zero ; Discard arg1
|
||||||
|
|
||||||
|
hlt
|
||||||
|
|
||||||
|
dw stack_base: 0x10000
|
||||||
|
```
|
||||||
|
|
||||||
|
### Register Usage Conventions
|
||||||
|
|
||||||
|
| Register(s) | Usage | Preserved? |
|
||||||
|
|-------------|-------|------------|
|
||||||
|
| rg0-rg3 | Function arguments, temporaries | No (caller-saved) |
|
||||||
|
| rg4-rge | Local variables | Yes (callee-saved if used) |
|
||||||
|
| rgf | Scratch (used by label addressing) | No |
|
||||||
|
| acc | Temporary calculations | No |
|
||||||
|
| spr | Stack pointer | Yes (must be restored) |
|
||||||
|
| bpr | Base pointer | Yes (must be restored) |
|
||||||
|
| ret | Return address | Managed by call/return |
|
||||||
|
|
||||||
|
**Notes:**
|
||||||
|
- Functions should save and restore rg4-rge if they use them
|
||||||
|
- rg0-rg3 may be overwritten by called functions
|
||||||
|
- acc and rgf are volatile - assume they're overwritten
|
||||||
|
|
||||||
|
## Complete Examples
|
||||||
|
|
||||||
|
### Example 1: Multiplication Library
|
||||||
|
|
||||||
|
```asm
|
||||||
|
// multiply.dsa
|
||||||
|
// Multiplies two numbers using repeated addition (the loop body always runs once, so the multiplicand must be >= 1)
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
// include multiply "multiply.dsa"
|
||||||
|
// push arg1
|
||||||
|
// push arg0
|
||||||
|
// call multiply::multiply
|
||||||
|
// pop result
|
||||||
|
// pop zero ; discard second argument
|
||||||
|
|
||||||
|
multiply:
|
||||||
|
push bpr
|
||||||
|
mov spr, bpr
|
||||||
|
|
||||||
|
ldw bpr, rg0, 8 ; Load multiplier
|
||||||
|
ldw bpr, rg1, 12 ; Load multiplicand
|
||||||
|
|
||||||
|
lli 0, acc ; Initialize result to 0
|
||||||
|
|
||||||
|
loop_start:
|
||||||
|
add acc, rg0, acc ; acc += multiplier
|
||||||
|
dec rg1 ; multiplicand--
|
||||||
|
|
||||||
|
cmp rg1, zero
|
||||||
|
jgt loop_start ; Continue if multiplicand > 0
|
||||||
|
|
||||||
|
stw acc, bpr, 8 ; Store result for caller
|
||||||
|
|
||||||
|
mov bpr, spr
|
||||||
|
pop bpr
|
||||||
|
return
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example 2: Print Library
|
||||||
|
|
||||||
|
```asm
|
||||||
|
// print.dsa
|
||||||
|
// Prints null-terminated string to display memory
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
// include print "print.dsa"
|
||||||
|
//
|
||||||
|
// push string_address
|
||||||
|
// call print::print
|
||||||
|
// pop zero
|
||||||
|
//
|
||||||
|
// call print::reset ; Reset cursor (no args)
|
||||||
|
|
||||||
|
dw display: 0x20000 ; Display memory base address
|
||||||
|
dw current: 0x20000 ; Current cursor position
|
||||||
|
|
||||||
|
// Print function
|
||||||
|
print:
|
||||||
|
push bpr
|
||||||
|
mov spr, bpr
|
||||||
|
|
||||||
|
ldw bpr, rg0, 8 ; Get string address argument
|
||||||
|
ldw current, rg1 ; Get current cursor position
|
||||||
|
|
||||||
|
print_loop:
|
||||||
|
ldb rg0, acc ; Load character
|
||||||
|
stb acc, rg1 ; Store to display
|
||||||
|
|
||||||
|
iadd rg0, 1, rg0 ; Advance string pointer
|
||||||
|
iadd rg1, 1, rg1 ; Advance cursor
|
||||||
|
|
||||||
|
cmp acc, zero ; Check for null terminator
|
||||||
|
jne print_loop ; Continue if not null
|
||||||
|
|
||||||
|
stw rg1, current ; Save cursor position
|
||||||
|
|
||||||
|
mov bpr, spr
|
||||||
|
pop bpr
|
||||||
|
return
|
||||||
|
|
||||||
|
// Reset cursor function
|
||||||
|
reset:
|
||||||
|
push bpr
|
||||||
|
mov spr, bpr
|
||||||
|
|
||||||
|
ldw display, rg1 ; Load display base
|
||||||
|
stw rg1, current ; Reset cursor to start
|
||||||
|
|
||||||
|
mov bpr, spr
|
||||||
|
pop bpr
|
||||||
|
return
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example 3: Main Program
|
||||||
|
|
||||||
|
```asm
|
||||||
|
// main.dsa
|
||||||
|
// Demonstrates using included libraries
|
||||||
|
|
||||||
|
include print "./print.dsa"
|
||||||
|
|
||||||
|
dw stack: 0x10000
|
||||||
|
db string: "'To confuse your enemy, you must first confuse yourself' - Probably Sun Tzu.", 0
|
||||||
|
|
||||||
|
init:
|
||||||
|
// Set up stack
|
||||||
|
ldw stack, bpr
|
||||||
|
mov bpr, spr
|
||||||
|
|
||||||
|
start:
|
||||||
|
// Load string address
|
||||||
|
lwi string, rg1
|
||||||
|
|
||||||
|
// Call print function
|
||||||
|
push rg1
|
||||||
|
call print::print
|
||||||
|
pop rg1 ; Clean up (rg1 now contains arg we passed)
|
||||||
|
|
||||||
|
hlt
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example 4: Conditional Logic
|
||||||
|
|
||||||
|
```asm
|
||||||
|
// Demonstrates comparisons and branching
|
||||||
|
|
||||||
|
dw value: 42
|
||||||
|
|
||||||
|
main:
|
||||||
|
ldw value, rg0
|
||||||
|
|
||||||
|
cmp rg0, zero
|
||||||
|
jeq is_zero
|
||||||
|
jgt is_positive
|
||||||
|
jlt is_negative
|
||||||
|
|
||||||
|
is_zero:
|
||||||
|
// Handle zero case
|
||||||
|
lwi zero_msg, rg1
|
||||||
|
jmp print_and_exit
|
||||||
|
|
||||||
|
is_positive:
|
||||||
|
// Handle positive case
|
||||||
|
lwi positive_msg, rg1
|
||||||
|
jmp print_and_exit
|
||||||
|
|
||||||
|
is_negative:
|
||||||
|
// Handle negative case
|
||||||
|
lwi negative_msg, rg1
|
||||||
|
jmp print_and_exit
|
||||||
|
|
||||||
|
print_and_exit:
|
||||||
|
push rg1
|
||||||
|
call print::print
|
||||||
|
pop zero
|
||||||
|
hlt
|
||||||
|
|
||||||
|
db zero_msg: "Value is zero", 0
|
||||||
|
db positive_msg: "Value is positive", 0
|
||||||
|
db negative_msg: "Value is negative", 0
|
||||||
|
```
|
||||||
|
|
||||||
|
### Example 5: Loop with Counter
|
||||||
|
|
||||||
|
```asm
|
||||||
|
// Count from 0 to 9
|
||||||
|
|
||||||
|
dw stack: 0x10000
|
||||||
|
|
||||||
|
main:
|
||||||
|
ldw stack, bpr
|
||||||
|
mov bpr, spr
|
||||||
|
|
||||||
|
lli 0, rg0 ; Counter = 0
|
||||||
|
lli 10, rg1 ; Limit = 10
|
||||||
|
|
||||||
|
loop:
|
||||||
|
// Do something with counter in rg0
|
||||||
|
push rg0
|
||||||
|
call process_value
|
||||||
|
pop zero
|
||||||
|
|
||||||
|
inc rg0 ; Counter++
|
||||||
|
cmp rg0, rg1 ; Compare with limit
|
||||||
|
jlt loop ; Loop if counter < limit
|
||||||
|
|
||||||
|
hlt
|
||||||
|
|
||||||
|
process_value:
|
||||||
|
push bpr
|
||||||
|
mov spr, bpr
|
||||||
|
|
||||||
|
ldw bpr, rg0, 8 ; Get value
|
||||||
|
; Process value here...
|
||||||
|
|
||||||
|
mov bpr, spr
|
||||||
|
pop bpr
|
||||||
|
return
|
||||||
|
```
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
### 1. Stack Management
|
||||||
|
- Always balance push/pop operations
|
||||||
|
- Set up stack frame in every function
|
||||||
|
- Clean up arguments after function calls
|
||||||
|
- Use `pop zero` to discard unwanted values
|
||||||
|
|
||||||
|
### 2. Register Usage
|
||||||
|
- Don't rely on `acc` being preserved
|
||||||
|
- Don't use `rgf` for variables (used by label addressing)
|
||||||
|
- Save callee-saved registers if you modify them
|
||||||
|
- Use `zero` register for zero constants
|
||||||
|
|
||||||
|
### 3. Memory Access
|
||||||
|
- Ensure proper alignment for halfword/word access
|
||||||
|
- Use label-based addressing for clearer code
|
||||||
|
- Check that labels are defined before use
|
||||||
|
|
||||||
|
### 4. Function Design
|
||||||
|
- Document calling convention in comments
|
||||||
|
- Validate input arguments when appropriate
|
||||||
|
- Use consistent parameter order
|
||||||
|
- Return values via stack or designated register
|
||||||
|
|
||||||
|
### 5. Code Organization
|
||||||
|
- Use meaningful label names
|
||||||
|
- Comment complex operations
|
||||||
|
- Group related functions in modules
|
||||||
|
- Use includes for code reuse
|
||||||
|
|
||||||
|
### 6. Performance
|
||||||
|
- Minimize memory accesses (use registers)
|
||||||
|
- Avoid unnecessary comparisons
|
||||||
|
- Use shifts for multiplication/division by powers of 2
|
||||||
|
- Consider instruction pipelining if supported
|
||||||
|
|
||||||
|
## Assembler Directives
|
||||||
|
|
||||||
|
### Alignment (if supported)
|
||||||
|
```asm
|
||||||
|
.align 4 ; Align to 4-byte boundary
|
||||||
|
.align 2 ; Align to 2-byte boundary
|
||||||
|
```
|
||||||
|
|
||||||
|
### Origin (if supported)
|
||||||
|
```asm
|
||||||
|
.org 0x1000 ; Set location counter to 0x1000
|
||||||
|
```
|
||||||
|
|
||||||
|
### Section Control (if supported)
|
||||||
|
```asm
|
||||||
|
.text ; Code section
|
||||||
|
.data ; Data section
|
||||||
|
.bss ; Uninitialized data section
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note:** Assembler directive support depends on the specific DSA assembler implementation.
|
||||||
|
|
||||||
|
## Common Patterns
|
||||||
|
|
||||||
|
### Loading 32-bit Constants
|
||||||
|
```asm
|
||||||
|
lli lower_16_bits, reg
|
||||||
|
lui upper_16_bits, reg
|
||||||
|
```
|
||||||
|
|
||||||
|
### Zero a Register
|
||||||
|
```asm
|
||||||
|
mov zero, reg ; Method 1
|
||||||
|
xor reg, reg, reg ; Method 2
|
||||||
|
lli 0, reg ; Method 3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Copy Memory
|
||||||
|
```asm
|
||||||
|
ldw src_addr, rg0 ; Load from source
|
||||||
|
stw rg0, dest_addr ; Store to destination
|
||||||
|
```
|
||||||
|
|
||||||
|
### Multiply by Power of 2
|
||||||
|
```asm
|
||||||
|
shl reg, 3 ; Multiply by 8 (2^3)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Divide by Power of 2
|
||||||
|
```asm
|
||||||
|
shr reg, 2 ; Divide by 4 (2^2); unsigned values only, since shr is a logical (zero-fill) shift
|
||||||
|
```
|
||||||
|
|
||||||
|
### Boolean NOT
|
||||||
|
```asm
|
||||||
|
cmp reg, zero
|
||||||
|
jeq was_zero ; If reg == 0, result is 1
|
||||||
|
lli 0, reg
|
||||||
|
jmp done
|
||||||
|
was_zero:
|
||||||
|
lli 1, reg
|
||||||
|
done:
|
||||||
|
```
|
||||||
|
|
||||||
|
### Min/Max
|
||||||
|
```asm
|
||||||
|
; max(rg0, rg1) -> rg2
|
||||||
|
mov rg0, rg2 ; Assume rg0 is max
|
||||||
|
cmp rg0, rg1
|
||||||
|
jge done
|
||||||
|
mov rg1, rg2 ; rg1 was larger
|
||||||
|
done:
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Errors
|
||||||
|
|
||||||
|
**Alignment Fault:**
|
||||||
|
- Check that halfword loads/stores use even addresses
|
||||||
|
- Check that word loads/stores use addresses divisible by 4
|
||||||
|
|
||||||
|
**Illegal Instruction:**
|
||||||
|
- Verify opcode is valid
|
||||||
|
- Check that shift amount is 0 for non-shift instructions
|
||||||
|
- Ensure you're not using `noreg` as a source/destination
|
||||||
|
|
||||||
|
**Stack Corruption:**
|
||||||
|
- Verify push/pop balance
|
||||||
|
- Check that functions restore `bpr` before returning
|
||||||
|
- Ensure caller cleans up arguments
|
||||||
|
|
||||||
|
**Wrong Results:**
|
||||||
|
- Verify `lli` is called before `lui` when loading constants
|
||||||
|
- Check that you're not relying on `acc` or `rgf` being preserved
|
||||||
|
- Verify signed vs. unsigned loads (ldb vs. ldbs)
|
||||||
|
|
||||||
|
### Debugging Tips
|
||||||
|
|
||||||
|
1. Add `nop` instructions as breakpoint markers
|
||||||
|
2. Print register values using display memory
|
||||||
|
3. Use single-step execution to trace program flow
|
||||||
|
4. Verify stack pointer values at function boundaries
|
||||||
|
5. Check label addresses in disassembly
|
||||||
|
|
||||||
|
## Appendix: Instruction Quick Reference
|
||||||
|
|
||||||
|
| Category | Instructions |
|
||||||
|
|----------|-------------|
|
||||||
|
| **Data Movement** | mov, movs |
|
||||||
|
| **Memory Load** | ldb, ldbs, ldh, ldhs, ldw |
|
||||||
|
| **Memory Store** | stb, sth, stw |
|
||||||
|
| **Immediate Load** | lli, lui |
|
||||||
|
| **Jump/Branch** | jmp, jeq, jne, jgt, jge, jlt, jle |
|
||||||
|
| **Comparison** | cmp |
|
||||||
|
| **Arithmetic** | add, sub, iadd, isub, inc, dec |
|
||||||
|
| **Logical** | and, or, xor, not, nand, nor, xnor |
|
||||||
|
| **Shift** | shl, shr |
|
||||||
|
| **System** | hlt, nop, int, irt |
|
||||||
|
| **Pseudo** | db, dh, dw, resb, resh, resw, push, pop, lwi, call, return, include |
|
||||||
|
|
||||||
|
## Version History
|
||||||
|
|
||||||
|
- **v1.0** - Initial comprehensive reference
|
||||||
|
- Combined hardware instructions and pseudo-instructions
|
||||||
|
- Added complete calling convention
|
||||||
|
- Included practical examples
|
||||||
|
- Documented common patterns and best practices
|
||||||
@@ -0,0 +1,401 @@
|
|||||||
|
# DSA Instruction Set Architecture Specification
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The Damn Simple Architecture (DSA) is a 32-bit RISC-style architecture designed for simplicity and educational purposes. This document provides the complete instruction set architecture specification, including all hardware instructions, registers, and encoding formats.
|
||||||
|
|
||||||
|
## Data Types and Sizes
|
||||||
|
|
||||||
|
| Type | Size | Alignment |
|
||||||
|
|------|------|-----------|
|
||||||
|
| Byte | 8 bits | 1-byte aligned |
|
||||||
|
| Halfword | 16 bits | 2-byte aligned |
|
||||||
|
| Word | 32 bits | 4-byte aligned |
|
||||||
|
|
||||||
|
All multi-byte values use little-endian byte order.
|
||||||
|
|
||||||
|
## Registers
|
||||||
|
|
||||||
|
DSA provides 32 programmer-accessible registers plus several internal system registers.
|
||||||
|
|
||||||
|
### Programmer-Accessible Registers
|
||||||
|
|
||||||
|
| Hex | Register | Type | Description |
|
||||||
|
|-----|----------|------|-------------|
|
||||||
|
| 0x00-0x0F | **rg0-rgf** | General Purpose | 16 general-purpose registers for variables and temporary values |
|
||||||
|
| 0x10 | **acc** | Special | Accumulator for calculations and temporary storage<br/>⚠️ May be overwritten by pseudo-instructions |
|
||||||
|
| 0x11 | **spr** | Special | Stack pointer - points to top of stack |
|
||||||
|
| 0x12 | **bpr** | Special | Base pointer - used for stack frame management |
|
||||||
|
| 0x13 | **ret** | Special | Return address register - stores function return addresses |
|
||||||
|
| 0x14 | **idr** | Privileged | Interrupt descriptor table address<br/>Read/write triggers protection fault in user mode |
|
||||||
|
| 0x15 | **mmr** | Privileged | Hardware memory map table address<br/>Read/write triggers protection fault in user mode |
|
||||||
|
| 0x16 | **zero** | Read-only | Constant zero value<br/>Reads always return 0, writes are discarded |
|
||||||
|
| 0x17 | **noreg** | Placeholder | Indicates unused register field<br/>Read/write triggers illegal instruction fault |
|
||||||
|
| 0x18-0x1F | - | Reserved | Reserved for future use |
|
||||||
|
|
||||||
|
**Note on PCX (Program Counter):**
|
||||||
|
- PCX is a read-only system register that can be accessed in some contexts
|
||||||
|
- Writing to PCX triggers a protection fault
|
||||||
|
- PCX is automatically updated by jump and branch instructions
|
||||||
|
|
||||||
|
### System Registers (Internal)
|
||||||
|
|
||||||
|
These registers are used internally by the CPU and are not directly accessible via assembly instructions:
|
||||||
|
|
||||||
|
| Register | Description |
|
||||||
|
|----------|-------------|
|
||||||
|
| **MAR** | Memory Address Register - holds address for memory operations |
|
||||||
|
| **MDR** | Memory Data Register - holds data for memory transfers |
|
||||||
|
| **CIR** | Current Instruction Register - holds instruction being executed |
|
||||||
|
| **STS** | Status Register - stores comparison and arithmetic flags |
|
||||||
|
| **PCX** | Program Counter - stores address of next instruction |
|
||||||
|
|
||||||
|
### Status Register (STS) Layout
|
||||||
|
|
||||||
|
The status register is a 32-bit register with the following flag bits:
|
||||||
|
|
||||||
|
| Bit | Name | Description | Boot Value |
|
||||||
|
|-----|------|-------------|------------|
|
||||||
|
| 0 | **Equal** | Set if last comparison result was equal | 0 |
|
||||||
|
| 1 | **GreaterThan** | Set if last comparison result was greater than | 0 |
|
||||||
|
| 2 | **GreaterThanOrEqual** | Set if last comparison was greater than or equal | 0 |
|
||||||
|
| 3 | **LessThan** | Set if last comparison result was less than | 0 |
|
||||||
|
| 4 | **LessThanOrEqual** | Set if last comparison was less than or equal | 0 |
|
||||||
|
| 5 | **Zero** | Set if last arithmetic/logic operation result was zero | 0 |
|
||||||
|
| 6-31 | - | Reserved | 0 |
|
||||||
|
|
||||||
|
## Instruction Encoding Formats
|
||||||
|
|
||||||
|
DSA uses three instruction encoding formats:
|
||||||
|
|
||||||
|
### R-Type (Register) Instructions
|
||||||
|
|
||||||
|
Used for operations with register operands only, including shifts.
|
||||||
|
|
||||||
|
```
|
||||||
|
31-26 | 25-21 | 20-16 | 15-11 | 10-6 | 5-0
|
||||||
|
--------+---------+---------+---------+--------+-------
|
||||||
|
Opcode | SrcReg1 | SrcReg2 | DestReg | ShiftAmt | Unused
|
||||||
|
```
|
||||||
|
|
||||||
|
- **Opcode** (6 bits): Instruction operation code
|
||||||
|
- **SrcReg1** (5 bits): First source register
|
||||||
|
- **SrcReg2** (5 bits): Second source register
|
||||||
|
- **DestReg** (5 bits): Destination register
|
||||||
|
- **ShiftAmt** (5 bits): Shift amount (for shift instructions only, must be 0 otherwise)
|
||||||
|
- **Unused** (6 bits): Must be 0
|
||||||
|
|
||||||
|
**Important Rules:**
|
||||||
|
- ShiftAmt must be 0 for non-shift instructions (else illegal instruction fault)
|
||||||
|
- Unused register fields must be set to `noreg` (0x17) if not used
|
||||||
|
- Using registers in unexpected positions may cause illegal instruction fault
|
||||||
|
|
||||||
|
### I-Type (Immediate) Instructions
|
||||||
|
|
||||||
|
Used for operations with a 16-bit immediate value.
|
||||||
|
|
||||||
|
```
|
||||||
|
31-26 | 25-21 | 20-16 | 15-0
|
||||||
|
--------+---------+---------+-------------
|
||||||
|
Opcode | SrcReg | DestReg | 16-bit Immediate
|
||||||
|
```
|
||||||
|
|
||||||
|
- **Opcode** (6 bits): Instruction operation code
|
||||||
|
- **SrcReg** (5 bits): Source register (base for memory ops)
|
||||||
|
- **DestReg** (5 bits): Destination register (or offset register for jumps)
|
||||||
|
- **Immediate** (16 bits): Signed 16-bit immediate value or offset
|
||||||
|
|
||||||
|
**Usage:**
|
||||||
|
- Arithmetic: Immediate is a signed value
|
||||||
|
- Memory access: Immediate is a signed byte offset from base address
|
||||||
|
- Branches: Immediate is a signed offset from current PCX
|
||||||
|
- Literal loads: Immediate is unsigned 16-bit value
|
||||||
|
|
||||||
|
### J-Type (Jump) Instructions
|
||||||
|
|
||||||
|
Used for absolute jumps with large address ranges.
|
||||||
|
|
||||||
|
```
|
||||||
|
31-26 | 25-0
|
||||||
|
--------+----------------------
|
||||||
|
Opcode | 26-bit Address
|
||||||
|
```
|
||||||
|
|
||||||
|
- **Opcode** (6 bits): Jump instruction code
|
||||||
|
- **Address** (26 bits): Partial address for jump
|
||||||
|
|
||||||
|
**Address Calculation:**
|
||||||
|
1. Left-shift the 26-bit address by 2 (word alignment)
|
||||||
|
2. OR with upper 4 bits of current PCX
|
||||||
|
3. Result is final 32-bit jump address
|
||||||
|
|
||||||
|
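**Jump Range:** The 256MB-aligned region that contains the current PCX (the upper 4 bits of PCX are kept, so a jump cannot leave that region; it is not ±128MB relative to PCX)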
|
||||||
|
|
||||||
|
**Note:** J-type instructions are defined but currently unused. Use I-type JMP with register addressing for long jumps.
|
||||||
|
|
||||||
|
## Hardware Instructions
|
||||||
|
|
||||||
|
### Data Movement
|
||||||
|
|
||||||
|
| Hex | Mnemonic | Type | Operands | Description |
|
||||||
|
|-----|----------|------|----------|-------------|
|
||||||
|
| 0x00 | **NOP** | R | - | No operation - does nothing |
|
||||||
|
| 0x01 | **MOV** | R | SrcReg, DestReg | Copy value from SrcReg to DestReg |
|
||||||
|
| 0x02 | **MOVS** | R | SrcReg, DestReg | Copy with sign extension to fill 32 bits |
|
||||||
|
|
||||||
|
**MOV/MOVS Details:**
|
||||||
|
- MOV performs direct copy (all 32 bits)
|
||||||
|
- MOVS sign-extends the value (useful after byte/halfword loads)
|
||||||
|
- Both instructions set the Zero flag if result is zero
|
||||||
|
|
||||||
|
### Memory Access - Load Instructions
|
||||||
|
|
||||||
|
All loads require proper alignment or trigger an alignment fault.
|
||||||
|
|
||||||
|
| Hex | Mnemonic | Type | Operands | Description |
|
||||||
|
|-----|----------|------|----------|-------------|
|
||||||
|
| 0x03 | **LDB** | I | BaseReg, DestReg, Offset | Load byte (8-bit), zero-extend to 32 bits |
|
||||||
|
| 0x04 | **LDBS** | I | BaseReg, DestReg, Offset | Load byte (8-bit), sign-extend to 32 bits |
|
||||||
|
| 0x05 | **LDH** | I | BaseReg, DestReg, Offset | Load halfword (16-bit), zero-extend to 32 bits |
|
||||||
|
| 0x06 | **LDHS** | I | BaseReg, DestReg, Offset | Load halfword (16-bit), sign-extend to 32 bits |
|
||||||
|
| 0x07 | **LDW** | I | BaseReg, DestReg, Offset | Load word (32-bit) |
|
||||||
|
|
||||||
|
**Load Operation:**
|
||||||
|
- Effective address = BaseReg + SignExtend(Offset)
|
||||||
|
- Offset is a signed 16-bit value
|
||||||
|
- Alignment requirements:
|
||||||
|
- LDB/LDBS: No alignment required (byte-aligned)
|
||||||
|
- LDH/LDHS: Must be 2-byte aligned
|
||||||
|
- LDW: Must be 4-byte aligned
|
||||||
|
|
||||||
|
**Encoding Note:**
|
||||||
|
In machine code, the order is: BaseReg (SrcReg field), DestReg field, Offset (Immediate field)
|
||||||
|
|
||||||
|
### Memory Access - Store Instructions
|
||||||
|
|
||||||
|
All stores require proper alignment or trigger an alignment fault.
|
||||||
|
|
||||||
|
| Hex | Mnemonic | Type | Operands | Description |
|
||||||
|
|-----|----------|------|----------|-------------|
|
||||||
|
| 0x08 | **STB** | I | SrcReg, BaseReg, Offset | Store byte (8-bit) to memory |
|
||||||
|
| 0x09 | **STH** | I | SrcReg, BaseReg, Offset | Store halfword (16-bit) to memory |
|
||||||
|
| 0x0A | **STW** | I | SrcReg, BaseReg, Offset | Store word (32-bit) to memory |
|
||||||
|
|
||||||
|
**Store Operation:**
|
||||||
|
- Effective address = BaseReg + SignExtend(Offset)
|
||||||
|
- Offset is a signed 16-bit value
|
||||||
|
- Only the relevant bits are stored (8, 16, or 32)
|
||||||
|
- Alignment requirements:
|
||||||
|
- STB: No alignment required (byte-aligned)
|
||||||
|
- STH: Must be 2-byte aligned
|
||||||
|
- STW: Must be 4-byte aligned
|
||||||
|
|
||||||
|
**Encoding Note:**
|
||||||
|
In machine code: SrcReg (SrcReg field), BaseReg (DestReg field), Offset (Immediate field)
|
||||||
|
|
||||||
|
### Immediate Load Instructions
|
||||||
|
|
||||||
|
| Hex | Mnemonic | Type | Operands | Description |
|
||||||
|
|-----|----------|------|----------|-------------|
|
||||||
|
| 0x0B | **LLI** | I | DestReg, Value | Load 16-bit value into lower 16 bits<br/>⚠️ **CLEARS upper 16 bits!** |
|
||||||
|
| 0x0C | **LUI** | I | DestReg, Value | Load 16-bit value into upper 16 bits<br/>Lower 16 bits unchanged |
|
||||||
|
|
||||||
|
**Usage for 32-bit Values:**
|
||||||
|
```
|
||||||
|
LLI 0x1234, rg0 ; rg0 = 0x00001234
|
||||||
|
LUI 0xABCD, rg0 ; rg0 = 0xABCD1234
|
||||||
|
```
|
||||||
|
|
||||||
|
**⚠️ CRITICAL:** Always execute LLI before LUI, as LLI clears the upper 16 bits!
|
||||||
|
|
||||||
|
**Encoding Note:**
|
||||||
|
In machine code: Value (Immediate field), DestReg field (SrcReg unused, set to noreg)
|
||||||
|
|
||||||
|
### Jump and Branch Instructions
|
||||||
|
|
||||||
|
| Hex | Mnemonic | Type | Operands | Description |
|
||||||
|
|-----|----------|------|----------|-------------|
|
||||||
|
| 0x0D | **JMP** | I | DestReg, Offset | Unconditional jump to (DestReg + Offset) |
|
||||||
|
| 0x0E | **JEQ** | I | DestReg, Offset | Jump if Equal flag set |
|
||||||
|
| 0x0F | **JNE** | I | DestReg, Offset | Jump if Equal flag NOT set |
|
||||||
|
| 0x10 | **JGT** | I | DestReg, Offset | Jump if GreaterThan flag set |
|
||||||
|
| 0x11 | **JGE** | I | DestReg, Offset | Jump if GreaterThan OR Equal flag set |
|
||||||
|
| 0x12 | **JLT** | I | DestReg, Offset | Jump if LessThan flag set |
|
||||||
|
| 0x13 | **JLE** | I | DestReg, Offset | Jump if LessThan OR Equal flag set |
|
||||||
|
|
||||||
|
**Jump Calculation:**
|
||||||
|
- Target address = DestReg + SignExtend(Offset)
|
||||||
|
- If DestReg = zero, this becomes absolute addressing with Offset
|
||||||
|
- If DestReg = pcx, this becomes PC-relative addressing
|
||||||
|
- Conditional jumps check flags in STS register
|
||||||
|
|
||||||
|
**Encoding Note:**
|
||||||
|
In machine code: DestReg field, Offset (Immediate field) (SrcReg unused, set to noreg)
|
||||||
|
|
||||||
|
### Comparison
|
||||||
|
|
||||||
|
| Hex | Mnemonic | Type | Operands | Description |
|
||||||
|
|-----|----------|------|----------|-------------|
|
||||||
|
| 0x14 | **CMP** | R | Reg1, Reg2 | Compare Reg1 with Reg2, set flags in STS |
|
||||||
|
|
||||||
|
**Flag Setting:**
|
||||||
|
- Equal: Set if Reg1 == Reg2
|
||||||
|
- GreaterThan: Set if Reg1 > Reg2 (signed)
|
||||||
|
- GreaterThanOrEqual: Set if Reg1 >= Reg2 (signed)
|
||||||
|
- LessThan: Set if Reg1 < Reg2 (signed)
|
||||||
|
- LessThanOrEqual: Set if Reg1 <= Reg2 (signed)
|
||||||
|
- Zero: Set if (Reg1 - Reg2) == 0 (same as Equal)
|
||||||
|
|
||||||
|
**Encoding Note:**
|
||||||
|
DestReg and ShiftAmt fields unused (set to noreg and 0)
|
||||||
|
|
||||||
|
### Arithmetic Instructions
|
||||||
|
|
||||||
|
| Hex | Mnemonic | Type | Operands | Description |
|
||||||
|
|-----|----------|------|----------|-------------|
|
||||||
|
| 0x15 | **INC** | R | Reg | Increment register by 1 |
|
||||||
|
| 0x16 | **DEC** | R | Reg | Decrement register by 1 |
|
||||||
|
| 0x19 | **ADD** | R | Src1, Src2, Dest | Dest = Src1 + Src2 |
|
||||||
|
| 0x1A | **SUB** | R | Src1, Src2, Dest | Dest = Src1 - Src2 |
|
||||||
|
| 0x25 | **IADD** | I | Src, Literal, Dest | Dest = Src + SignExtend(Literal) |
|
||||||
|
| 0x26 | **ISUB** | I | Src, Literal, Dest | Dest = Src - SignExtend(Literal) |
|
||||||
|
|
||||||
|
**Flag Effects:**
|
||||||
|
- Zero flag set if result is zero
|
||||||
|
- Other flags undefined after arithmetic (use CMP for comparisons)
|
||||||
|
|
||||||
|
**Encoding Notes:**
|
||||||
|
- INC/DEC: Reg in SrcReg1 field, also copied to DestReg field
|
||||||
|
- IADD/ISUB: Immediate is signed 16-bit value
|
||||||
|
|
||||||
|
### Bitwise Logical Operations
|
||||||
|
|
||||||
|
| Hex | Mnemonic | Type | Operands | Description |
|
||||||
|
|-----|----------|------|----------|-------------|
|
||||||
|
| 0x1B | **AND** | R | Src1, Src2, Dest | Dest = Src1 & Src2 (bitwise AND) |
|
||||||
|
| 0x1C | **OR** | R | Src1, Src2, Dest | Dest = Src1 \| Src2 (bitwise OR) |
|
||||||
|
| 0x1D | **NOT** | R | Src, Dest | Dest = ~Src (bitwise NOT) |
|
||||||
|
| 0x1E | **XOR** | R | Src1, Src2, Dest | Dest = Src1 ^ Src2 (bitwise XOR) |
|
||||||
|
| 0x1F | **NAND** | R | Src1, Src2, Dest | Dest = ~(Src1 & Src2) (bitwise NAND) |
|
||||||
|
| 0x20 | **NOR** | R | Src1, Src2, Dest | Dest = ~(Src1 \| Src2) (bitwise NOR) |
|
||||||
|
| 0x21 | **XNOR** | R | Src1, Src2, Dest | Dest = ~(Src1 ^ Src2) (bitwise XNOR) |
|
||||||
|
|
||||||
|
**Flag Effects:**
|
||||||
|
- Zero flag set if result is zero
|
||||||
|
- Other flags undefined
|
||||||
|
|
||||||
|
**Encoding Note:**
|
||||||
|
NOT uses only Src and Dest; SrcReg2 unused (set to noreg)
|
||||||
|
|
||||||
|
### Shift Operations
|
||||||
|
|
||||||
|
| Hex | Mnemonic | Type | Operands | Description |
|
||||||
|
|-----|----------|------|----------|-------------|
|
||||||
|
| 0x17 | **SHL** | R | Reg, ShiftAmount | Shift Reg left by ShiftAmount bits<br/>Zero-fill from right |
|
||||||
|
| 0x18 | **SHR** | R | Reg, ShiftAmount | Shift Reg right by ShiftAmount bits<br/>Zero-fill from left (logical shift) |
|
||||||
|
|
||||||
|
**Shift Amount:**
|
||||||
|
- Can be a 5-bit literal (0-31) in ShiftAmt field
|
||||||
|
- Can be a register value (low 5 bits used)
|
||||||
|
- If using register: Place in SrcReg2, set ShiftAmt to 0
|
||||||
|
- If using literal: Place in ShiftAmt field, set SrcReg2 to noreg
|
||||||
|
|
||||||
|
**Flag Effects:**
|
||||||
|
- Zero flag set if result is zero
|
||||||
|
|
||||||
|
**Encoding Notes:**
|
||||||
|
- Reg in both SrcReg1 and DestReg fields
|
||||||
|
- For literal shifts: ShiftAmt field contains shift count
|
||||||
|
- For register shifts: SrcReg2 contains register, ShiftAmt must be 0
|
||||||
|
|
||||||
|
### System and Control Instructions
|
||||||
|
|
||||||
|
| Hex | Mnemonic | Type | Operands | Description |
|
||||||
|
|-----|----------|------|----------|-------------|
|
||||||
|
| 0x22 | **INT** | I | InterruptCode | Trigger interrupt with 8-bit code<br/>Saves return address to ret register<br/>Sets bpr to kernel stack |
|
||||||
|
| 0x23 | **IRT** | R | - | Return from interrupt<br/>Restores execution context |
|
||||||
|
| 0x24 | **HLT** | R | - | Halt processor execution<br/>Stops fetch-decode-execute cycle |
|
||||||
|
|
||||||
|
**INT Behavior:**
|
||||||
|
1. Save current PCX to ret register
|
||||||
|
2. Switch bpr to kernel stack address
|
||||||
|
3. Look up interrupt handler address in interrupt descriptor table (idr)
|
||||||
|
4. Jump to handler at interrupt vector
|
||||||
|
|
||||||
|
**IRT Behavior:**
|
||||||
|
1. Restore previous execution context
|
||||||
|
2. Return to address in ret register
|
||||||
|
3. Restore user stack pointer
|
||||||
|
|
||||||
|
**Encoding Notes:**
|
||||||
|
- INT: InterruptCode in low 8 bits of Immediate field
|
||||||
|
- IRT/HLT: All register fields set to noreg, ShiftAmt to 0
|
||||||
|
|
||||||
|
## Instruction Summary Table
|
||||||
|
|
||||||
|
| Opcode | Mnemonic | Type | Category |
|
||||||
|
|--------|----------|------|----------|
|
||||||
|
| 0x00 | NOP | R | Control |
|
||||||
|
| 0x01 | MOV | R | Data Movement |
|
||||||
|
| 0x02 | MOVS | R | Data Movement |
|
||||||
|
| 0x03 | LDB | I | Memory Load |
|
||||||
|
| 0x04 | LDBS | I | Memory Load |
|
||||||
|
| 0x05 | LDH | I | Memory Load |
|
||||||
|
| 0x06 | LDHS | I | Memory Load |
|
||||||
|
| 0x07 | LDW | I | Memory Load |
|
||||||
|
| 0x08 | STB | I | Memory Store |
|
||||||
|
| 0x09 | STH | I | Memory Store |
|
||||||
|
| 0x0A | STW | I | Memory Store |
|
||||||
|
| 0x0B | LLI | I | Immediate Load |
|
||||||
|
| 0x0C | LUI | I | Immediate Load |
|
||||||
|
| 0x0D | JMP | I | Jump |
|
||||||
|
| 0x0E | JEQ | I | Branch |
|
||||||
|
| 0x0F | JNE | I | Branch |
|
||||||
|
| 0x10 | JGT | I | Branch |
|
||||||
|
| 0x11 | JGE | I | Branch |
|
||||||
|
| 0x12 | JLT | I | Branch |
|
||||||
|
| 0x13 | JLE | I | Branch |
|
||||||
|
| 0x14 | CMP | R | Comparison |
|
||||||
|
| 0x15 | INC | R | Arithmetic |
|
||||||
|
| 0x16 | DEC | R | Arithmetic |
|
||||||
|
| 0x17 | SHL | R | Shift |
|
||||||
|
| 0x18 | SHR | R | Shift |
|
||||||
|
| 0x19 | ADD | R | Arithmetic |
|
||||||
|
| 0x1A | SUB | R | Arithmetic |
|
||||||
|
| 0x1B | AND | R | Logical |
|
||||||
|
| 0x1C | OR | R | Logical |
|
||||||
|
| 0x1D | NOT | R | Logical |
|
||||||
|
| 0x1E | XOR | R | Logical |
|
||||||
|
| 0x1F | NAND | R | Logical |
|
||||||
|
| 0x20 | NOR | R | Logical |
|
||||||
|
| 0x21 | XNOR | R | Logical |
|
||||||
|
| 0x22 | INT | I | System |
|
||||||
|
| 0x23 | IRT | R | System |
|
||||||
|
| 0x24 | HLT | R | System |
|
||||||
|
| 0x25 | IADD | I | Arithmetic |
|
||||||
|
| 0x26 | ISUB | I | Arithmetic |
|
||||||
|
|
||||||
|
## Exception Conditions
|
||||||
|
|
||||||
|
The following conditions trigger exceptions:
|
||||||
|
|
||||||
|
| Exception | Trigger Condition |
|
||||||
|
|-----------|------------------|
|
||||||
|
| **Illegal Instruction** | - Invalid opcode<br/>- noreg used as source/destination<br/>- ShiftAmt non-zero for non-shift instruction<br/>- Register field violations |
|
||||||
|
| **Protection Fault** | - Write to pcx register<br/>- Read/write idr or mmr in user mode<br/>- Read from noreg<br/>- Note: a write to the zero register is discarded and does NOT raise a fault |
|
||||||
|
| **Alignment Fault** | - LDH/LDHS/STH with odd address<br/>- LDW/STW with address not divisible by 4 |
|
||||||
|
| **Memory Access Violation** | - Access to unmapped or protected memory<br/>- Stack overflow/underflow |
|
||||||
|
|
||||||
|
## Calling Convention
|
||||||
|
|
||||||
|
See the DSA Assembly Language Reference for the complete calling convention and ABI specification.
|
||||||
|
|
||||||
|
## Notes on Design
|
||||||
|
|
||||||
|
1. **Word Size:** All addresses and general computations are 32-bit
|
||||||
|
2. **Endianness:** Little-endian byte order
|
||||||
|
3. **Stack Growth:** Stack grows upward (incrementing addresses)
|
||||||
|
4. **Alignment:** Natural alignment required for halfword and word accesses
|
||||||
|
5. **Sign Extension:** All immediate values are sign-extended unless noted
|
||||||
|
6. **Zero Register:** Provides constant zero, writes are legal but discarded
|
||||||
|
7. **Reserved Encodings:** Opcodes 0x27-0x3F reserved for future use
|
||||||
@@ -0,0 +1,638 @@
|
|||||||
|
# DSA Implementation vs Documentation Discrepancies
|
||||||
|
|
||||||
|
## Critical Discrepancies
|
||||||
|
|
||||||
|
### 1. **Stack Growth Direction** ❌ CRITICAL
|
||||||
|
|
||||||
|
**Documentation states:** Stack grows upward (toward higher addresses)
|
||||||
|
|
||||||
|
**Implementation shows (expand.rs:44-51):**
|
||||||
|
```rust
|
||||||
|
fn expand_push(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||||
|
// ...
|
||||||
|
nodes.extend(vec![
|
||||||
|
node!(label, Opcode::SubI, spr, 4, spr), // spr = spr - 4
|
||||||
|
node!(None, Opcode::Stw, reg, spr, 0),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation shows (expand.rs:130-137):**
|
||||||
|
```rust
|
||||||
|
fn expand_pop(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||||
|
// ...
|
||||||
|
nodes.extend(vec![
|
||||||
|
node!(label, Opcode::Ldw, spr, reg, 0),
|
||||||
|
node!(None, Opcode::AddI, spr, 4, spr), // spr = spr + 4
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Reality:** Stack grows **DOWNWARD** (toward lower addresses)
|
||||||
|
- PUSH: Decrements SPR by 4, then stores
|
||||||
|
- POP: Loads, then increments SPR by 4
|
||||||
|
|
||||||
|
**Impact:** All documentation examples and calling convention diagrams are backwards!
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2. **CALL Pseudo-instruction Expansion** ❌ CRITICAL
|
||||||
|
|
||||||
|
**Documentation states (DSA_Assembly_Reference.md):**
|
||||||
|
```asm
|
||||||
|
; call print::print expands to:
|
||||||
|
lwi print::print, ret ; Load function address into ret
|
||||||
|
jmp 0, ret ; Jump to function (saves return in pcx)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation shows (expand.rs:109-123):**
|
||||||
|
```rust
|
||||||
|
fn expand_call(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||||
|
nodes.extend(vec![
|
||||||
|
node!(label, Opcode::SubI, spr, 4, spr), // Decrement stack pointer
|
||||||
|
node!(None, Opcode::Stw, pcx, spr, 0), // Store PCX (return addr) on stack
|
||||||
|
node!(None, Opcode::Jmp, addr, zero), // Jump to function
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Reality:** CALL expansion is:
|
||||||
|
1. Decrement SPR by 4
|
||||||
|
2. Store PCX (return address) to stack
|
||||||
|
3. Jump to function address
|
||||||
|
|
||||||
|
**Impact:** Return address is stored on the STACK, not in RET register!
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3. **RETURN Pseudo-instruction Expansion** ❌ CRITICAL
|
||||||
|
|
||||||
|
**Documentation states:**
|
||||||
|
```asm
|
||||||
|
; return expands to:
|
||||||
|
jmp 0, ret ; Jump to address in ret register
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation shows (expand.rs:125-135):**
|
||||||
|
```rust
|
||||||
|
fn expand_return(current: &Node, nodes: &mut Vec<Node>) {
|
||||||
|
nodes.extend(vec![
|
||||||
|
node!(label, Opcode::Ldw, spr, ret, 0), // Load return addr from stack
|
||||||
|
node!(None, Opcode::AddI, spr, 4, spr), // Increment stack pointer
|
||||||
|
node!(None, Opcode::Jmp, 4, ret), // Jump to (ret + 4)
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Reality:** RETURN expansion is:
|
||||||
|
1. Load return address from stack into RET register
|
||||||
|
2. Increment SPR by 4
|
||||||
|
3. Jump to (RET + 4)
|
||||||
|
|
||||||
|
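**Why +4?** The PCX value saved by CALL's `stw` presumably points at the `jmp` that follows it in the expansion, so RETURN adds 4 to skip past that `jmp` and resume at the instruction after the call. This needs to be confirmed against the emulator: if the saved PCX already points past the `jmp`, the +4 is a bug in the implementation.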
|
||||||
|
|
||||||
|
**Impact:** Return mechanism is completely different from documentation!
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 4. **Calling Convention - Stack Frame Layout** ❌ CRITICAL
|
||||||
|
|
||||||
|
**Documentation states:**
|
||||||
|
```
|
||||||
|
Higher Addresses
|
||||||
|
├─────────────┤
|
||||||
|
│ Arg N │ ← spr + (8 + 4*(N-1))
|
||||||
|
│ ... │
|
||||||
|
│ Arg 2 │ ← spr + 16
|
||||||
|
│ Arg 1 │ ← spr + 12
|
||||||
|
│ Arg 0 │ ← spr + 8
|
||||||
|
├─────────────┤
|
||||||
|
│ Ret Addr │ ← spr + 4
|
||||||
|
├─────────────┤
|
||||||
|
│ Old BPR │ ← spr + 0
|
||||||
|
├─────────────┤ ← bpr, spr
|
||||||
|
│ Locals │
|
||||||
|
Lower Addresses
|
||||||
|
```
|
||||||
|
|
||||||
|
**Reality based on implementation:**
|
||||||
|
Since stack grows DOWN:
|
||||||
|
```
|
||||||
|
Lower Addresses
|
||||||
|
├─────────────┤ ← Current SPR/BPR
|
||||||
|
│ Old BPR │ ← spr + 0 (immediately above SPR)
|
||||||
|
├─────────────┤
|
||||||
|
│ Ret Addr │ ← spr + 4 (pushed by CALL)
|
||||||
|
├─────────────┤
|
||||||
|
│ Arg 0 │ ← spr + 8
|
||||||
|
│ Arg 1 │ ← spr + 12
|
||||||
|
│ Arg 2 │ ← spr + 16
|
||||||
|
│ ... │
|
||||||
|
│ Arg N │ ← spr + (8 + 4*(N-1))
|
||||||
|
├─────────────┤
|
||||||
|
Higher Addresses
|
||||||
|
```
|
||||||
|
|
||||||
|
**The diagram needs to be flipped!** The offsets are correct, but the direction is wrong.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 5. **Label-Based Load/Store Scratch Register** ⚠️ IMPORTANT
|
||||||
|
|
||||||
|
**Documentation states:** Uses `rgf` as scratch register
|
||||||
|
|
||||||
|
**Implementation shows (expand.rs:138-153):**
|
||||||
|
```rust
|
||||||
|
fn expand_ldx(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||||
|
// For ldb label, reg:
|
||||||
|
nodes.extend(vec![
|
||||||
|
node!(current.label(), Opcode::Lli, name, reg),
|
||||||
|
node!(None, Opcode::Lui, name, reg),
|
||||||
|
node!(None, opcode, reg, reg, offset),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Wait! This does NOT match the documentation!**
|
||||||
|
|
||||||
|
The load expansion uses the DESTINATION register as scratch:
|
||||||
|
```asm
|
||||||
|
ldb buffer, rg2 expands to:
|
||||||
|
lli buffer, rg2 ; Uses rg2 as destination
|
||||||
|
lui buffer, rg2 ; Uses rg2 as destination
|
||||||
|
ldb rg2, rg2, 0 ; Uses rg2 as base
|
||||||
|
```
|
||||||
|
|
||||||
|
**Documentation says it should use rgf:**
|
||||||
|
```asm
|
||||||
|
ldb buffer, rg2 expands to:
|
||||||
|
lli buffer, rgf ; Uses rgf as scratch
|
||||||
|
lui buffer, rgf ; Uses rgf as scratch
|
||||||
|
ldb rgf, rg2, 0 ; Load from rgf into rg2
|
||||||
|
```
|
||||||
|
|
||||||
|
**For stores (expand.rs:155-176):**
|
||||||
|
```rust
|
||||||
|
fn expand_stx(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||||
|
// For stb reg, label:
|
||||||
|
let temp = Token::Register(Register::Acc); // Uses ACC, not RGF!
|
||||||
|
|
||||||
|
nodes.extend(vec![
|
||||||
|
node!(current.label(), Opcode::Lli, dest, temp),
|
||||||
|
node!(None, Opcode::Lui, dest, temp),
|
||||||
|
node!(None, opcode, base, temp, offset),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Reality:**
|
||||||
|
- Load pseudo-instructions use the DESTINATION register as scratch
|
||||||
|
- Store pseudo-instructions use the ACC register as scratch, NOT rgf
|
||||||
|
|
||||||
|
**Impact:** Documentation is incorrect about which registers are used!
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 6. **LWI Pseudo-instruction** ✅ CORRECT
|
||||||
|
|
||||||
|
**Documentation and implementation agree:**
|
||||||
|
```rust
|
||||||
|
fn expand_lwi(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||||
|
nodes.extend(vec![
|
||||||
|
node!(current.label(), Opcode::Lli, val, reg),
|
||||||
|
node!(None, Opcode::Lui, val, reg),
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
This matches the documented expansion.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 7. **PUSHA/POPA Pseudo-instructions** 📝 UNDOCUMENTED
|
||||||
|
|
||||||
|
**These exist in implementation but are NOT in documentation!**
|
||||||
|
|
||||||
|
**expand.rs:53-76:**
|
||||||
|
```rust
|
||||||
|
fn expand_pusha(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||||
|
let count = expect_token!(arg0, Immediate)?;
|
||||||
|
let spr = Token::Register(Register::Spr);
|
||||||
|
let registers: Vec<Register> = Register::general();
|
||||||
|
|
||||||
|
nodes.push(node!(label, Opcode::SubI, spr, Token::Immediate(count * 4), spr));
|
||||||
|
|
||||||
|
nodes.extend((0..count).rev().map(|i| {
|
||||||
|
node!(None, Opcode::Stw,
|
||||||
|
Token::Register(registers[i as usize]),
|
||||||
|
spr,
|
||||||
|
Token::Immediate(i * 4)
|
||||||
|
)
|
||||||
|
}));
|
||||||
|
```
|
||||||
|
|
||||||
|
**expand.rs:78-101:**
|
||||||
|
```rust
|
||||||
|
fn expand_popa(current: &Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError> {
|
||||||
|
let count = expect_token!(arg0, Immediate)?;
|
||||||
|
|
||||||
|
nodes.extend((0..count).rev().map(|i| {
|
||||||
|
node!(
|
||||||
|
{ if i == 0 { label.clone() } else { None } },
|
||||||
|
Opcode::Ldw,
|
||||||
|
spr,
|
||||||
|
Token::Register(registers[i as usize]),
|
||||||
|
Token::Immediate(i * 4)
|
||||||
|
)
|
||||||
|
}));
|
||||||
|
|
||||||
|
nodes.push(node!(None, Opcode::AddI, spr, Token::Immediate(count * 4), spr));
|
||||||
|
```
|
||||||
|
|
||||||
|
**What they do:**
|
||||||
|
- `pusha N` - Push the first N general-purpose registers (rg0 through rg(N-1)) to the stack
|
||||||
|
- `popa N` - Pop first N general-purpose registers from stack
|
||||||
|
|
||||||
|
**Missing from documentation entirely!**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 8. **Register Index Encoding** ⚠️ IMPORTANT
|
||||||
|
|
||||||
|
**Documentation states:** System registers like MAR, MDR, STS, CIR, PCX are "internal" and not accessible
|
||||||
|
|
||||||
|
**Implementation shows (instructions.rs:148-153):**
|
||||||
|
```rust
|
||||||
|
0x18 => Self::Mar,
|
||||||
|
0x19 => Self::Mdr,
|
||||||
|
0x1A => Self::Sts,
|
||||||
|
0x1B => Self::Cir,
|
||||||
|
0x1C => Self::Pcx,
|
||||||
|
```
|
||||||
|
|
||||||
|
**Reality:** These registers ARE encoded in the instruction format at indices 0x18-0x1C!
|
||||||
|
|
||||||
|
**Separately, instructions.rs:186 shows:**
|
||||||
|
```rust
|
||||||
|
"null" => Ok(Self::NoReg), // Can parse "null" as NoReg
|
||||||
|
```
|
||||||
|
|
||||||
|
**Documentation never mentions "null" as an alternative name for noreg!**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 9. **LUI Immediate Value Handling** ⚠️ IMPORTANT
|
||||||
|
|
||||||
|
**Documentation states:**
|
||||||
|
```
|
||||||
|
lui immediate, dest_reg ; Load immediate into upper 16 bits
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation shows (codegen.rs:248-254):**
|
||||||
|
```rust
|
||||||
|
fn build_load_immediate_instruction(...) -> Result<Instruction, AssembleError> {
|
||||||
|
// ...
|
||||||
|
match opcode {
|
||||||
|
Opcode::Lli => {
|
||||||
|
let instruction_args = args!(I, immediate: value as u16, r1: dest);
|
||||||
|
Ok(Instruction::LoadLowerImmediate(instruction_args))
|
||||||
|
}
|
||||||
|
Opcode::Lui => {
|
||||||
|
let upper_value = value >> 16; // Shifts right by 16!
|
||||||
|
let instruction_args = args!(I, immediate: upper_value as u16, r1: dest);
|
||||||
|
Ok(Instruction::LoadUpperImmediate(instruction_args))
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Reality:** When assembling `lui immediate, reg`, the assembler:
|
||||||
|
1. Takes the immediate value
|
||||||
|
2. Shifts it RIGHT by 16 bits
|
||||||
|
3. Stores the result in the instruction
|
||||||
|
|
||||||
|
**This means:**
|
||||||
|
```asm
|
||||||
|
lli 0x1234, rg0 ; Stores 0x1234 in lower 16 bits
|
||||||
|
lui 0xABCD0000, rg0 ; Right-shifts to 0xABCD, stores in upper 16 bits
|
||||||
|
```
|
||||||
|
|
||||||
|
**By contrast, the 16-bit form used in the documentation would NOT work with this code, because `0xABCD >> 16` is 0:**
|
||||||
|
```asm
|
||||||
|
lli 0x1234, rg0 ; Stores 0x1234 in lower 16 bits
|
||||||
|
lui 0xABCD, rg0 ; Right-shifts to 0x0000, so the upper 16 bits are loaded with 0
|
||||||
|
```
|
||||||
|
|
||||||
|
**Documentation needs clarification on what immediate value format LUI expects!**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 10. **Data Definition Encoding** ⚠️ IMPORTANT
|
||||||
|
|
||||||
|
**Implementation (expand.rs:217-267):**
|
||||||
|
```rust
|
||||||
|
fn process_dx_data(args: Vec<Token>, size: usize) -> Result<Vec<u32>, AssembleError> {
|
||||||
|
for token in args {
|
||||||
|
match token {
|
||||||
|
Token::StringLit(mut s) => {
|
||||||
|
s.push('\0'); // Automatically adds null terminator!
|
||||||
|
for ch in s.chars() {
|
||||||
|
let mut char_buf = [0u8; 4];
|
||||||
|
let char_bytes = ch.encode_utf8(&mut char_buf);
|
||||||
|
buffer.extend_from_slice(char_bytes.as_bytes());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Token::Immediate(value) => {
|
||||||
|
buffer.extend_from_slice(&value.to_be_bytes()); // BIG ENDIAN!
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key findings:**
|
||||||
|
1. String literals automatically get null terminator appended
|
||||||
|
2. Numeric values are stored in **BIG ENDIAN** format (to_be_bytes)
|
||||||
|
3. Documentation says "little-endian byte order" globally
|
||||||
|
|
||||||
|
**Contradiction:** Data definition uses BIG ENDIAN, but doc says LITTLE ENDIAN!
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 11. **Segment Instruction** 📝 UNDOCUMENTED
|
||||||
|
|
||||||
|
**Implementation has a SEGMENT instruction (0x27/0x3F):**
|
||||||
|
```rust
|
||||||
|
Segment(u32) = 0x3F,
|
||||||
|
```
|
||||||
|
|
||||||
|
**This is completely undocumented!**
|
||||||
|
|
||||||
|
From model.rs:
|
||||||
|
```rust
|
||||||
|
Self::Segment => write!(f, "[SEGMENT]"),
|
||||||
|
```
|
||||||
|
|
||||||
|
From codegen.rs:
|
||||||
|
```rust
|
||||||
|
Opcode::Segment => build_segment_instruction(&args),
|
||||||
|
```
|
||||||
|
|
||||||
|
**Purpose unclear, needs documentation!**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 12. **Data Instruction** 📝 UNDOCUMENTED
|
||||||
|
|
||||||
|
**Implementation has a DATA instruction (0x3E):**
|
||||||
|
```rust
|
||||||
|
Data(u32) = 0x3E,
|
||||||
|
```
|
||||||
|
|
||||||
|
**This appears to be a meta-instruction for embedding raw data, but it's undocumented in the assembly reference!**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 13. **INC/DEC Instruction Encoding** ⚠️ MINOR
|
||||||
|
|
||||||
|
**Implementation (codegen.rs:293-299):**
|
||||||
|
```rust
|
||||||
|
fn build_inc_dec_instruction(opcode: Opcode, args: &[Token]) -> Result<Instruction, AssembleError> {
|
||||||
|
let reg = expect_token!(reg_token, Register)?;
|
||||||
|
match opcode {
|
||||||
|
Opcode::Inc => Ok(Instruction::Increment(args!(R, sr1: reg))),
|
||||||
|
Opcode::Dec => Ok(Instruction::Decrement(args!(R, sr1: reg))),
|
||||||
|
```
|
||||||
|
|
||||||
|
**Reality:** INC/DEC only set SR1 field, not DR field.
|
||||||
|
|
||||||
|
**But args.rs shows:**
|
||||||
|
```rust
|
||||||
|
impl RTypeArgs {
|
||||||
|
pub fn new(...) -> Self {
|
||||||
|
let sr1 = sr1.unwrap_or_default(); // Defaults to NoReg
|
||||||
|
let dr = dr.unwrap_or_default(); // Defaults to NoReg
|
||||||
|
```
|
||||||
|
|
||||||
|
**So the DR field gets set to NoReg, which is correct per documentation.**
|
||||||
|
|
||||||
|
**However, the Display impl (instructions.rs:449) shows:**
|
||||||
|
```rust
|
||||||
|
Self::Increment(a) | Self::Decrement(a) => write!(f, " {}", a.sr1),
|
||||||
|
```
|
||||||
|
|
||||||
|
**This is correct - only shows SR1 in disassembly.**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 14. **Shift Instruction Register Operand** ⚠️ MINOR
|
||||||
|
|
||||||
|
**Implementation (codegen.rs:301-312):**
|
||||||
|
```rust
|
||||||
|
fn build_shift_instruction(opcode: Opcode, args: &[Token]) -> Result<Instruction, AssembleError> {
|
||||||
|
let reg = expect_token!(reg_token, Register)?;
|
||||||
|
let amount = expect_token!(amount_token, Immediate)? as u8;
|
||||||
|
|
||||||
|
match opcode {
|
||||||
|
Opcode::Shl => Ok(Instruction::ShiftLeft(args!(R, sr1: reg, shamt: amount))),
|
||||||
|
```
|
||||||
|
|
||||||
|
**This only handles LITERAL shift amounts, not REGISTER shift amounts!**
|
||||||
|
|
||||||
|
**Documentation states both are supported:**
|
||||||
|
```asm
|
||||||
|
shl rg0, 2 ; Literal shift
|
||||||
|
shl rg0, rg1 ; Register shift
|
||||||
|
```
|
||||||
|
|
||||||
|
**The current codegen only handles the literal case!**
|
||||||
|
|
||||||
|
**This is a BUG in the implementation - register shifts aren't properly assembled!**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 15. **Jump Instruction Operand Order** ⚠️ CONFUSION
|
||||||
|
|
||||||
|
**Documentation shows assembly syntax:**
|
||||||
|
```asm
|
||||||
|
jmp addr [, offset_reg]
|
||||||
|
```
|
||||||
|
|
||||||
|
**But implementation (codegen.rs:256-270):**
|
||||||
|
```rust
|
||||||
|
fn build_jump_instruction(opcode: Opcode, args: &[Token]) -> Result<Instruction, AssembleError> {
|
||||||
|
let address = expect_token!(address_token, Immediate)?;
|
||||||
|
let offset = expect_token!(offset_token, Register)?;
|
||||||
|
let instruction_args = args!(I, immediate: address as u16, r1: offset);
|
||||||
|
```
|
||||||
|
|
||||||
|
**This expects:**
|
||||||
|
1. First arg: immediate (address)
|
||||||
|
2. Second arg: register (offset)
|
||||||
|
|
||||||
|
**So assembly syntax should be:**
|
||||||
|
```asm
|
||||||
|
jmp immediate, offset_register
|
||||||
|
```
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
```asm
|
||||||
|
jmp 0x1000, zero ; Jump to 0x1000
|
||||||
|
jmp 4, ret ; Jump to (ret + 4)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Documentation syntax is correct, but parameter names are confusing!**
|
||||||
|
|
||||||
|
The "address" is actually an OFFSET, and the register is the BASE!
|
||||||
|
|
||||||
|
**Better naming:**
|
||||||
|
```asm
|
||||||
|
jmp offset, base_register
|
||||||
|
; Target = base_register + offset
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 16. **NOT Instruction Operand Count** ✅ MINOR ISSUE
|
||||||
|
|
||||||
|
**Documentation shows:**
|
||||||
|
```asm
|
||||||
|
not src, dest ; Two operands
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation (instructions.rs:428-429):**
|
||||||
|
```rust
|
||||||
|
Self::Compare(args) | Self::Not(args) => {
|
||||||
|
write!(f, " {}, {}", args.sr1, args.sr2)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**This displays BOTH sr1 and sr2 for NOT!**
|
||||||
|
|
||||||
|
**But codegen.rs:354-362:**
|
||||||
|
```rust
|
||||||
|
fn build_not_instruction(args: &[Token]) -> Result<Instruction, AssembleError> {
|
||||||
|
let reg = expect_token!(reg_token, Register)?;
|
||||||
|
let dest = expect_token!(dest_token, Register)?;
|
||||||
|
Ok(Instruction::Not(args!(R, sr1: reg, dr: dest)))
|
||||||
|
```
|
||||||
|
|
||||||
|
**Sets sr1 and dr, NOT sr1 and sr2!**
|
||||||
|
|
||||||
|
**The Display impl is WRONG - should show sr1 and dr:**
|
||||||
|
```rust
|
||||||
|
Self::Not(args) => write!(f, " {}, {}", args.sr1, args.dr)
|
||||||
|
```
|
||||||
|
|
||||||
|
**This is a display bug in the implementation!**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 17. **Register File Indexing** ✅ CORRECT
|
||||||
|
|
||||||
|
**Documentation and implementation both agree:**
|
||||||
|
- 0x00-0x0F: rg0-rgf (general purpose)
|
||||||
|
- 0x10: acc
|
||||||
|
- 0x11: spr
|
||||||
|
- 0x12: bpr
|
||||||
|
- 0x13: ret
|
||||||
|
- 0x14: idr
|
||||||
|
- 0x15: mmr
|
||||||
|
- 0x16: zero
|
||||||
|
- 0x17: noreg
|
||||||
|
|
||||||
|
**This matches perfectly.**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 18. **Immediate Arithmetic Destination** ⚠️ MINOR
|
||||||
|
|
||||||
|
**Implementation (codegen.rs:314-330):**
|
||||||
|
```rust
|
||||||
|
fn build_arithmetic_immediate_instruction(...) -> Result<Instruction, AssembleError> {
|
||||||
|
let reg = expect_token!(reg_token, Register)?;
|
||||||
|
let immediate = expect_token!(immediate_token, Immediate)? as u16;
|
||||||
|
let dest = expect_token!(dest_token, Register)?;
|
||||||
|
let instruction_args = args!(I, immediate: immediate, r1: reg, r2: dest);
|
||||||
|
```
|
||||||
|
|
||||||
|
**This REQUIRES three arguments:**
|
||||||
|
1. Source register
|
||||||
|
2. Immediate value
|
||||||
|
3. Destination register
|
||||||
|
|
||||||
|
**But documentation says destination is optional:**
|
||||||
|
```
|
||||||
|
iadd src_reg, imm [, dest_reg] ; dest optional
|
||||||
|
```
|
||||||
|
|
||||||
|
**Reality:** The assembler REQUIRES the destination register!
|
||||||
|
|
||||||
|
**If you want in-place operation:**
|
||||||
|
```asm
|
||||||
|
iadd rg0, 10, rg0 ; Required to specify rg0 twice
|
||||||
|
```
|
||||||
|
|
||||||
|
**Not:**
|
||||||
|
```asm
|
||||||
|
iadd rg0, 10 ; This won't work!
|
||||||
|
```
|
||||||
|
|
||||||
|
**Documentation is misleading - destination is NOT optional!**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 19. **Memory Instruction Offsets** ✅ CORRECT
|
||||||
|
|
||||||
|
**Implementation correctly handles signed 16-bit offsets:**
|
||||||
|
```rust
|
||||||
|
let offset = expect_token!(offset_token, Immediate)? as u16;
|
||||||
|
```
|
||||||
|
|
||||||
|
**These are stored as u16 but interpreted as signed i16 at runtime.**
|
||||||
|
|
||||||
|
**Documentation is correct about this.**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 20. **Instruction Opcode Values** ✅ VERIFIED
|
||||||
|
|
||||||
|
Comparing model.rs opcodes with instructions.rs:
|
||||||
|
|
||||||
|
| Instruction | model.rs | instructions.rs | Match |
|
||||||
|
|-------------|----------|-----------------|-------|
|
||||||
|
| Nop | 0x00 | 0x0 | ✅ |
|
||||||
|
| Mov | 0x01 | 0x1 | ✅ |
|
||||||
|
| MovSigned | 0x02 | 0x2 | ✅ |
|
||||||
|
| LoadByte | 0x03 | 0x3 | ✅ |
|
||||||
|
| ... | ... | ... | ✅ |
|
||||||
|
| AddImmediate | 0x25 | 0x25 | ✅ |
|
||||||
|
| SubImmediate | 0x26 | 0x26 | ✅ |
|
||||||
|
| Segment | 0x27 | 0x3F | ❌ MISMATCH! |
|
||||||
|
|
||||||
|
**CRITICAL:** Segment instruction has opcode **0x27** in model.rs but **0x3F** in instructions.rs!
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Summary of Critical Issues
|
||||||
|
|
||||||
|
### Must Fix in Documentation:
|
||||||
|
|
||||||
|
1. ✅ **Stack grows DOWNWARD** - flip all diagrams
|
||||||
|
2. ✅ **CALL expansion** - uses stack, not ret register directly
|
||||||
|
3. ✅ **RETURN expansion** - loads from stack, jumps to ret+4
|
||||||
|
4. ✅ **Stack frame layout** - flip diagram vertically
|
||||||
|
5. ✅ **Load pseudo scratch register** - uses DEST reg, not rgf
|
||||||
|
6. ✅ **Store pseudo scratch register** - uses ACC, not rgf
|
||||||
|
7. ✅ **Add PUSHA/POPA documentation**
|
||||||
|
8. ✅ **Add SEGMENT instruction documentation**
|
||||||
|
9. ✅ **Add DATA instruction documentation**
|
||||||
|
10. ✅ **Clarify LUI immediate value handling**
|
||||||
|
11. ✅ **Fix endianness** - data definition uses BIG endian
|
||||||
|
12. ✅ **IADD/ISUB destination NOT optional**
|
||||||
|
13. ✅ **Add "null" as alias for noreg**
|
||||||
|
14. ✅ **Fix Segment opcode** - 0x27 or 0x3F?
|
||||||
|
|
||||||
|
### Potential Implementation Bugs:
|
||||||
|
|
||||||
|
1. ⚠️ **Shift instruction** - doesn't handle register shifts
|
||||||
|
2. ⚠️ **NOT display** - shows sr2 instead of dr
|
||||||
|
3. ⚠️ **RETURN +4 offset** - why is this needed?
|
||||||
|
4. ⚠️ **Segment opcode mismatch** - 0x27 vs 0x3F
|
||||||
|
|
||||||
|
### Minor Documentation Improvements:
|
||||||
|
|
||||||
|
1. Add explicit examples of stack growth direction
|
||||||
|
2. Show complete memory layout diagrams
|
||||||
|
3. Document which registers are volatile/preserved
|
||||||
|
4. Add troubleshooting section for common mistakes
|
||||||
|
5. Clarify jump instruction parameter semantics
|
||||||
@@ -0,0 +1,149 @@
|
|||||||
|
# DSA Documentation Inconsistencies Analysis
|
||||||
|
|
||||||
|
## 1. Register Descriptions
|
||||||
|
|
||||||
|
### Issue: System Registers vs Assembly-Accessible Registers
|
||||||
|
- `registers.md` lists MAR, STS, CIR, MDR as "System" registers
|
||||||
|
- These are NOT mentioned in `dsa_assembly_reference.md` or `instruction_set.md`
|
||||||
|
- **Resolution**: System registers are internal CPU registers not directly accessible in assembly. They should be documented separately from programmer-accessible registers.
|
||||||
|
|
||||||
|
### Issue: Register Naming Inconsistencies
|
||||||
|
- `registers.md` uses `RG0-RGF` (uppercase)
|
||||||
|
- `dsa_assembly_reference.md` uses `rg0-rgf` (lowercase)
|
||||||
|
- **Resolution**: Assembly syntax should be lowercase (standard convention)
|
||||||
|
|
||||||
|
### Issue: NOREG Register
|
||||||
|
- `registers.md`: "Loads/using as dest register must cause an illegal instruction trap"
|
||||||
|
- `dsa_assembly_reference.md`: "on-read/write: illegal instruction fault"
|
||||||
|
- **Resolution**: Consistent terminology needed - use "illegal instruction fault"
|
||||||
|
|
||||||
|
## 2. Instruction Operand Order Inconsistencies
|
||||||
|
|
||||||
|
### Issue: Load Instructions
|
||||||
|
- `instruction_set.md`: `LDB BaseReg, Offset, DestReg`
|
||||||
|
- `dsa_assembly_reference.md`: `LDB base_reg, dest_reg [, offset]`
|
||||||
|
- **Resolution**: Assembly reference shows standard syntax (base, dest, offset optional), instruction set shows encoding order
|
||||||
|
|
||||||
|
### Issue: Store Instructions
|
||||||
|
- `instruction_set.md`: `STB SrcReg, BaseReg, Offset`
|
||||||
|
- `dsa_assembly_reference.md`: `STB src_reg, base_reg [, offset]`
|
||||||
|
- **Resolution**: Consistent - offset is optional
|
||||||
|
|
||||||
|
### Issue: Immediate Load Instructions
|
||||||
|
- `instruction_set.md`: `LLI DstReg, Value` (destination first)
|
||||||
|
- `dsa_assembly_reference.md`: `LLI imm, dest_reg` (immediate first)
|
||||||
|
- **Resolution**: Assembly reference shows gas-style syntax (source, dest), instruction set shows encoding order
|
||||||
|
|
||||||
|
### Issue: Jump Instructions
|
||||||
|
- `instruction_set.md`: `JMP DestReg, Offset | Address`
|
||||||
|
- `dsa_assembly_reference.md`: `JMP addr [, offset_reg]` or `JMP imm, offset_reg`
|
||||||
|
- **Resolution**: Different perspectives - instruction set shows encoding, assembly shows usage
|
||||||
|
|
||||||
|
## 3. Instruction Behavior Differences
|
||||||
|
|
||||||
|
### Issue: IADD/ISUB Operands
|
||||||
|
- `instruction_set.md`: `IADD Src1, Literal, Dest` (3 operands)
|
||||||
|
- `dsa_assembly_reference.md`: `IADD src_reg, imm [, dest_reg]` (dest optional)
|
||||||
|
- **Resolution**: Assembly allows dest to default to src_reg
|
||||||
|
|
||||||
|
### Issue: SHL/SHR Operands
|
||||||
|
- `instruction_set.md`: `SHL Reg, Literal | ValReg`
|
||||||
|
- `dsa_assembly_reference.md`: `SHL reg, shift_amount`
|
||||||
|
- **Resolution**: Both literal and register shifts supported
|
||||||
|
|
||||||
|
## 4. Pseudo-Instruction Inconsistencies
|
||||||
|
|
||||||
|
### Issue: PUSH/POP Expansion
|
||||||
|
- `pseudoinstructions.md`:
|
||||||
|
- PUSH = `INC SPR` then `STW register, SPR`
|
||||||
|
- POP = `LDW SPR, register` then `DEC SPR`
|
||||||
|
- Standard stack conventions suggest PUSH should decrement (grow down)
|
||||||
|
- **Resolution**: Clarify stack growth direction
|
||||||
|
|
||||||
|
### Issue: LDB/LDH/LDW Pseudo vs Hardware
|
||||||
|
- `pseudoinstructions.md` lists LDB, LDH, LDW as pseudo-instructions with label addressing
|
||||||
|
- `instruction_set.md` lists them as hardware instructions
|
||||||
|
- **Resolution**: Both exist - hardware instructions use registers, pseudo-instructions add label support
|
||||||
|
|
||||||
|
### Issue: LWI Naming
|
||||||
|
- `dsa_assembly_reference.md`: LWI = Load Word Immediate (load address)
|
||||||
|
- Could be confused with "Load Word Immediate" (load literal value)
|
||||||
|
- **Resolution**: LWI specifically means "Load Word address Into register"
|
||||||
|
|
||||||
|
## 5. Calling Convention Details
|
||||||
|
|
||||||
|
### Issue: Argument Offsets
|
||||||
|
- Calling convention says "first 3 args at offsets 8, 12, 16"
|
||||||
|
- This assumes 32-bit words (4 bytes each)
|
||||||
|
- Offset 8 is position of first argument (after return address at offset 4, and old BPR at offset 0)
|
||||||
|
- **Resolution**: Clarify that SPR+0 = old BPR, SPR+4 = return address, SPR+8 = first arg
|
||||||
|
|
||||||
|
### Issue: Return Value Location
|
||||||
|
- Says "Store return value (if any) to `spr+8`"
|
||||||
|
- This overwrites the first argument
|
||||||
|
- **Resolution**: This is intentional - return value replaces first argument position after cleanup
|
||||||
|
|
||||||
|
## 6. Missing Information
|
||||||
|
|
||||||
|
### From instruction_set.md not in assembly reference:
|
||||||
|
- Instruction encoding details (R-type, I-type, J-type)
|
||||||
|
- Hex opcodes for each instruction
|
||||||
|
- Alignment requirements for memory operations
|
||||||
|
- Sign extension behavior details
|
||||||
|
|
||||||
|
### From assembly reference not in instruction_set:
|
||||||
|
- Complete pseudo-instruction expansions showing what they compile to
|
||||||
|
- Library examples (multiply, print)
|
||||||
|
- Detailed calling convention walkthrough
|
||||||
|
- Module system (INCLUDE directive)
|
||||||
|
|
||||||
|
### From registers.md not elsewhere:
|
||||||
|
- STS (Status Register) bit layout
|
||||||
|
- Boot values for status flags
|
||||||
|
- System registers (MAR, STS, CIR, MDR)
|
||||||
|
|
||||||
|
## 7. Terminology Inconsistencies
|
||||||
|
|
||||||
|
- "halfword" vs "half-word" vs "16-bit value"
|
||||||
|
- "word" assumed to be 32-bit (should be explicit)
|
||||||
|
- "register" vs "reg" in syntax
|
||||||
|
- "immediate" vs "literal" vs "constant"
|
||||||
|
|
||||||
|
## 8. Critical Missing Details
|
||||||
|
|
||||||
|
### CALL and RETURN Pseudo-instructions
|
||||||
|
- Assembly reference shows them but doesn't show their expansion
|
||||||
|
- Need to document what they expand to
|
||||||
|
|
||||||
|
### Label Addressing Mode
|
||||||
|
- Shows expansions for loads/stores with labels
|
||||||
|
- Uses RGF as scratch register - should this be documented as reserved for this purpose?
|
||||||
|
|
||||||
|
### Stack Direction
|
||||||
|
- Not explicitly stated whether stack grows up or down
|
||||||
|
- PUSH uses INC SPR (suggests growing up) - unusual!
|
||||||
|
|
||||||
|
## Recommendations
|
||||||
|
|
||||||
|
1. **Separate Documentation into Logical Layers**:
|
||||||
|
- ISA Specification (hardware-level, for CPU implementers)
|
||||||
|
- Assembly Language Reference (for programmers)
|
||||||
|
- ABI/Calling Convention (for compiler/linker writers)
|
||||||
|
|
||||||
|
2. **Standardize Terminology**:
|
||||||
|
- Use consistent casing (lowercase for assembly mnemonics)
|
||||||
|
- Define terms clearly (word = 32-bit, halfword = 16-bit, byte = 8-bit)
|
||||||
|
- Distinguish "literal" (immediate value in code) from "address" (memory location)
|
||||||
|
|
||||||
|
3. **Document Stack Convention Clearly**:
|
||||||
|
- Explicitly state stack grows upward (unusual but valid)
|
||||||
|
- Show memory layout diagrams
|
||||||
|
|
||||||
|
4. **Show Complete Pseudo-instruction Expansions**:
|
||||||
|
- CALL, RETURN need full expansion documentation
|
||||||
|
- Document which register(s) are used as temporaries
|
||||||
|
|
||||||
|
5. **Clarify Register Usage Conventions**:
|
||||||
|
- ACC: used by pseudo-instructions, volatile
|
||||||
|
- RGF: used by label addressing, volatile
|
||||||
|
- RG0-RGE: general purpose, callee may use per calling convention
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
|
|
||||||
// GENERATED BY DSC COMPILER
|
// GENERATED BY DSC COMPILER
|
||||||
// Generated at 2026-02-04 01:55:11
|
// Generated at 2026-02-05 00:42:40
|
||||||
|
|
||||||
// Imports
|
// Imports
|
||||||
include arena: "./lib/memory/arena_alloc.dsa"
|
|
||||||
include print: "./lib/io/print.dsa"
|
include print: "./lib/io/print.dsa"
|
||||||
|
include arena: "./lib/memory/arena_alloc.dsa"
|
||||||
|
|
||||||
// Globals & Reserved Memory
|
// Globals & Reserved Memory
|
||||||
|
|
||||||
@@ -65,8 +65,8 @@ main:
|
|||||||
pop zero
|
pop zero
|
||||||
subi bpr 16 rg0
|
subi bpr 16 rg0
|
||||||
ldw rg0, rg0 // bpr-24: alloc
|
ldw rg0, rg0 // bpr-24: alloc
|
||||||
push rg0 // bpr-24: alloc
|
push rg4 // bpr-24: ptr2
|
||||||
push rg4 // bpr-28: ptr2
|
push rg0 // bpr-28: alloc
|
||||||
push rg0 // push arg 0
|
push rg0 // push arg 0
|
||||||
call print::print_hex_word
|
call print::print_hex_word
|
||||||
pop zero
|
pop zero
|
||||||
@@ -78,8 +78,8 @@ main:
|
|||||||
call print::print_hex_word
|
call print::print_hex_word
|
||||||
pop zero
|
pop zero
|
||||||
call print::print_newline
|
call print::print_newline
|
||||||
subi bpr 28 rg0
|
subi bpr 24 rg0
|
||||||
ldw rg0, rg0 // bpr-36: ptr2
|
ldw rg0, rg0 // bpr-32: ptr2
|
||||||
push rg0 // bpr-36: ptr2
|
push rg0 // bpr-36: ptr2
|
||||||
push rg0 // push arg 0
|
push rg0 // push arg 0
|
||||||
call print::print_hex_word
|
call print::print_hex_word
|
||||||
@@ -110,8 +110,8 @@ main:
|
|||||||
call print::print_num
|
call print::print_num
|
||||||
pop zero
|
pop zero
|
||||||
call print::print_newline
|
call print::print_newline
|
||||||
db str_12: "end"
|
db str_1: "end"
|
||||||
lwi str_12, rg5
|
lwi str_1, rg5
|
||||||
push rg5 // push arg 0
|
push rg5 // push arg 0
|
||||||
call print::println
|
call print::println
|
||||||
pop zero
|
pop zero
|
||||||
|
|||||||
@@ -28,5 +28,3 @@ fn main() -> u32 {
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -48,4 +48,3 @@ main:
|
|||||||
call print::print_num
|
call print::print_num
|
||||||
pop zero
|
pop zero
|
||||||
jmp _ret
|
jmp _ret
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,5 @@ int factorial(int n) {
|
|||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
int res = factorial(3);
|
int res = factorial(3);
|
||||||
printnum(res);
|
return res;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
@@ -1,427 +0,0 @@
|
|||||||
# DSA Assembly Language Instruction Reference
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
This document provides a comprehensive reference for the DSA (Damn Simple Architecture) assembly language, including all hardware instructions and pseudo-instructions with their syntax variations and usage examples.
|
|
||||||
|
|
||||||
## Calling Convention
|
|
||||||
|
|
||||||
| Step | Responsibility | Action | Description |
|
|
||||||
|------|----------------|--------|-------------|
|
|
||||||
| 1 | **Caller** | Push arguments | Push exactly n arguments to the stack in reverse order (last argument pushed first) |
|
|
||||||
| 2 | **Caller** | Call function | Execute `call namespace::function` - this automatically pushes the return address (pcx) and jumps to the function |
|
|
||||||
| 3 | **Function** | Set up stack frame | Execute `push bpr; mov spr, bpr` to establish new stack frame |
|
|
||||||
| 4 | **Function** | Access arguments | Read arguments starting at `spr+8` (first 3 args at offsets 8, 12, 16) |
|
|
||||||
| 5 | **Function** | Execute function | Perform the function's operations using the arguments |
|
|
||||||
| 6 | **Function** | Store return value | Write return value (if any) to `spr+8` |
|
|
||||||
| 7 | **Function** | Restore stack frame | Execute `mov bpr, spr; pop bpr` to restore previous stack frame |
|
|
||||||
| 8 | **Function** | Return | Execute `return` pseudo-instruction to return to caller |
|
|
||||||
| 9 | **Caller** | Clean up stack | Pop exactly n arguments from the stack to clean up |
|
|
||||||
| 10 | **Caller** | Handle unused values | Use `pop zero` to discard any unused stack values if needed |
|
|
||||||
|
|
||||||
**Notes:**
|
|
||||||
- The namespace in step 2 is the name assigned in the `include` statement
|
|
||||||
- The `call` pseudo-instruction automatically handles return address management so long as the callee does not mess with the stack
|
|
||||||
- Arguments are accessed by the callee using offsets from the base pointer (bpr)
|
|
||||||
|
|
||||||
## Registers
|
|
||||||
|
|
||||||
| Register | Type | Description |
|
|
||||||
|----------|------|---------------------------------------------------------------------------------------------------|
|
|
||||||
| `rg0-rgf` | General Purpose | General-purpose registers. |
|
|
||||||
| `acc` | Special | Accumulator for calculations and temporary storage - don't use this for variables as pseudo instructions may overwrite this implicitly! |
|
|
||||||
| `spr` | Special | Stack pointer |
|
|
||||||
| `bpr` | Special | Base pointer for stack frames |
|
|
||||||
| `ret` | Special | Return address register |
|
|
||||||
| `idr` | Privileged | Interrupt descriptor table address<br/>**on-read/write: protection fault (unless in kernel mode)** |
|
|
||||||
| `mmr` | Privileged | Hardware memory map table address<br/>**on-read/write: protection fault (unless in kernel mode)** |
|
|
||||||
| `zero` | Read-only | Always contains zero<br/>**on-read: always returns zero**<br/>**on-write: value is voided** |
|
|
||||||
| `pcx` | Read-only | Program counter<br/>**on-write: protection fault** |
|
|
||||||
| `noreg` | Placeholder | Indicates absence of register argument<br/>**on-read/write: illegal instruction fault** |
|
|
||||||
|
|
||||||
## Hardware Instructions
|
|
||||||
|
|
||||||
### Data Movement Instructions
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **MOV** | `src_reg, dest_reg` | Copy value from source to destination register |
|
|
||||||
| **MOVS** | `src_reg, dest_reg` | Copy with sign extension |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
mov rg0, rg1 ; Copy rg0 to rg1
|
|
||||||
movs rg0, rg1 ; Copy rg0 to rg1 with sign extension
|
|
||||||
```
|
|
||||||
### Memory Access Instructions
|
|
||||||
|
|
||||||
#### Load Instructions
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **LDB** | `base_reg, dest_reg [, offset]`<br>`label, dest_reg [, offset]` | Load byte from memory |
|
|
||||||
| **LDBS** | `base_reg, dest_reg [, offset]`<br>`label, dest_reg [, offset]` | Load byte with sign extension |
|
|
||||||
| **LDH** | `base_reg, dest_reg [, offset]`<br>`label, dest_reg [, offset]` | Load half-word (16-bit) |
|
|
||||||
| **LDHS** | `base_reg, dest_reg [, offset]`<br>`label, dest_reg [, offset]` | Load half-word with sign extension |
|
|
||||||
| **LDW** | `base_reg, dest_reg [, offset]`<br>`label, dest_reg [, offset]` | Load word (32-bit) |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
; Direct register addressing
|
|
||||||
ldb rg0, rg1 ; Load byte from address in rg0
|
|
||||||
ldw rg0, rg1, 8 ; Load word from (rg0 + 8)
|
|
||||||
|
|
||||||
; Label addressing
|
|
||||||
ldb buffer, rg2 ; Load byte from label 'buffer'
|
|
||||||
ldw stack, bpr ; Load word stored at label 'stack' into base pointer
|
|
||||||
```
|
|
||||||
**Label Expansions:**
|
|
||||||
```asm
|
|
||||||
; ldb buffer, rg2 expands to:
|
|
||||||
lli buffer, rg2 ; Load lower 16 bits of buffer address
|
|
||||||
lui buffer, rg2 ; Load upper 16 bits of buffer address
|
|
||||||
ldb rg2, rg2 ; Load byte from address in rg2
|
|
||||||
|
|
||||||
; ldw stack, bpr expands to:
|
|
||||||
lli stack, bpr ; Load lower 16 bits of stack address
|
|
||||||
lui stack, bpr ; Load upper 16 bits of stack address
|
|
||||||
ldw bpr, bpr ; Load word from address in bpr
|
|
||||||
```
|
|
||||||
#### Store Instructions
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **STB** | `src_reg, base_reg [, offset]`<br>`src_reg, label [, offset]` | Store byte to memory |
|
|
||||||
| **STH** | `src_reg, base_reg [, offset]`<br>`src_reg, label [, offset]` | Store half-word to memory |
|
|
||||||
| **STW** | `src_reg, base_reg [, offset]`<br>`src_reg, label [, offset]` | Store word to memory |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
; Direct register addressing
|
|
||||||
stb rg0, rg1 ; Store byte from rg0 to address in rg1
|
|
||||||
stw rg0, rg1, 12 ; Store word to (rg1 + 12)
|
|
||||||
|
|
||||||
; Label addressing
|
|
||||||
stb acc, buffer ; Store byte from accumulator to 'buffer'
|
|
||||||
stw rg1, current ; Store word to 'current' variable
|
|
||||||
```
|
|
||||||
**Label Expansions:**
|
|
||||||
```asm
|
|
||||||
; stb acc, buffer expands to:
|
|
||||||
lli buffer, rgf ; Load lower 16 bits of buffer address
|
|
||||||
lui buffer, rgf ; Load upper 16 bits of buffer address
|
|
||||||
stb acc, rgf ; Store byte from acc to address in rgf
|
|
||||||
|
|
||||||
; stw rg1, current expands to:
|
|
||||||
lli current, rgf ; Load lower 16 bits of current address
|
|
||||||
lui current, rgf ; Load upper 16 bits of current address
|
|
||||||
stw rg1, rgf ; Store word from rg1 to address in rgf
|
|
||||||
```
|
|
||||||
### Immediate Load Instructions
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|------------------------------------------------------------------------|
|
|
||||||
| **LLI** | `imm, dest_reg` | Load 16-bit immediate into lower 16 bits<br/>**Clears upper 16 bits!** |
|
|
||||||
| **LUI** | `imm, dest_reg` | Load 16-bit immediate into upper 16 bits |
|
|
||||||
|
|
||||||
**Usage**
|
|
||||||
|
|
||||||
Ensure that you always run **LLI** before **LUI**, as **LLI** clears the upper 16 bits.
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
lli 0x1234, rg0 ; Load 0x1234 into lower 16 bits of rg0
|
|
||||||
lui 0xABCD, rg0 ; Load 0xABCD into upper 16 bits of rg0
|
|
||||||
```
|
|
||||||
### Jump Instructions
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **JMP** | `addr [, offset_reg]`<br>`imm, offset_reg` | Unconditional jump |
|
|
||||||
| **JEQ** | `addr [, offset_reg]` | Jump if equal flag set |
|
|
||||||
| **JNE** | `addr [, offset_reg]` | Jump if not equal flag set |
|
|
||||||
| **JGT** | `addr [, offset_reg]` | Jump if greater than flag set |
|
|
||||||
| **JGE** | `addr [, offset_reg]` | Jump if greater or equal flags set |
|
|
||||||
| **JLT** | `addr [, offset_reg]` | Jump if less than flag set |
|
|
||||||
| **JLE** | `addr [, offset_reg]` | Jump if less or equal flags set |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
jmp start ; Jump to label 'start'
|
|
||||||
jmp 4, ret ; Jump to address (4 + ret register)
|
|
||||||
jeq end ; Jump to 'end' if equal flag set
|
|
||||||
jgt loop ; Jump to 'loop' if greater than flag set
|
|
||||||
```
|
|
||||||
### Arithmetic Instructions
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **ADD** | `src1_reg, src2_reg, dest_reg` | Addition |
|
|
||||||
| **SUB** | `src1_reg, src2_reg, dest_reg` | Subtraction |
|
|
||||||
| **IADD** | `src_reg, imm [, dest_reg]` | Immediate addition |
|
|
||||||
| **ISUB** | `src_reg, imm [, dest_reg]` | Immediate subtraction |
|
|
||||||
| **INC** | `reg` | Increment register by 1 |
|
|
||||||
| **DEC** | `reg` | Decrement register by 1 |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
add rg0, rg1, rg2 ; rg2 = rg0 + rg1
|
|
||||||
sub rg0, rg1, rg2 ; rg2 = rg0 - rg1
|
|
||||||
iadd rg0, 10 ; rg0 = rg0 + 10
|
|
||||||
// or using alternate syntax
|
|
||||||
addi rg0, 1 ; rg0 = rg0 + 1
|
|
||||||
inc rg0 ; rg0 = rg0 + 1
|
|
||||||
```
|
|
||||||
### Bitwise Operations
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **AND** | `src1_reg, src2_reg, dest_reg` | Bitwise AND |
|
|
||||||
| **OR** | `src1_reg, src2_reg, dest_reg` | Bitwise OR |
|
|
||||||
| **XOR** | `src1_reg, src2_reg, dest_reg` | Bitwise XOR |
|
|
||||||
| **NOT** | `src_reg, dest_reg` | Bitwise NOT |
|
|
||||||
| **NAND** | `src1_reg, src2_reg, dest_reg` | Bitwise NAND |
|
|
||||||
| **NOR** | `src1_reg, src2_reg, dest_reg` | Bitwise NOR |
|
|
||||||
| **XNOR** | `src1_reg, src2_reg, dest_reg` | Bitwise XNOR |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
and rg0, rg1, rg2 ; rg2 = rg0 & rg1
|
|
||||||
not rg0, rg1 ; rg1 = ~rg0
|
|
||||||
```
|
|
||||||
### Shift Operations
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **SHL** | `reg, shift_amount` | Shift left |
|
|
||||||
| **SHR** | `reg, shift_amount` | Shift right |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
shl rg0, 2 ; Shift rg0 left by 2 bits
|
|
||||||
shr rg0, 3 ; Shift rg0 right by 3 bits
|
|
||||||
```
|
|
||||||
### Comparison and Control
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **CMP** | `reg1, reg2` | Compare registers and set flags |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
cmp rg0, zero ; Compare rg0 with zero register
|
|
||||||
cmp rg1, rg2 ; Compare rg1 with rg2
|
|
||||||
```
|
|
||||||
### System Instructions
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **HLT** | - | Halt processor execution |
|
|
||||||
| **NOP** | - | No operation |
|
|
||||||
| **INT** | `interrupt_code` | Trigger interrupt |
|
|
||||||
| **IRT** | - | Return from interrupt |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
hlt ; Stop processor execution
|
|
||||||
int 0x21 ; Trigger interrupt 0x21
|
|
||||||
```
|
|
||||||
## Pseudo-Instructions
|
|
||||||
|
|
||||||
### Data Definition
|
|
||||||
|
|
||||||
| Mnemonic | Syntax | Description |
|
|
||||||
|----------|--------|-------------|
|
|
||||||
| **DB** | `name: value1 [, value2, ...]` | Define bytes |
|
|
||||||
| **DH** | `name: value1 [, value2, ...]` | Define half-words |
|
|
||||||
| **DW** | `name: value1 [, value2, ...]` | Define words |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
db message: "Hello World", 0
|
|
||||||
dh numbers: 1000, 2000, 3000
|
|
||||||
dw stack: 0x10000
|
|
||||||
```
|
|
||||||
### Memory Reservation
|
|
||||||
|
|
||||||
| Mnemonic | Syntax | Description |
|
|
||||||
|----------|--------|-------------|
|
|
||||||
| **RESB** | `name: size` | Reserve bytes |
|
|
||||||
| **RESH** | `name: size` | Reserve half-words |
|
|
||||||
| **RESW** | `name: size` | Reserve words |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
resb buffer: 256 ; Reserve 256 bytes
|
|
||||||
resh array: 100 ; Reserve space for 100 half-words
|
|
||||||
resw heap: 1024 ; Reserve space for 1024 words
|
|
||||||
```
|
|
||||||
### Stack Operations
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **PUSH** | `reg` | Push register value onto stack |
|
|
||||||
| **POP** | `reg` | Pop stack value into register |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
push rg0 ; Push rg0 value onto stack
|
|
||||||
pop ret ; Pop return address
|
|
||||||
```
|
|
||||||
### Memory Access Shortcuts
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **LWI** | `name, reg` | Load address into register |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
lwi string, rg1 ; Load address of 'string' into rg1
|
|
||||||
```
|
|
||||||
|
|
||||||
### Function Control
|
|
||||||
|
|
||||||
| Mnemonic | Operands | Description |
|
|
||||||
|----------|----------|-------------|
|
|
||||||
| **CALL** | `namespace::function` | Call a function with automatic return address management |
|
|
||||||
| **RETURN** | - | Return from a function to the caller |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
call print::print ; Call the print function from the print namespace
|
|
||||||
return ; Return from the current function
|
|
||||||
```
|
|
||||||
|
|
||||||
### Module System
|
|
||||||
|
|
||||||
| Mnemonic | Syntax | Description |
|
|
||||||
|----------|--------|-------------|
|
|
||||||
| **INCLUDE** | `module_name "path"` | Include module |
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
```asm
|
|
||||||
include print "print.dsa"
|
|
||||||
include fib "fib.dsa"
|
|
||||||
```
|
|
||||||
## Library Examples
|
|
||||||
|
|
||||||
### Multiplication Library (multiply.dsa)
|
|
||||||
|
|
||||||
```asm
|
|
||||||
// multiply.dsa
|
|
||||||
// usage:
|
|
||||||
//
|
|
||||||
// include multiply "<relative path>"
|
|
||||||
//
|
|
||||||
// usage for multiply:
|
|
||||||
// push (arg1)
|
|
||||||
// push (arg0)
|
|
||||||
// call multiply::multiply
|
|
||||||
// pop (result; the product is written back into the arg0 slot)
|
|
||||||
// pop (arg1)
|
|
||||||
|
|
||||||
multiply:
|
|
||||||
push bpr
|
|
||||||
mov spr, bpr
|
|
||||||
|
|
||||||
ldw bpr, rg0, 8 // load op 1
|
|
||||||
ldw bpr, rg1, 12 // load op 2
|
|
||||||
|
|
||||||
lli 0, acc // initialize accumulator
|
|
||||||
|
|
||||||
start:
|
|
||||||
add acc, rg0, acc
|
|
||||||
dec rg1
|
|
||||||
|
|
||||||
cmp rg1, zero
|
|
||||||
jgt start
|
|
||||||
|
|
||||||
end:
|
|
||||||
stw acc, bpr, 8 // store result for caller
|
|
||||||
mov bpr, spr
|
|
||||||
pop bpr
|
|
||||||
return
|
|
||||||
```
|
|
||||||
|
|
||||||
### Print Library (print.dsa)
|
|
||||||
|
|
||||||
```asm
|
|
||||||
// print.dsa
|
|
||||||
// usage:
|
|
||||||
//
|
|
||||||
// include print "<relative path>"
|
|
||||||
//
|
|
||||||
// usage for print:
|
|
||||||
// push (register containing address of string)
|
|
||||||
// call print::print
|
|
||||||
// pop zero
|
|
||||||
//
|
|
||||||
// usage for reset:
|
|
||||||
// call print::reset
|
|
||||||
|
|
||||||
dw display: 0x20000
|
|
||||||
dw current: 0x20000
|
|
||||||
|
|
||||||
// prints the given text to the screen.
|
|
||||||
print:
|
|
||||||
push bpr
|
|
||||||
mov spr, bpr
|
|
||||||
|
|
||||||
ldw bpr, rg0, 8 // get string address argument
|
|
||||||
ldw current, rg1 // get current display position
|
|
||||||
|
|
||||||
print_loop:
|
|
||||||
ldb rg0, acc
|
|
||||||
stb acc, rg1
|
|
||||||
|
|
||||||
iadd rg0, 1
|
|
||||||
iadd rg1, 1
|
|
||||||
|
|
||||||
cmp acc, zero
|
|
||||||
jne print_loop
|
|
||||||
jmp end
|
|
||||||
|
|
||||||
// return
|
|
||||||
end:
|
|
||||||
stw rg1, current
|
|
||||||
|
|
||||||
mov bpr, spr
|
|
||||||
pop bpr
|
|
||||||
return
|
|
||||||
|
|
||||||
// resets the cursor position on the screen
|
|
||||||
reset:
|
|
||||||
push bpr
|
|
||||||
mov spr, bpr
|
|
||||||
ldw display, rg1
|
|
||||||
stw rg1, current
|
|
||||||
mov bpr, spr
|
|
||||||
pop bpr
|
|
||||||
return
|
|
||||||
```
|
|
||||||
|
|
||||||
### Example Program (main.dsa)
|
|
||||||
|
|
||||||
```asm
|
|
||||||
include print "./print.dsa"
|
|
||||||
|
|
||||||
dw stack: 0x10000
|
|
||||||
db string: "'To confuse your enemy, you must first confuse yourself' - Probably Sun Tzu."
|
|
||||||
|
|
||||||
init:
|
|
||||||
// set up a stack.
|
|
||||||
ldw stack, bpr
|
|
||||||
mov bpr, spr
|
|
||||||
|
|
||||||
start:
|
|
||||||
lwi string, rg1
|
|
||||||
|
|
||||||
// push string address argument
|
|
||||||
push rg1
|
|
||||||
// call print function
|
|
||||||
call print::print
|
|
||||||
// clean up stack
|
|
||||||
pop rg1
|
|
||||||
|
|
||||||
hlt
|
|
||||||
```
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
# DSA File formatting specification.
|
|
||||||
|
|
||||||
First, a clarification of the file formats this document refers to.
|
|
||||||
|
|
||||||
- .dsb: DSA Binary object, similar to a .o object file
|
|
||||||
- .dse: DSA Executable file, similar to a .exe/ELF binary
|
|
||||||
|
|
||||||
## Format Specification
|
|
||||||
|
|
||||||
### DSB binary format
|
|
||||||
Reference in New Issue
Block a user