Assembler is still very broken; dependency resolution works; now working on expanding pseudo-instructions.

This commit is contained in:
2025-06-17 03:11:22 +01:00
parent 88a1c9f245
commit 87fbd6c362
9 changed files with 604 additions and 292 deletions
+144 -256
View File
@@ -1,73 +1,35 @@
use core::fmt;
use std::path::PathBuf;
use std::str::FromStr;
use crate::AssembleError;
use crate::lexer::{Symbol, Token};
use common::prelude::{Instruction, Register};
use crate::model::{Module, Node, Opcode, Symbol, Token, TokenType};
use crate::{AssembleError, quick_hash};
/// Parser state: the tokens that still have to be consumed and the nodes
/// produced from them so far.
pub struct Parser {
/// Remaining tokens. `Parser::new` stores them in the reverse of the order
/// it receives them (presumably so the next token can be taken from the end
/// of the vector; confirm against `parse_instruction`).
tokens: Vec<Token>,
/// Nodes appended by `parse_nodes`; the later passes read and rewrite this list.
nodes: Vec<Node>,
}
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum TokenType {
Symbol,
Register,
Immediate,
StringLit,
Opcode,
pub struct Program {
pub nodes: Vec<Node>,
}
impl TokenType {
fn from_token(token: &Token) -> TokenType {
match token {
Token::Symbol(_) => TokenType::Symbol,
Token::Register(_) => TokenType::Register,
Token::Immediate(_) => TokenType::Immediate,
Token::StringLit(_) => TokenType::StringLit,
Token::Opcode(_) => TokenType::Opcode,
}
impl Program {
pub fn new() -> Program {
Program { nodes: vec![] }
}
}
// TODO: make sure the bit shift is done for LUI codegen.
/// Appends every node of `module` to the end of this program's node list,
/// keeping the nodes in their original order.
pub fn add_module(&mut self, module: Vec<Node>) {
    let mut incoming = module;
    self.nodes.append(&mut incoming);
}
/// One parsed statement, stored as a tuple of (optional symbol, opcode,
/// operand tokens). The passes in this file treat field 0 as the statement's
/// label and field 2 as its operands.
#[derive(Debug)]
#[expect(dead_code)]
pub struct Node(Option<Symbol>, Opcode, Vec<Token>);
impl Iterator for Parser {
type Item = Result<Node, AssembleError>;
fn next(&mut self) -> Option<Result<Node, AssembleError>> {
if self.tokens.is_empty() {
return None;
pub fn parser(&mut self) -> Parser {
Parser {
tokens: vec![],
nodes: self.nodes.clone(),
}
Some(self.parse_instruction())
}
}
@@ -75,9 +37,128 @@ impl Parser {
/// Builds a parser over `tokens` with an empty node list.
///
/// The tokens are stored in reverse order relative to the input vector.
pub fn new(tokens: Vec<Token>) -> Parser {
    let mut pending = tokens;
    pending.reverse();
    Parser {
        tokens: pending,
        nodes: Vec::new(),
    }
}
/// Consumes all remaining tokens, appending one node per parsed instruction.
///
/// # Errors
///
/// Returns the first error reported by `parse_instruction`; nodes parsed
/// before the failure stay in the node list.
pub fn parse_nodes(&mut self) -> Result<&mut Self, AssembleError> {
    loop {
        if self.tokens.is_empty() {
            break;
        }
        let parsed = self.parse_instruction()?;
        self.nodes.push(parsed);
    }
    Ok(self)
}
/// Lists the paths named by the `Include` nodes, in node order.
///
/// For each `Include` node the second operand is inspected; it contributes a
/// path only when it is a string literal. This function itself never returns
/// `Err`.
///
/// # Panics
///
/// Panics if an `Include` node has fewer than two operands.
pub fn get_dependencies(&mut self) -> Result<Vec<PathBuf>, AssembleError> {
    let paths = self
        .nodes
        .iter()
        .filter(|node| matches!(node.1, Opcode::Include))
        .filter_map(|include| match include.2.get(1).unwrap() {
            Token::StringLit(path) => Some(PathBuf::from(path)),
            _ => None,
        })
        .collect();
    Ok(paths)
}
pub fn resolve_dependencies(&mut self) -> Result<&mut Self, AssembleError> {
// first we get a list of imports
let mut dependencies = Vec::new();
for node in &self.nodes {
if let Opcode::Include = node.1 {
// we want the path, and the name
let name = if let Token::Symbol(name) = node.2.get(0).unwrap() {
name.name.clone()
} else {
unreachable!()
}; //node.2.get(0).unwrap()
let path = if let Token::StringLit(path) = node.2.get(1).unwrap() {
path
} else {
unreachable!()
};
let hash = quick_hash(&PathBuf::from(path).canonicalize().unwrap());
dependencies.push((name, hash));
}
}
let mut changes = Vec::<(u32, u32, Symbol)>::new();
// now we resolve the symbols on all the nodes
// we need to check all operands for unresolved signals
for (i, node) in self.nodes.clone().iter().enumerate() {
let Node(_, _, operands) = node;
for (j, token) in operands.iter().enumerate() {
if let Token::Symbol(symbol) = token {
for d in &dependencies {
if let Module::Unresolved(name) = symbol.module.clone() {
if name != d.0 {
continue;
}
let symbol = Symbol {
name: symbol.name.clone(),
module: Module::Resolved(d.1),
};
changes.push((i as u32, j as u32, symbol));
}
}
}
}
}
for (i, j, symbol) in changes {
self.nodes[i as usize].2[j as usize] = Token::Symbol(symbol);
}
Ok(self)
}
/// Returns a copy of the current node list.
pub fn get(&self) -> Vec<Node> {
    self.nodes.to_vec()
}
pub fn expand_pseudo_ops(&mut self) -> Result<&mut Self, AssembleError> {
for node in self.nodes.iter_mut() {
match node.1 {
Opcode::Db | Opcode::Dh | Opcode::Dw => todo!(),
Opcode::Resb | Opcode::Resh | Opcode::Resw => todo!(),
Opcode::Push => {
// inc SPR
// STW reg, SPR
let label = node.0.clone();
let reg = node.2.get(0).unwrap();
vec![
Node(
label.clone(),
Opcode::Inc,
vec![Token::Register(Register::Spr)],
),
Node(
label.clone(),
Opcode::Stw,
vec![reg.clone(), Token::Register(Register::Spr)],
),
];
}
_ => continue,
}
}
Ok(self)
}
/// Placeholder for the symbol-resolution pass.
///
/// Performs no work yet: it returns `Ok(self)` unchanged, which lets callers
/// already chain it with the other passes.
pub fn resolve_symbols(&mut self) -> Result<&mut Self, AssembleError> {
Ok(self)
}
/// Placeholder for instruction generation; currently always returns an empty
/// list, whatever nodes the parser holds.
pub fn instructions(&mut self) -> Vec<Instruction> {
    Vec::new()
}
fn parse_instruction(&mut self) -> Result<Node, AssembleError> {
if self.tokens.is_empty() {
unreachable!();
@@ -170,6 +251,12 @@ impl Parser {
args = vec![reg];
}
Opcode::Include => {
let mod_name = self.expect(TokenType::Symbol)?;
let path = self.expect(TokenType::StringLit)?;
args = vec![mod_name, path];
}
// J-type instructions
Opcode::Jmp
| Opcode::Jeq
@@ -312,202 +399,3 @@ impl Parser {
}
}
}
/// Every mnemonic the assembler recognises: the real machine instructions
/// first, then the pseudo-instructions.
#[derive(Debug, Clone, PartialEq)]
pub enum Opcode {
// Real instructions (0x00-0x26), declared in encoding order:
// `to_opcode_value` maps `Nop` to 0x00 and `Isub` to 0x26.
Nop,
Mov,
Movs,
Ldb,
Ldbs,
Ldh,
Ldhs,
Ldw,
Stb,
Sth,
Stw,
Lli,
Lui,
Jmp,
Jeq,
Jne,
Jgt,
Jge,
Jlt,
Jle,
Cmp,
Inc,
Dec,
Shl,
Shr,
Add,
Sub,
And,
Or,
Not,
Xor,
Nand,
Nor,
Xnor,
Int,
Irt,
Hlt,
Iadd,
Isub,
// Pseudo-instructions: these have no opcode value of their own
// (`to_opcode_value` returns `None` and `is_pseudo_instruction` returns `true`).
Db,
Dh,
Dw,
Resb,
Resh,
Resw,
Push,
Pop,
Lwi,
}
/// Error produced when a string cannot be parsed into an [`Opcode`].
#[derive(Debug)]
pub enum OpcodeFromStrError {
    /// Carries a static description that is appended to the
    /// "register does not exist: " prefix when the error is displayed.
    InvalidRegister(&'static str),
}

impl std::fmt::Display for OpcodeFromStrError {
    /// Writes the fixed prefix followed by the variant's description.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self::InvalidRegister(detail) = self;
        f.write_str("register does not exist: ")?;
        f.write_str(detail)
    }
}

impl std::error::Error for OpcodeFromStrError {}
impl FromStr for Opcode {
    type Err = OpcodeFromStrError;

    /// Parses a mnemonic into its [`Opcode`], ignoring letter case.
    ///
    /// # Errors
    ///
    /// Any string that is not a known mnemonic yields
    /// `OpcodeFromStrError::InvalidRegister("unknown opcode")`.
    // NOTE(review): that error displays as "register does not exist: unknown
    // opcode", which is misleading for an opcode lookup; a dedicated variant
    // would read better.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let mnemonic = s.to_lowercase();
        let opcode = match mnemonic.as_str() {
            // Real instructions
            "nop" => Self::Nop,
            "mov" => Self::Mov,
            "movs" => Self::Movs,
            "ldb" => Self::Ldb,
            "ldbs" => Self::Ldbs,
            "ldh" => Self::Ldh,
            "ldhs" => Self::Ldhs,
            "ldw" => Self::Ldw,
            "stb" => Self::Stb,
            "sth" => Self::Sth,
            "stw" => Self::Stw,
            "lli" => Self::Lli,
            "lui" => Self::Lui,
            "jmp" => Self::Jmp,
            "jeq" => Self::Jeq,
            "jne" => Self::Jne,
            "jgt" => Self::Jgt,
            "jge" => Self::Jge,
            "jlt" => Self::Jlt,
            "jle" => Self::Jle,
            "cmp" => Self::Cmp,
            "inc" => Self::Inc,
            "dec" => Self::Dec,
            "shl" => Self::Shl,
            "shr" => Self::Shr,
            "add" => Self::Add,
            "sub" => Self::Sub,
            "and" => Self::And,
            "or" => Self::Or,
            "not" => Self::Not,
            "xor" => Self::Xor,
            "nand" => Self::Nand,
            "nor" => Self::Nor,
            "xnor" => Self::Xnor,
            "int" => Self::Int,
            "irt" => Self::Irt,
            "hlt" => Self::Hlt,
            "iadd" => Self::Iadd,
            "isub" => Self::Isub,
            // Pseudo-instructions
            "db" => Self::Db,
            "dh" => Self::Dh,
            "dw" => Self::Dw,
            "resb" => Self::Resb,
            "resh" => Self::Resh,
            "resw" => Self::Resw,
            "push" => Self::Push,
            "pop" => Self::Pop,
            "lwi" => Self::Lwi,
            _ => return Err(OpcodeFromStrError::InvalidRegister("unknown opcode")),
        };
        Ok(opcode)
    }
}
impl Opcode {
    /// Lower-case mnemonics: real instructions in encoding order
    /// (0x00-0x26), followed by the pseudo-instructions.
    pub const OPCODES: &'static [&'static str] = &[
        // Real instructions (0x00-0x26)
        "nop",
        "mov",
        "movs",
        "ldb",
        "ldbs",
        "ldh",
        "ldhs",
        "ldw",
        "stb",
        "sth",
        "stw",
        "lli",
        "lui",
        "jmp",
        "jeq",
        "jne",
        "jgt",
        "jge",
        "jlt",
        "jle",
        "cmp",
        "inc",
        "dec",
        "shl",
        "shr",
        "add",
        "sub",
        "and",
        "or",
        "not",
        "xor",
        "nand",
        "nor",
        "xnor",
        "int",
        "irt",
        "hlt",
        "iadd",
        "isub",
        // Pseudo-instructions
        "db",
        "dh",
        "dw",
        "resb",
        "resh",
        "resw",
        "push",
        "pop",
        "lwi",
    ];

    /// Returns the numeric encoding of a real instruction, or `None` for
    /// anything without one (the pseudo-instructions).
    pub fn to_opcode_value(&self) -> Option<u8> {
        let encoding: u8 = match self {
            Self::Nop => 0x00,
            Self::Mov => 0x01,
            Self::Movs => 0x02,
            Self::Ldb => 0x03,
            Self::Ldbs => 0x04,
            Self::Ldh => 0x05,
            Self::Ldhs => 0x06,
            Self::Ldw => 0x07,
            Self::Stb => 0x08,
            Self::Sth => 0x09,
            Self::Stw => 0x0A,
            Self::Lli => 0x0B,
            Self::Lui => 0x0C,
            Self::Jmp => 0x0D,
            Self::Jeq => 0x0E,
            Self::Jne => 0x0F,
            Self::Jgt => 0x10,
            Self::Jge => 0x11,
            Self::Jlt => 0x12,
            Self::Jle => 0x13,
            Self::Cmp => 0x14,
            Self::Inc => 0x15,
            Self::Dec => 0x16,
            Self::Shl => 0x17,
            Self::Shr => 0x18,
            Self::Add => 0x19,
            Self::Sub => 0x1A,
            Self::And => 0x1B,
            Self::Or => 0x1C,
            Self::Not => 0x1D,
            Self::Xor => 0x1E,
            Self::Nand => 0x1F,
            Self::Nor => 0x20,
            Self::Xnor => 0x21,
            Self::Int => 0x22,
            Self::Irt => 0x23,
            Self::Hlt => 0x24,
            Self::Iadd => 0x25,
            Self::Isub => 0x26,
            // Pseudo-instructions don't have opcode values
            _ => return None,
        };
        Some(encoding)
    }

    /// Reports whether this opcode is one of the pseudo-instructions
    /// (`db`, `dh`, `dw`, `resb`, `resh`, `resw`, `push`, `pop`, `lwi`).
    pub fn is_pseudo_instruction(&self) -> bool {
        match self {
            Self::Db | Self::Dh | Self::Dw => true,
            Self::Resb | Self::Resh | Self::Resw => true,
            Self::Push | Self::Pop | Self::Lwi => true,
            _ => false,
        }
    }
}