From b0670d1e6c9bc24321ac32265898a3ff9c61abf3 Mon Sep 17 00:00:00 2001 From: zxq5 Date: Tue, 17 Jun 2025 23:48:06 +0100 Subject: [PATCH] assembler updates --- assembler/src/lib.rs | 117 +++++++++++++++++- assembler/src/parser.rs | 252 +++++++++++++++----------------------- assembler/src/resolver.rs | 60 +++++++++ 3 files changed, 268 insertions(+), 161 deletions(-) create mode 100644 assembler/src/resolver.rs diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index d7fdb55..0206a4d 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -11,11 +11,13 @@ use common::prelude::Instruction; use crate::{ model::{Node, Token, TokenType}, parser::{Parser, Program}, + resolver::resolve_dependencies, }; pub mod lexer; pub mod model; pub mod parser; +pub mod resolver; pub fn assemble(src: &PathBuf) -> Vec { let mut modules = HashSet::::new(); @@ -59,14 +61,19 @@ fn prepare_dependency( let tokens = lexer::lexer(src, file_hash)?; log(&format!("{:20} {:20}", "Parsing", filename)); - let mut parser = Parser::new(tokens); + let parsed = Parser::parse_nodes(tokens)?; log(&format!("{:20} {:20}", "Resolving Deps", filename)); - let deps = parser - .parse_nodes()? - .resolve_dependencies()? - .get_dependencies()?; - program.add_module(parser.get()); + let nodes = resolve_dependencies(parsed)?; + let deps = Parser::get_dependencies(&nodes)?; + + log(&format!( + "{:20} {:20}", + "Expanding PseudoInstructions", filename + )); + let nodes = Parser::expand_pseudo_ops(nodes, file_hash)?; + + program.add_module(nodes); for dep in deps { log(&format!( @@ -100,6 +107,7 @@ pub fn disassemble(_: Vec) -> String { #[derive(Debug)] pub enum AssembleError { Generic, + UnexpectedEof, InvalidFile(PathBuf), UnexpectedToken(Token, TokenType), } @@ -111,6 +119,7 @@ impl fmt::Display for AssembleError { AssembleError::UnexpectedToken(tok, expected) => { write!(f, "Unexpected token {tok:?}, expected {expected:?}") } + AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"), AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"), } } @@ -125,3 +134,99 @@ fn quick_hash(value: &PathBuf) -> u64 { fn log(message: &str) { println!("\x1b[32mINFO:\x1b[0m {}", message); } + +// create a macro that lexes and parses the input string into Nodes +#[macro_export] +#[macro_use] +macro_rules! dsa { + // Version with formatting arguments + ($hash:expr, $input:expr, $($args:expr),+) => {{ + let input = format!($input, $($args),+); + let tokens = crate::lexer::lexer(input, $hash)?; + let parsed = crate::parser::Parser::parse_nodes(tokens)?; + parsed + }}; + // Version without formatting + ($hash:expr, $input:expr) => {{ + let input = String::from($input); + let tokens = crate::lexer::lexer(input, $hash)?; + let parsed = crate::parser::Parser::parse_nodes(tokens)?; + parsed + }}; +} + +#[macro_export] +#[macro_use] +macro_rules! expect_token { + ($token:expr, Symbol) => { + match $token { + Token::Symbol(value) => Ok(value.clone()), + other => Err(AssembleError::UnexpectedToken( + other.clone(), + TokenType::Symbol, + )), + } + }; + ($token:expr, Register) => { + match $token { + Token::Register(value) => Ok(value.clone()), + other => Err(AssembleError::UnexpectedToken( + other.clone(), + TokenType::Register, + )), + } + }; + ($token:expr, Immediate) => { + match $token { + Token::Immediate(value) => Ok(value.clone()), + other => Err(AssembleError::UnexpectedToken( + other.clone(), + TokenType::Immediate, + )), + } + }; + ($token:expr, StringLit) => { + match $token { + Token::StringLit(value) => Ok(value.clone()), + other => Err(AssembleError::UnexpectedToken( + other.clone(), + TokenType::StringLit, + )), + } + }; + ($token:expr, Opcode) => { + match $token { + Token::Opcode(value) => Ok(value.clone()), + other => Err(AssembleError::UnexpectedToken( + other.clone(), + TokenType::Opcode, + )), + } + }; +} + +#[macro_export] +#[macro_use] +macro_rules! expect_type { + ($token:expr, $($variant:ident),+) => {{ + let token = $token; + match &token { + $( + Token::$variant(_) => Ok(token.clone()), + )+ + other => { + let expected_type = expect_type!(@get_first_type $($variant),+); + Err(AssembleError::UnexpectedToken( + other.clone(), + expected_type, + )) + } + } + }}; + + (@get_first_type Symbol $(, $rest:ident)*) => { TokenType::Symbol }; + (@get_first_type Register $(, $rest:ident)*) => { TokenType::Register }; + (@get_first_type Immediate $(, $rest:ident)*) => { TokenType::Immediate }; + (@get_first_type StringLit $(, $rest:ident)*) => { TokenType::StringLit }; + (@get_first_type Opcode $(, $rest:ident)*) => { TokenType::Opcode }; +} diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index eb1457e..a52fe79 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -5,7 +5,7 @@ use std::str::FromStr; use common::prelude::{Instruction, Register}; use crate::model::{Module, Node, Opcode, Symbol, Token, TokenType}; -use crate::{AssembleError, quick_hash}; +use crate::{AssembleError, dsa, expect_token, expect_type, quick_hash}; pub struct Parser { tokens: Vec, @@ -34,190 +34,116 @@ impl Program { } impl Parser { - pub fn new(tokens: Vec) -> Parser { - Parser { + pub fn parse_nodes(tokens: Vec) -> Result, AssembleError> { + let mut self_ = Parser { tokens: tokens.into_iter().rev().collect(), nodes: vec![], - } - } + }; - pub fn parse_nodes(&mut self) -> Result<&mut Self, AssembleError> { - while !self.tokens.is_empty() { - let ins = self.parse_instruction()?; - self.nodes.push(ins); + while !self_.tokens.is_empty() { + let ins = self_.parse_instruction()?; + self_.nodes.push(ins); } - Ok(self) + Ok(self_.nodes.clone()) } - pub fn get_dependencies(&mut self) -> Result, AssembleError> { + pub fn get_dependencies(nodes: &Vec) -> Result, AssembleError> { let mut dependencies = Vec::new(); - for node in &self.nodes { + for node in nodes { if let Opcode::Include = node.1 { - if let Token::StringLit(path) = node.2.get(1).unwrap() { - dependencies.push(PathBuf::from(path)); - } + let path = expect_token!(node.2.get(1).unwrap(), StringLit)?; + dependencies.push(PathBuf::from(path)); } } Ok(dependencies) } - pub fn resolve_dependencies(&mut self) -> Result<&mut Self, AssembleError> { - // first we get a list of imports - let mut dependencies = Vec::new(); - for node in &self.nodes { - if let Opcode::Include = node.1 { - // we want the path, and the name - let name = if let Token::Symbol(name) = node.2.get(0).unwrap() { - name.name.clone() - } else { - unreachable!() - }; //node.2.get(0).unwrap() - let path = if let Token::StringLit(path) = node.2.get(1).unwrap() { - path - } else { - unreachable!() - }; - let hash = quick_hash(&PathBuf::from(path).canonicalize().unwrap()); + pub fn expand_pseudo_ops( + mut nodes: Vec, + module: u64, + ) -> Result, AssembleError> { + let mut result = Vec::::with_capacity(nodes.len()); - dependencies.push((name, hash)); - } - } - - let mut changes = Vec::<(u32, u32, Symbol)>::new(); - // now we resolve the symbols on all the nodes - // we need to check all operands for unresolved signals - for (i, node) in self.nodes.clone().iter().enumerate() { - let Node(_, _, operands) = node; - for (j, token) in operands.iter().enumerate() { - if let Token::Symbol(symbol) = token { - for d in &dependencies { - if let Module::Unresolved(name) = symbol.module.clone() { - if name != d.0 { - continue; - } - - let symbol = Symbol { - name: symbol.name.clone(), - module: Module::Resolved(d.1), - }; - changes.push((i as u32, j as u32, symbol)); - } - } - } - } - } - - for (i, j, symbol) in changes { - self.nodes[i as usize].2[j as usize] = Token::Symbol(symbol); - } - - Ok(self) - } - - pub fn get(&self) -> Vec { - self.nodes.clone() - } - - pub fn expand_pseudo_ops(&mut self) -> Result<&mut Self, AssembleError> { - for node in self.nodes.iter_mut() { + for node in nodes.iter_mut() { match node.1 { - Opcode::Db | Opcode::Dh | Opcode::Dw => todo!(), - Opcode::Resb | Opcode::Resh | Opcode::Resw => todo!(), - + // Opcode::Db | Opcode::Dh | Opcode::Dw => todo!(), + // Opcode::Resb | Opcode::Resh | Opcode::Resw => todo!(), Opcode::Push => { // inc SPR // STW reg, SPR let label = node.0.clone(); - let reg = node.2.get(0).unwrap(); + let reg = expect_token!(node.2.get(0).unwrap(), Register)?; - vec![ - Node( - label.clone(), - Opcode::Inc, - vec![Token::Register(Register::Spr)], - ), - Node( - label.clone(), - Opcode::Stw, - vec![reg.clone(), Token::Register(Register::Spr)], - ), - ]; + match label { + Some(label) => result.extend(dsa!( + module, + "{}: iadd spr, 4\n stw {}, spr", + label, + reg + )), + None => { + result.extend(dsa!(module, "iadd spr, 4\n stw {}, spr", reg)) + } + } } - _ => continue, + _ => result.push(node.clone()), } } - Ok(self) - } - - pub fn resolve_symbols(&mut self) -> Result<&mut Self, AssembleError> { - Ok(self) - } - - pub fn instructions(&mut self) -> Vec { - vec![] + Ok(result) } fn parse_instruction(&mut self) -> Result { + println!("tokens: {:?}", self.tokens); + if self.tokens.is_empty() { unreachable!(); } - let label = if let Token::Symbol(label) = self - .tokens - .last() - .expect("parse_instruction should not have been called if this is none!!") - { - Some(label.clone()) - } else { - None - }; - + // check if the Node starts with a label + let label = expect_token!(self.peek_next()?, Symbol).ok(); if label.is_some() { self.tokens.pop(); } - let opcode = match self.expect(TokenType::Opcode)? { - Token::Opcode(opcode) => opcode, - _ => unreachable!(), - }; - + let opcode = expect_token!(self.next()?, Opcode)?; let args: Vec; match opcode { // R-type instructions Opcode::Mov | Opcode::Movs => { - let reg1 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; - let reg2 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; + let reg1 = expect_type!(self.next()?, Register, Symbol)?; + let reg2 = expect_type!(self.next()?, Register, Symbol)?; args = vec![reg1, reg2]; } Opcode::Ldb | Opcode::Ldbs | Opcode::Ldh | Opcode::Ldhs | Opcode::Ldw => { - let base = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; - let dest = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; - let offset = - self.maybe_expect(&[TokenType::Register, TokenType::Immediate]); - if offset.is_some() { - self.tokens.pop(); - args = vec![base, offset.unwrap(), dest]; - } else { - args = vec![base, Token::Immediate(0), dest]; + let base = expect_type!(self.next()?, Register, Symbol)?; + let dest = expect_type!(self.next()?, Register, Symbol)?; + + let mut offset = Token::Immediate(0); + if let Ok(next) = self.peek_next() { + if let Ok(_) = expect_type!(next, Register, Immediate) { + offset = self.next()?; + } } + + args = vec![base, offset, dest]; } Opcode::Stb | Opcode::Sth | Opcode::Stw => { - let base = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; - let dest = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; + let base = expect_type!(self.next()?, Register, Symbol)?; + let dest = expect_type!(self.next()?, Register, Symbol)?; - let offset = - self.maybe_expect(&[TokenType::Register, TokenType::Immediate]); - if offset.is_some() { - self.tokens.pop(); - args = vec![base, offset.unwrap(), dest]; - } else { - args = vec![base, Token::Immediate(0), dest]; + let mut offset = Token::Immediate(0); + if let Ok(next) = self.peek_next() { + if let Ok(_) = expect_type!(next, Register, Immediate) { + offset = self.next()?; + } } + + args = vec![base, offset, dest]; } Opcode::Add @@ -228,32 +154,32 @@ impl Parser { | Opcode::Nand | Opcode::Nor | Opcode::Xnor => { - let src1 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; - let src2 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; - let dest = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; + let src1 = expect_type!(self.next()?, Register, Symbol)?; + let src2 = expect_type!(self.next()?, Register, Symbol)?; + let dest = expect_type!(self.next()?, Register, Symbol)?; args = vec![src1, src2, dest]; } Opcode::Not | Opcode::Cmp => { - let reg1 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; - let reg2 = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; + let reg1 = expect_type!(self.next()?, Register, Symbol)?; + let reg2 = expect_type!(self.next()?, Register, Symbol)?; args = vec![reg1, reg2]; } Opcode::Shl | Opcode::Shr => { - let reg = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; - let num = self.expect(TokenType::Immediate)?; + let reg = expect_type!(self.next()?, Register, Symbol)?; + let num = expect_type!(self.next()?, Immediate)?; args = vec![reg, num]; } Opcode::Inc | Opcode::Dec => { - let reg = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; + let reg = expect_type!(self.next()?, Register, Symbol)?; args = vec![reg]; } Opcode::Include => { - let mod_name = self.expect(TokenType::Symbol)?; - let path = self.expect(TokenType::StringLit)?; + let mod_name = expect_type!(self.next()?, Symbol)?; + let path = expect_type!(self.next()?, StringLit)?; args = vec![mod_name, path]; } @@ -265,20 +191,20 @@ impl Parser { | Opcode::Jge | Opcode::Jlt | Opcode::Jle => { - let imm = self.expect_any(&[TokenType::Immediate, TokenType::Symbol])?; + let imm = expect_type!(self.next()?, Immediate, Symbol)?; args = vec![imm]; } // I-type instructions Opcode::Lui | Opcode::Lli | Opcode::Lwi | Opcode::Iadd | Opcode::Isub => { - let imm = self.expect_any(&[TokenType::Immediate, TokenType::Symbol])?; - let reg = self.expect(TokenType::Register)?; + let reg = expect_type!(self.next()?, Register)?; + let imm = expect_type!(self.next()?, Immediate, Symbol)?; args = vec![reg, imm]; } // D-type pseudoinstructions (data definition) Opcode::Resb | Opcode::Resh | Opcode::Resw => { - let num = self.expect(TokenType::Immediate)?; + let num = expect_type!(self.next()?, Immediate)?; args = vec![num]; } @@ -288,18 +214,18 @@ impl Parser { // E-type pseudoinstructions (stack operations) Opcode::Push => { - let reg = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; + let reg = expect_type!(self.next()?, Register, Symbol)?; args = vec![reg]; } Opcode::Pop => { - let reg = self.expect_any(&[TokenType::Register, TokenType::Symbol])?; + let reg = expect_type!(self.next()?, Register, Symbol)?; args = vec![reg]; } // Special instructions Opcode::Int => { - let val = self.expect(TokenType::Immediate)?; + let val = expect_type!(self.next()?, Immediate)?; args = vec![val]; } @@ -369,8 +295,24 @@ impl Parser { Ok(values) } + fn next(&mut self) -> Result { + if self.tokens.is_empty() { + Err(AssembleError::UnexpectedEof) + } else { + Ok(self.tokens.pop().unwrap()) + } + } + + fn peek_next(&mut self) -> Result { + if self.tokens.is_empty() { + Err(AssembleError::UnexpectedEof) + } else { + Ok(self.tokens.last().unwrap().clone()) + } + } + fn expect(&mut self, type_: TokenType) -> Result { - let tok = self.tokens.pop().unwrap(); + let tok = self.next()?; if TokenType::from_token(&tok) == type_ { Ok(tok) @@ -380,7 +322,7 @@ impl Parser { } fn expect_any(&mut self, types: &[TokenType]) -> Result { - let tok = self.tokens.pop().unwrap(); + let tok = self.next()?; if types.contains(&TokenType::from_token(&tok)) { Ok(tok) @@ -390,9 +332,9 @@ impl Parser { } fn maybe_expect(&mut self, types: &[TokenType]) -> Option { - let tok = self.tokens.last().unwrap(); + let tok = self.peek_next().ok()?; - if types.contains(&TokenType::from_token(tok)) { + if types.contains(&TokenType::from_token(&tok)) { Some(tok.clone()) } else { None diff --git a/assembler/src/resolver.rs b/assembler/src/resolver.rs new file mode 100644 index 0000000..c0f10b1 --- /dev/null +++ b/assembler/src/resolver.rs @@ -0,0 +1,60 @@ +use std::path::PathBuf; + +use crate::{ + AssembleError, + model::{Module, Node, Opcode, Symbol, Token}, + quick_hash, +}; + +pub fn resolve_dependencies(mut nodes: Vec) -> Result, AssembleError> { + // first we get a list of imports + let mut dependencies = Vec::new(); + for node in &nodes { + if let Opcode::Include = node.1 { + // we want the path, and the name + let name = if let Token::Symbol(name) = node.2.get(0).unwrap() { + name.name.clone() + } else { + unreachable!() + }; //node.2.get(0).unwrap() + let path = if let Token::StringLit(path) = node.2.get(1).unwrap() { + path + } else { + unreachable!() + }; + let hash = quick_hash(&PathBuf::from(path).canonicalize().unwrap()); + + dependencies.push((name, hash)); + } + } + + let mut changes = Vec::<(u32, u32, Symbol)>::new(); + // now we resolve the symbols on all the nodes + // we need to check all operands for unresolved signals + for (i, node) in nodes.clone().iter().enumerate() { + let Node(_, _, operands) = node; + for (j, token) in operands.iter().enumerate() { + if let Token::Symbol(symbol) = token { + for d in &dependencies { + if let Module::Unresolved(name) = symbol.module.clone() { + if name != d.0 { + continue; + } + + let symbol = Symbol { + name: symbol.name.clone(), + module: Module::Resolved(d.1), + }; + changes.push((i as u32, j as u32, symbol)); + } + } + } + } + } + + for (i, j, symbol) in changes { + nodes[i as usize].2[j as usize] = Token::Symbol(symbol); + } + + Ok(nodes) +}