From 1210b193330144fade06bdb16949c00b9a91b1b2 Mon Sep 17 00:00:00 2001 From: zxq5 Date: Wed, 18 Jun 2025 03:54:39 +0100 Subject: [PATCH] asm done with parsing and linking. codegen all that's left --- assembler/src/codegen.rs | 6 ++ assembler/src/expand.rs | 189 ++++++++++++++++++++++++++++++++++++++ assembler/src/lib.rs | 24 +++-- assembler/src/model.rs | 81 ++++++++++++++-- assembler/src/parser.rs | 56 +++-------- assembler/src/resolver.rs | 73 +++++++++++++-- 6 files changed, 366 insertions(+), 63 deletions(-) create mode 100644 assembler/src/codegen.rs create mode 100644 assembler/src/expand.rs diff --git a/assembler/src/codegen.rs b/assembler/src/codegen.rs new file mode 100644 index 0000000..c32381d --- /dev/null +++ b/assembler/src/codegen.rs @@ -0,0 +1,6 @@ +use common::{ + instructions, + prelude::{Instruction, RTypeArgs}, +}; + +use crate::model::{Node, Opcode}; diff --git a/assembler/src/expand.rs b/assembler/src/expand.rs new file mode 100644 index 0000000..63a52f8 --- /dev/null +++ b/assembler/src/expand.rs @@ -0,0 +1,189 @@ +use common::prelude::Register; + +use crate::{ + AssembleError, dsa, expect_token, expect_type, + model::{Node, Opcode, Token, TokenType}, + node, +}; + +pub fn expand_pseudo_ops( + mut nodes: Vec, + module: u64, +) -> Result, AssembleError> { + let mut result = Vec::::with_capacity(nodes.len()); + + for node in nodes.iter_mut() { + if let Err(_) = try_expand(node.clone(), &mut result, module) { + result.push(node.clone()); + } + } + + Ok(result) +} + +fn try_expand( + mut node: Node, + result: &mut Vec, + module: u64, +) -> Result<(), AssembleError> { + match node.opcode() { + Opcode::Push => expand_push(node.clone(), result)?, + Opcode::Pop => expand_pop(node.clone(), result)?, + Opcode::Ldb | Opcode::Ldh | Opcode::Ldw => expand_ldx(node.clone(), result)?, + Opcode::Lwi => expand_lwi(node.clone(), result)?, + Opcode::Resb | Opcode::Resh | Opcode::Resw => expand_resx(node.clone(), result)?, + Opcode::Db | Opcode::Dh | Opcode::Dw 
=> expand_dx(node.clone(), result)?, + _ => result.push(node.clone()), + }; + Ok(()) +} + +fn expand_push(current: Node, nodes: &mut Vec) -> Result<(), AssembleError> { + let label = current.label(); + let reg = expect_type!(current.arg(0).unwrap(), Register)?; + + nodes.extend(vec![ + node!(label, Opcode::Iadd, reg.clone(), Token::Immediate(4)), + node!(None, Opcode::Stw, reg, Token::Register(Register::Spr)), + ]); + + Ok(()) +} + +fn expand_pop(current: Node, nodes: &mut Vec) -> Result<(), AssembleError> { + let label = current.label(); + let reg = expect_type!(current.arg(0).unwrap(), Register)?; + + nodes.extend(vec![ + node!(label, Opcode::Isub, reg.clone(), Token::Immediate(4)), + node!(None, Opcode::Ldw, reg, Token::Register(Register::Spr)), + ]); + + Ok(()) +} + +fn expand_ldx(current: Node, nodes: &mut Vec) -> Result<(), AssembleError> { + let opcode = current.opcode(); + let name = expect_type!(current.arg(0).unwrap(), Symbol)?; + let reg = expect_type!(current.arg(2).unwrap(), Register)?; + + nodes.extend(vec![ + node!(current.label(), Opcode::Lli, name.clone(), reg.clone()), + node!(None, Opcode::Lui, name.clone(), reg.clone()), + node!(None, opcode, reg.clone(), Token::Immediate(0), reg), + ]); + + Ok(()) +} + +fn expand_lwi(current: Node, nodes: &mut Vec) -> Result<(), AssembleError> { + let reg = expect_type!(current.arg(0).unwrap(), Register)?; + let name = expect_type!(current.arg(1).unwrap(), Symbol)?; + + nodes.extend(vec![ + node!(current.label(), Opcode::Lli, name.clone(), reg.clone()), + node!(None, Opcode::Lui, name.clone(), reg.clone()), + ]); + + Ok(()) +} + +fn expand_resx(current: Node, nodes: &mut Vec) -> Result<(), AssembleError> { + let region_label = expect_token!(current.arg(0).unwrap(), Symbol)?; + let size = expect_token!(current.arg(1).unwrap(), Immediate)?; + + let units_per = match current.opcode() { + Opcode::Resb => 4, + Opcode::Resh => 2, + Opcode::Resw => 1, + _ => unreachable!(), + }; + + let mut buffer = vec![]; + // push 
the initial node with the label + for _ in 0..size.div_ceil(units_per) { + // push the rest of the nodes + buffer.push(node!(None, Opcode::Data, Token::Immediate(0))); + } + buffer[0].symbol = Some(region_label); + nodes.extend(buffer); + + Ok(()) +} + +fn expand_dx(current: Node, nodes: &mut Vec) -> Result<(), AssembleError> { + let region_label = expect_token!(current.arg(0).unwrap(), Symbol)?; + let size = match current.opcode() { + Opcode::Db => 4, + Opcode::Dh => 2, + Opcode::Dw => 1, + _ => unreachable!(), + }; + + let mut buffer = vec![]; + + let mut args = current.args(); + let label = args.remove(0); + + for word in process_dx_data(args, size)? { + buffer.push(node!(None, Opcode::Data, Token::Immediate(word))); + } + buffer[0].symbol = Some(region_label); + + nodes.extend(buffer); + Ok(()) +} + +fn process_dx_data(args: Vec, size: usize) -> Result, AssembleError> { + assert!(matches!(size, 1 | 2 | 4)); + + let mut buffer = Vec::::new(); + + // Process each token + for token in args { + match token { + Token::StringLit(s) => { + // Split string into chars and write as bytes + for ch in s.chars() { + // Convert char to bytes (UTF-8 encoding) + let mut char_buf = [0u8; 4]; + let char_bytes = ch.encode_utf8(&mut char_buf); + buffer.extend_from_slice(char_bytes.as_bytes()); + } + } + Token::Immediate(value) => { + // Split u32 into bytes (big-endian) + buffer.extend_from_slice(&value.to_be_bytes()); + } + _ => { + return Err(AssembleError::Generic); + } + } + + // Pad buffer to alignment boundary with zeros + let remainder = buffer.len() % size; + if remainder != 0 { + let padding = size - remainder; + buffer.resize(buffer.len() + padding, 0); + } + } + + // Convert byte buffer to u32 chunks + // Pad final buffer to u32 boundary if needed + let remainder = buffer.len() % 4; + if remainder != 0 { + let padding = 4 - remainder; + buffer.resize(buffer.len() + padding, 0); + } + + // Convert bytes to u32s efficiently using chunks_exact + let result = buffer + 
.chunks_exact(4) + .map(|chunk| { + // Convert 4 bytes to u32 (big-endian) + u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]) + }) + .collect(); + + Ok(result) +} diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index 52eddcc..a831fc0 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -9,11 +9,14 @@ use std::{ use common::prelude::Instruction; use crate::{ - model::{Node, Token, TokenType}, + expand::expand_pseudo_ops, + model::{Node, Symbol, Token, TokenType}, parser::{Parser, Program}, - resolver::resolve_dependencies, + resolver::{resolve_dependencies, resolve_symbols}, }; +pub mod codegen; +pub mod expand; pub mod lexer; pub mod model; pub mod parser; @@ -31,8 +34,11 @@ pub fn assemble(src: &Path) -> Vec { Err(err) => println!("BIG ERROR {err:?}"), } - for node in program.nodes { - println!("{node:?}"); + let mut nodes = program.nodes; + resolve_symbols(&mut nodes).unwrap(); + + for node in nodes { + println!("{node}"); } vec![] @@ -71,7 +77,7 @@ fn prepare_dependency( "{:20} {:20}", "Expanding PseudoInstructions", filename )); - let nodes = Parser::expand_pseudo_ops(nodes, file_hash)?; + let nodes = expand_pseudo_ops(nodes, file_hash)?; program.add_module(nodes); @@ -110,6 +116,8 @@ pub enum AssembleError { UnexpectedEof, InvalidFile(PathBuf), UnexpectedToken(Token, TokenType), + InvalidArg, + UndefinedSymbol(Symbol), } impl fmt::Display for AssembleError { @@ -121,6 +129,10 @@ impl fmt::Display for AssembleError { } AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"), AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"), + AssembleError::InvalidArg => write!(f, "Invalid argument"), + AssembleError::UndefinedSymbol(symbol) => { + write!(f, "Undefined symbol {symbol}") + } } } } @@ -218,7 +230,7 @@ macro_rules! 
expect_type { other => { let expected_type = expect_type!(@get_first_type $($variant),+); Err(AssembleError::UnexpectedToken( - other.clone(), + other.clone().clone(), expected_type, )) } diff --git a/assembler/src/model.rs b/assembler/src/model.rs index dc8a9cd..6f2a635 100644 --- a/assembler/src/model.rs +++ b/assembler/src/model.rs @@ -2,23 +2,76 @@ use std::{fmt, str::FromStr}; use common::prelude::Register; +use crate::AssembleError; + #[derive(Debug, Clone)] -pub struct Node(pub Option, pub Opcode, pub Vec); +pub struct Node { + pub symbol: Option, + pub opcode: Opcode, + pub tokens: Vec, +} + +#[macro_export] +#[macro_use] +macro_rules! node { + ($symbol: expr, $opcode: expr, args: $tokens: expr) => { + Node::new($symbol.clone(), $opcode.clone(), $tokens.clone()) + }; + + ($symbol: expr, $opcode: expr, $($tokens: expr),+) => { + Node::new($symbol.clone(), $opcode.clone(), vec![$($tokens.clone()),+]) + }; +} + +impl Node { + pub fn new(symbol: Option, opcode: Opcode, tokens: Vec) -> Node { + Node { + symbol, + opcode, + tokens, + } + } + + pub fn label(&self) -> Option { + self.symbol.clone() + } + + pub fn opcode(&self) -> Opcode { + self.opcode.clone() + } + + pub fn args(&self) -> Vec { + self.tokens.clone() + } + + pub fn arg(&self, index: usize) -> Result { + self.args() + .get(index) + .cloned() + .ok_or(AssembleError::InvalidArg) + } +} impl fmt::Display for Node { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let symbol = match &self.0 { - Some(symbol) => format!("{symbol}"), + let symbol = match &self.label() { + Some(symbol) => format!("{}:\n", symbol), None => "".to_string(), }; - write!(f, "Node: {} {} {:?}", symbol, self.1, self.2) + write!( + f, + "\x1b[93m{} \t\x1b[94m{} \x1b[37m{:?} \x1b[0m", + symbol, + self.opcode(), + self.args() + ) } } impl fmt::Display for Symbol { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}::{}", self.module, self.name) + write!(f, "{} ( module: {})", self.name, self.module) } } @@ 
-82,18 +135,29 @@ impl fmt::Display for Opcode { Opcode::Push => write!(f, "push"), Opcode::Pop => write!(f, "pop"), Opcode::Lwi => write!(f, "lwi"), + + // utility - removed at compile time Opcode::Include => write!(f, "include"), + + // special - generated by assembler + Opcode::Data => write!(f, "data"), } } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq, Hash)] pub struct Symbol { pub name: String, pub module: Module, } -#[derive(Debug, Clone)] +impl PartialEq for Symbol { + fn eq(&self, other: &Self) -> bool { + self.name == other.name && self.module == other.module + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Module { Resolved(u64), Unresolved(String), @@ -182,6 +246,9 @@ pub enum Opcode { Pop, Lwi, Include, + + // fake instructions (these aren't present in the binary as instructions) + Data, } #[derive(Debug)] diff --git a/assembler/src/parser.rs b/assembler/src/parser.rs index af3a72e..10fefca 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/parser.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; use common::prelude::{Instruction, Register}; use crate::model::{Module, Node, Opcode, Symbol, Token, TokenType}; -use crate::{AssembleError, dsa, expect_token, expect_type, quick_hash}; +use crate::{AssembleError, dsa, expect_token, expect_type, node, quick_hash}; pub struct Parser { tokens: Vec, @@ -55,52 +55,15 @@ impl Parser { pub fn get_dependencies(nodes: &Vec) -> Result, AssembleError> { let mut dependencies = Vec::new(); for node in nodes { - if let Opcode::Include = node.1 { - let path = expect_token!(node.2.get(1).unwrap(), StringLit)?; + if let Opcode::Include = node.opcode() { + let path = expect_token!(node.args().get(1).unwrap(), StringLit)?; dependencies.push(PathBuf::from(path)); } } Ok(dependencies) } - pub fn expand_pseudo_ops( - mut nodes: Vec, - module: u64, - ) -> Result, AssembleError> { - let mut result = Vec::::with_capacity(nodes.len()); - - for node in nodes.iter_mut() { - match node.1 { - // Opcode::Db | 
Opcode::Dh | Opcode::Dw => todo!(), - // Opcode::Resb | Opcode::Resh | Opcode::Resw => todo!(), - Opcode::Push => { - // inc SPR - // STW reg, SPR - let label = node.0.clone(); - let reg = expect_token!(node.2.get(0).unwrap(), Register)?; - - match label { - Some(label) => result.extend(dsa!( - module, - "{}: iadd spr, 4\n stw {}, spr", - label, - reg - )), - None => { - result.extend(dsa!(module, "iadd spr, 4\n stw {}, spr", reg)) - } - } - } - _ => result.push(node.clone()), - } - } - - Ok(result) - } - fn parse_instruction(&mut self) -> Result { - println!("tokens: {:?}", self.tokens); - if self.tokens.is_empty() { unreachable!(); } @@ -208,8 +171,9 @@ impl Parser { // D-type pseudoinstructions (data definition) Opcode::Resb | Opcode::Resh | Opcode::Resw => { + let name = expect_type!(self.next()?, Symbol)?; let num = expect_type!(self.next()?, Immediate)?; - args = vec![num]; + args = vec![name, num]; } Opcode::Db | Opcode::Dh | Opcode::Dw => { @@ -237,9 +201,13 @@ impl Parser { Opcode::Hlt | Opcode::Nop | Opcode::Irt => { args = Vec::new(); } + + Opcode::Data => { + return Err(AssembleError::Generic); + } } - Ok(Node(label, opcode, args)) + Ok(node!(label, opcode, args: args)) } fn parse_data_definition( @@ -248,8 +216,8 @@ impl Parser { ) -> Result, AssembleError> { let mut values = Vec::new(); - let _name = self.expect(TokenType::Symbol)?; - values.push(self.tokens.pop().unwrap()); + let name = expect_type!(self.next()?, Symbol)?; + values.push(name); match opcode { Opcode::Db => { diff --git a/assembler/src/resolver.rs b/assembler/src/resolver.rs index c0f10b1..eeaff22 100644 --- a/assembler/src/resolver.rs +++ b/assembler/src/resolver.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::{collections::HashMap, path::PathBuf}; use crate::{ AssembleError, @@ -6,18 +6,77 @@ use crate::{ quick_hash, }; +pub fn resolve_symbols(nodes: &mut Vec) -> Result<(), AssembleError> { + let symbol_table = generate_symbol_table(&nodes)?; + + for node in nodes.iter_mut() { 
+ match node.opcode() { + Opcode::Lli => { + if let Token::Symbol(symbol) = node.arg(0).unwrap() { + if let Some(address) = symbol_table.get(&symbol) { + node.tokens[0] = Token::Immediate(*address); + } else { + return Err(AssembleError::UndefinedSymbol(symbol.clone())); + } + } + } + Opcode::Lui => { + if let Token::Symbol(symbol) = node.arg(0).unwrap() { + if let Some(address) = symbol_table.get(&symbol) { + node.tokens[0] = Token::Immediate(*address); + } else { + return Err(AssembleError::UndefinedSymbol(symbol.clone())); + } + } + } + Opcode::Jmp + | Opcode::Jeq + | Opcode::Jne + | Opcode::Jgt + | Opcode::Jge + | Opcode::Jlt + | Opcode::Jle => { + if let Token::Symbol(symbol) = node.arg(0).unwrap() { + if let Some(address) = symbol_table.get(&symbol) { + node.tokens[0] = Token::Immediate(*address); + } else { + return Err(AssembleError::UndefinedSymbol(symbol.clone())); + } + } + } + _ => (), + } + } + + Ok(()) +} + +fn generate_symbol_table( + nodes: &Vec, +) -> Result, AssembleError> { + let mut table = HashMap::new(); + + for (i, node) in nodes.iter().enumerate() { + if let Some(symbol) = node.label() { + table.insert(symbol, 4 * i as u32); + } + } + + Ok(table) +} + pub fn resolve_dependencies(mut nodes: Vec) -> Result, AssembleError> { // first we get a list of imports let mut dependencies = Vec::new(); for node in &nodes { - if let Opcode::Include = node.1 { + if let Opcode::Include = node.opcode() { // we want the path, and the name - let name = if let Token::Symbol(name) = node.2.get(0).unwrap() { + let name = if let Token::Symbol(name) = node.arg(0).unwrap() { name.name.clone() } else { unreachable!() }; //node.2.get(0).unwrap() - let path = if let Token::StringLit(path) = node.2.get(1).unwrap() { + let path = if let Token::StringLit(path) = node.arg(1).unwrap() { path } else { unreachable!() @@ -32,7 +91,9 @@ pub fn resolve_dependencies(mut nodes: Vec) -> Result, AssembleE // now we resolve the symbols on all the nodes // we need to check all 
operands for unresolved signals for (i, node) in nodes.clone().iter().enumerate() { - let Node(_, _, operands) = node; + let Node { + tokens: operands, .. + } = node; for (j, token) in operands.iter().enumerate() { if let Token::Symbol(symbol) = token { for d in &dependencies { @@ -53,7 +114,7 @@ pub fn resolve_dependencies(mut nodes: Vec) -> Result, AssembleE } for (i, j, symbol) in changes { - nodes[i as usize].2[j as usize] = Token::Symbol(symbol); + nodes[i as usize].tokens[j as usize] = Token::Symbol(symbol); } Ok(nodes)