diff --git a/assembler/src/codegen.rs b/assembler/src/assembler/codegen.rs similarity index 96% rename from assembler/src/codegen.rs rename to assembler/src/assembler/codegen.rs index 7947a3e..a54eebc 100644 --- a/assembler/src/codegen.rs +++ b/assembler/src/assembler/codegen.rs @@ -1,9 +1,7 @@ use common::{args, prelude::*}; -use crate::{ - AssembleError, expect_token, - model::{Node, Opcode}, -}; +use crate::assembler::model::{Node, Opcode}; +use crate::{assembler::AssembleError, expect_token}; pub fn codegen(nodes: Vec) -> Result, AssembleError> { let mut instructions = vec![]; @@ -140,15 +138,15 @@ fn build_instruction(node: Node) -> Result { _ => unreachable!(), } } - Opcode::Iadd | Opcode::Isub => { + Opcode::AddI | Opcode::SubI => { let reg = expect_token!(args.first().unwrap(), Register)?; let immediate = expect_token!(args.get(1).unwrap(), Immediate)? as u16; let dest = expect_token!(args.get(2).unwrap(), Register)?; let args = args!(I, immediate: immediate, r1: reg, r2: dest); match opcode { - Opcode::Iadd => Ok(Instruction::AddImmediate(args)), - Opcode::Isub => Ok(Instruction::SubImmediate(args)), + Opcode::AddI => Ok(Instruction::AddImmediate(args)), + Opcode::SubI => Ok(Instruction::SubImmediate(args)), _ => unreachable!(), } } diff --git a/assembler/src/expand.rs b/assembler/src/assembler/expand.rs similarity index 97% rename from assembler/src/expand.rs rename to assembler/src/assembler/expand.rs index eb42930..a929dd7 100644 --- a/assembler/src/expand.rs +++ b/assembler/src/assembler/expand.rs @@ -1,10 +1,7 @@ use common::prelude::Register; -use crate::{ - AssembleError, expect_token, expect_type, - model::{Node, Opcode, Token}, - node, -}; +use crate::assembler::model::{Node, Opcode, Token}; +use crate::{assembler::AssembleError, expect_token, expect_type, node}; pub fn expand_pseudo_ops( mut nodes: Vec, @@ -49,7 +46,7 @@ fn expand_push(current: Node, nodes: &mut Vec) -> Result<(), AssembleError nodes.extend(vec![ node!( label, - Opcode::Isub, + 
Opcode::SubI, Token::Register(Register::Spr), Token::Immediate(4), Token::Register(Register::Spr) @@ -80,7 +77,7 @@ fn expand_pop(current: Node, nodes: &mut Vec) -> Result<(), AssembleError> ), node!( None, - Opcode::Iadd, + Opcode::AddI, Token::Register(Register::Spr), Token::Immediate(4), Token::Register(Register::Spr) diff --git a/assembler/src/lexer.rs b/assembler/src/assembler/lexer.rs similarity index 97% rename from assembler/src/lexer.rs rename to assembler/src/assembler/lexer.rs index ac6b14d..ad783c9 100644 --- a/assembler/src/lexer.rs +++ b/assembler/src/assembler/lexer.rs @@ -1,9 +1,7 @@ use std::str::FromStr; -use crate::{ - AssembleError, - model::{Module, Opcode, Symbol, Token}, -}; +use crate::assembler::AssembleError; +use crate::assembler::model::{Module, Opcode, Symbol, Token}; use common::prelude::Register; pub fn lexer(mut program: String, module: u64) -> Result, AssembleError> { diff --git a/assembler/src/assembler/mod.rs b/assembler/src/assembler/mod.rs new file mode 100644 index 0000000..fbb38c6 --- /dev/null +++ b/assembler/src/assembler/mod.rs @@ -0,0 +1,248 @@ +use std::{ + collections::HashSet, + fmt, fs, + hash::{DefaultHasher, Hash, Hasher}, + path::{Path, PathBuf}, +}; + +use common::prelude::Instruction; + +use crate::{ + assembler::{ + expand::expand_pseudo_ops, + model::{Node, Opcode, Symbol, Token, TokenType}, + parser::{Parser, Program}, + resolver::{create_sections, resolve_dependencies, resolve_symbols}, + }, + codegen, log, node, +}; + +pub mod codegen; +pub mod expand; +pub mod lexer; +pub mod model; +pub mod parser; +pub mod resolver; + +pub fn assemble(src: &Path) -> Result, AssembleError> { + let mut modules = HashSet::::new(); + let mut program = Program::new(); + + let hash = quick_hash(src); + modules.insert(hash); + + prepare_dependency(src, &mut modules, &mut program)?; + let mut nodes = program.nodes; + + create_sections(&mut nodes)?; + resolve_symbols(&mut nodes)?; + + let instructions = codegen(nodes)?; + for inst 
in instructions.iter() { + println!("{inst}"); + } + + Ok(instructions) +} + +fn prepare_dependency( + path: &Path, + modules: &mut HashSet, + program: &mut Program, +) -> Result<(), AssembleError> { + let filename = path.file_name().unwrap().to_str().unwrap(); + if let Ok(path) = path.canonicalize() { + log(&format!( + "{:20} {:20} [{}]", + "Building", + filename, + path.display() + )); + } + + let src = fs::read_to_string(path) + .map_err(|_| AssembleError::InvalidFile(path.to_path_buf()))?; + let file_hash = quick_hash(path); + + log(&format!("{:20} {:20}", "Tokenising", filename)); + let tokens = lexer::lexer(src, file_hash)?; + + log(&format!("{:20} {:20}", "Parsing", filename)); + let parsed = Parser::parse_nodes(tokens)?; + + log(&format!("{:20} {:20}", "Resolving Deps", filename)); + let nodes = resolve_dependencies(parsed)?; + + let deps = Parser::get_dependencies(&nodes)?; + + log(&format!( + "{:20} {:20}", + "Expanding PseudoInstructions", filename + )); + let mut nodes = expand_pseudo_ops(nodes, file_hash)?; + + // add a section instruction + nodes.insert( + 0, + node!(None, Opcode::Segment, Token::Immediate(file_hash as u32)), + ); + + for n in nodes.iter() { + println!("{n}"); + } + + program.add_module(nodes); + + for dep in deps { + log(&format!( + "{:20} {:20}", + "Including", + dep.file_name().unwrap().to_str().unwrap() + )); + + if !modules.contains(&quick_hash(&dep)) { + modules.insert(quick_hash(&dep)); + prepare_dependency(dep.as_path(), modules, program)? + } + } + + Ok(()) +} + +fn _build(_src: Vec) -> Result, AssembleError> { + Ok(vec![]) +} + +/// TODO: disassembling functionality +/// - We probably don't need to implement this for a while yet. +/// - This method should recover symbols such as labels and variables from the human +/// written assembly, recognising sequences that are expansions of pseudo-instructions +/// and reversing this to produce near enough the original source code. 
+pub fn disassemble(_: Vec) -> String { + todo!() +} + +#[derive(Debug)] +pub enum AssembleError { + Generic, + UnexpectedEof, + InvalidFile(PathBuf), + UnexpectedToken(Token, TokenType), + InvalidArg, + UndefinedSymbol(Symbol), +} + +impl fmt::Display for AssembleError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + AssembleError::Generic => write!(f, "Generic error"), + AssembleError::UnexpectedToken(tok, expected) => { + write!(f, "Unexpected token {tok:?}, expected {expected:?}") + } + AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"), + AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"), + AssembleError::InvalidArg => write!(f, "Invalid argument"), + AssembleError::UndefinedSymbol(symbol) => { + write!(f, "Undefined symbol {symbol}") + } + } + } +} + +fn quick_hash(value: &Path) -> u64 { + let mut hasher = DefaultHasher::new(); + value.canonicalize().unwrap().to_str().hash(&mut hasher); + hasher.finish() +} + +#[macro_export] +macro_rules! dsa { + // Version with formatting arguments + ($hash:expr, $input:expr, $($args:expr),+) => {{ + let input = format!($input, $($args),+); + let tokens = $crate::assembler::lexer::lexer(input, $hash)?; + let parsed = $crate::assembler::parser::Parser::parse_nodes(tokens)?; + parsed + }}; + // Version without formatting + ($hash:expr, $input:expr) => {{ + let input = String::from($input); + let tokens = $crate::assembler::lexer::lexer(input, $hash)?; + let parsed = $crate::assembler::parser::Parser::parse_nodes(tokens)?; + parsed + }}; +} + +#[macro_export] +macro_rules! 
expect_token { + ($token:expr, Symbol) => { + match $token { + $crate::assembler::model::Token::Symbol(value) => Ok(value.clone()), + other => Err($crate::assembler::AssembleError::UnexpectedToken( + other.clone(), + $crate::assembler::model::TokenType::Symbol, + )), + } + }; + ($token:expr, Register) => { + match $token { + $crate::assembler::model::Token::Register(value) => Ok(value.clone()), + other => Err($crate::assembler::AssembleError::UnexpectedToken( + other.clone(), + $crate::assembler::model::TokenType::Register, + )), + } + }; + ($token:expr, Immediate) => { + match $token { + $crate::assembler::model::Token::Immediate(value) => Ok(value.clone()), + other => Err($crate::assembler::AssembleError::UnexpectedToken( + other.clone(), + $crate::assembler::model::TokenType::Immediate, + )), + } + }; + ($token:expr, StringLit) => { + match $token { + $crate::assembler::model::Token::StringLit(value) => Ok(value.clone()), + other => Err($crate::assembler::AssembleError::UnexpectedToken( + other.clone(), + $crate::assembler::model::TokenType::StringLit, + )), + } + }; + ($token:expr, Opcode) => { + match $token { + $crate::assembler::model::Token::Opcode(value) => Ok(value.clone()), + other => Err($crate::assembler::AssembleError::UnexpectedToken( + other.clone(), + $crate::assembler::model::TokenType::Opcode, + )), + } + }; +} + +#[macro_export] +macro_rules! 
expect_type { + ($token:expr, $($variant:ident),+) => {{ + let token = $token; + match &token { + $( + $crate::assembler::model::Token::$variant(_) => Ok(token.clone()), + )+ + other => { + let expected_type = expect_type!(@get_first_type $($variant),+); + Err($crate::assembler::AssembleError::UnexpectedToken( + other.clone().clone(), + expected_type, + )) + } + } + }}; + + (@get_first_type Symbol $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Symbol }; + (@get_first_type Register $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Register }; + (@get_first_type Immediate $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Immediate }; + (@get_first_type StringLit $(, $rest:ident)*) => { $crate::assembler::model::TokenType::StringLit }; + (@get_first_type Opcode $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Opcode }; +} diff --git a/assembler/src/model.rs b/assembler/src/assembler/model.rs similarity index 96% rename from assembler/src/model.rs rename to assembler/src/assembler/model.rs index 27c478b..7e4592b 100644 --- a/assembler/src/model.rs +++ b/assembler/src/assembler/model.rs @@ -2,7 +2,7 @@ use std::{fmt, str::FromStr}; use common::prelude::Register; -use crate::AssembleError; +use crate::assembler::AssembleError; #[derive(Debug, Clone)] pub struct Node { @@ -123,8 +123,8 @@ impl fmt::Display for Opcode { Opcode::Int => write!(f, "int"), Opcode::Irt => write!(f, "irt"), Opcode::Hlt => write!(f, "hlt"), - Opcode::Iadd => write!(f, "iadd"), - Opcode::Isub => write!(f, "isub"), + Opcode::AddI => write!(f, "addi"), + Opcode::SubI => write!(f, "subi"), Opcode::Db => write!(f, "db"), Opcode::Dh => write!(f, "dh"), Opcode::Dw => write!(f, "dw"), @@ -240,8 +240,8 @@ pub enum Opcode { Int, Irt, Hlt, - Iadd, - Isub, + AddI, + SubI, // Pseudo-instructions Db, Dh, @@ -316,8 +316,8 @@ impl FromStr for Opcode { "int" => Ok(Self::Int), "irt" => Ok(Self::Irt), "hlt" => Ok(Self::Hlt), - "iadd" => Ok(Self::Iadd), - "isub" => 
Ok(Self::Isub), + "addi" => Ok(Self::AddI), + "subi" => Ok(Self::SubI), "db" => Ok(Self::Db), "dh" => Ok(Self::Dh), "dw" => Ok(Self::Dw), @@ -339,7 +339,7 @@ impl Opcode { "nop", "mov", "movs", "ldb", "ldbs", "ldh", "ldhs", "ldw", "stb", "sth", "stw", "lli", "lui", "jmp", "jeq", "jne", "jgt", "jge", "jlt", "jle", "cmp", "inc", "dec", "shl", "shr", "add", "sub", "and", "or", "not", "xor", "nand", "nor", - "xnor", "int", "irt", "hlt", "iadd", "isub", // Pseudo-instructions + "xnor", "int", "irt", "hlt", "addi", "subi", // Pseudo-instructions "db", "dh", "dw", "resb", "resh", "resw", "push", "pop", "lwi", "include", ]; @@ -382,8 +382,8 @@ impl Opcode { Self::Int => Some(0x22), Self::Irt => Some(0x23), Self::Hlt => Some(0x24), - Self::Iadd => Some(0x25), - Self::Isub => Some(0x26), + Self::AddI => Some(0x25), + Self::SubI => Some(0x26), Self::Segment => Some(0x27), // Pseudo-instructions don't have opcode values _ => None, diff --git a/assembler/src/parser.rs b/assembler/src/assembler/parser.rs similarity index 98% rename from assembler/src/parser.rs rename to assembler/src/assembler/parser.rs index fdc95ce..3f8ab49 100644 --- a/assembler/src/parser.rs +++ b/assembler/src/assembler/parser.rs @@ -1,11 +1,8 @@ use std::path::PathBuf; -use crate::{ - AssembleError, expect_token, expect_type, - model::{Node, Opcode, Token}, - node, -}; +use crate::{assembler::AssembleError, expect_token, expect_type, node}; +use crate::assembler::model::{Node, Opcode, Token}; use common::prelude::*; pub struct Parser { @@ -177,7 +174,7 @@ impl Parser { } // Immediate Arithmetic - Opcode::Iadd | Opcode::Isub => { + Opcode::AddI | Opcode::SubI => { let reg = expect_type!(self.next()?, Register)?; let imm = expect_type!(self.next()?, Immediate)?; let reg2 = if expect_type!(self.peek_next()?, Register).is_ok() { diff --git a/assembler/src/resolver.rs b/assembler/src/assembler/resolver.rs similarity index 96% rename from assembler/src/resolver.rs rename to assembler/src/assembler/resolver.rs 
index b93de50..6dea158 100644 --- a/assembler/src/resolver.rs +++ b/assembler/src/assembler/resolver.rs @@ -2,11 +2,9 @@ use std::{collections::HashMap, path::PathBuf}; use common::prelude::Register; -use crate::{ - AssembleError, - model::{Module, Node, Opcode, Symbol, Token}, - node, quick_hash, -}; +use crate::assembler::model::{Module, Node, Opcode, Symbol, Token}; +use crate::assembler::quick_hash; +use crate::{assembler::AssembleError, node}; pub fn resolve_symbols(nodes: &mut [Node]) -> Result<(), AssembleError> { let symbol_table = generate_symbol_table(nodes)?; diff --git a/assembler/src/lib.rs b/assembler/src/lib.rs index de96981..f456590 100644 --- a/assembler/src/lib.rs +++ b/assembler/src/lib.rs @@ -1,253 +1,21 @@ -use core::fmt; -use std::{ - collections::HashSet, - fs, - hash::{DefaultHasher, Hash, Hasher}, - path::{Path, PathBuf}, -}; - +use assembler::codegen::codegen; +use assembler::expand::expand_pseudo_ops; +use assembler::model::{Node, Opcode, Symbol, Token, TokenType}; +use assembler::parser::{Parser, Program}; +use assembler::resolver::{create_sections, resolve_dependencies, resolve_symbols}; use common::prelude::*; +use core::fmt; -use crate::{ - codegen::codegen, - expand::expand_pseudo_ops, - model::{Node, Opcode, Symbol, Token, TokenType}, - parser::{Parser, Program}, - resolver::{create_sections, resolve_dependencies, resolve_symbols}, -}; +pub mod assembler; -pub mod codegen; -pub mod expand; -pub mod lexer; -pub mod model; -pub mod parser; -pub mod resolver; +use crate::assembler::lexer; -pub fn assemble(src: &Path) -> Result, AssembleError> { - let mut modules = HashSet::::new(); - let mut program = Program::new(); - - let hash = quick_hash(src); - modules.insert(hash); - - prepare_dependency(src, &mut modules, &mut program)?; - let mut nodes = program.nodes; - - create_sections(&mut nodes)?; - resolve_symbols(&mut nodes)?; - - let instructions = codegen(nodes)?; - for inst in instructions.iter() { - println!("{inst}"); - } - - 
Ok(instructions) -} - -fn prepare_dependency( - path: &Path, - modules: &mut HashSet, - program: &mut Program, -) -> Result<(), AssembleError> { - let filename = path.file_name().unwrap().to_str().unwrap(); - if let Ok(path) = path.canonicalize() { - log(&format!( - "{:20} {:20} [{}]", - "Building", - filename, - path.display() - )); - } - - let src = fs::read_to_string(path) - .map_err(|_| AssembleError::InvalidFile(path.to_path_buf()))?; - let file_hash = quick_hash(path); - - log(&format!("{:20} {:20}", "Tokenising", filename)); - let tokens = lexer::lexer(src, file_hash)?; - - log(&format!("{:20} {:20}", "Parsing", filename)); - let parsed = Parser::parse_nodes(tokens)?; - - log(&format!("{:20} {:20}", "Resolving Deps", filename)); - let nodes = resolve_dependencies(parsed)?; - - let deps = Parser::get_dependencies(&nodes)?; - - log(&format!( - "{:20} {:20}", - "Expanding PseudoInstructions", filename - )); - let mut nodes = expand_pseudo_ops(nodes, file_hash)?; - - // add a section instruction - nodes.insert( - 0, - node!(None, Opcode::Segment, Token::Immediate(file_hash as u32)), - ); - - for n in nodes.iter() { - println!("{n}"); - } - - program.add_module(nodes); - - for dep in deps { - log(&format!( - "{:20} {:20}", - "Including", - dep.file_name().unwrap().to_str().unwrap() - )); - - if !modules.contains(&quick_hash(&dep)) { - modules.insert(quick_hash(&dep)); - prepare_dependency(dep.as_path(), modules, program)? - } - } - - Ok(()) -} - -fn _build(_src: Vec) -> Result, AssembleError> { - Ok(vec![]) -} - -/// TODO: disassembling functionality -/// - We probably don't need to implement this for a while yet. -/// - This method should recover symbols such as labels and variables from the human -/// written assembly, recognising sequences that are expansions of pseudo-instructions -/// and reversing this to produce near enough the original source code. 
-pub fn disassemble(_: Vec) -> String { - todo!() -} - -#[derive(Debug)] -pub enum AssembleError { - Generic, - UnexpectedEof, - InvalidFile(PathBuf), - UnexpectedToken(Token, TokenType), - InvalidArg, - UndefinedSymbol(Symbol), -} - -impl fmt::Display for AssembleError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - AssembleError::Generic => write!(f, "Generic error"), - AssembleError::UnexpectedToken(tok, expected) => { - write!(f, "Unexpected token {tok:?}, expected {expected:?}") - } - AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"), - AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"), - AssembleError::InvalidArg => write!(f, "Invalid argument"), - AssembleError::UndefinedSymbol(symbol) => { - write!(f, "Undefined symbol {symbol}") - } - } - } -} - -fn quick_hash(value: &Path) -> u64 { - let mut hasher = DefaultHasher::new(); - value.canonicalize().unwrap().to_str().hash(&mut hasher); - hasher.finish() +pub mod prelude { + pub use crate::assembler::assemble; + pub use crate::assembler::disassemble; } // TODO: Use an actual logging or tracing library for pretty (scoped) output. fn log(message: &str) { println!("\x1b[32mINFO:\x1b[0m {message}"); } - -// create a macro that lexes and parses the input string into Nodes -#[macro_export] -macro_rules! dsa { - // Version with formatting arguments - ($hash:expr, $input:expr, $($args:expr),+) => {{ - let input = format!($input, $($args),+); - let tokens = $crate::lexer::lexer(input, $hash)?; - let parsed = $crate::parser::Parser::parse_nodes(tokens)?; - parsed - }}; - // Version without formatting - ($hash:expr, $input:expr) => {{ - let input = String::from($input); - let tokens = $crate::lexer::lexer(input, $hash)?; - let parsed = $crate::parser::Parser::parse_nodes(tokens)?; - parsed - }}; -} - -#[macro_export] -macro_rules! 
expect_token { - ($token:expr, Symbol) => { - match $token { - $crate::model::Token::Symbol(value) => Ok(value.clone()), - other => Err($crate::AssembleError::UnexpectedToken( - other.clone(), - $crate::model::TokenType::Symbol, - )), - } - }; - ($token:expr, Register) => { - match $token { - $crate::model::Token::Register(value) => Ok(value.clone()), - other => Err($crate::AssembleError::UnexpectedToken( - other.clone(), - $crate::model::TokenType::Register, - )), - } - }; - ($token:expr, Immediate) => { - match $token { - $crate::model::Token::Immediate(value) => Ok(value.clone()), - other => Err($crate::AssembleError::UnexpectedToken( - other.clone(), - $crate::model::TokenType::Immediate, - )), - } - }; - ($token:expr, StringLit) => { - match $token { - $crate::model::Token::StringLit(value) => Ok(value.clone()), - other => Err($crate::AssembleError::UnexpectedToken( - other.clone(), - $crate::model::TokenType::StringLit, - )), - } - }; - ($token:expr, Opcode) => { - match $token { - $crate::model::Token::Opcode(value) => Ok(value.clone()), - other => Err($crate::AssembleError::UnexpectedToken( - other.clone(), - $crate::model::TokenType::Opcode, - )), - } - }; -} - -#[macro_export] -macro_rules! 
expect_type { - ($token:expr, $($variant:ident),+) => {{ - let token = $token; - match &token { - $( - $crate::model::Token::$variant(_) => Ok(token.clone()), - )+ - other => { - let expected_type = expect_type!(@get_first_type $($variant),+); - Err($crate::AssembleError::UnexpectedToken( - other.clone().clone(), - expected_type, - )) - } - } - }}; - - (@get_first_type Symbol $(, $rest:ident)*) => { $crate::model::TokenType::Symbol }; - (@get_first_type Register $(, $rest:ident)*) => { $crate::model::TokenType::Register }; - (@get_first_type Immediate $(, $rest:ident)*) => { $crate::model::TokenType::Immediate }; - (@get_first_type StringLit $(, $rest:ident)*) => { $crate::model::TokenType::StringLit }; - (@get_first_type Opcode $(, $rest:ident)*) => { $crate::model::TokenType::Opcode }; -} diff --git a/assembler/src/main.rs b/assembler/src/main.rs index ab365ae..6185a64 100644 --- a/assembler/src/main.rs +++ b/assembler/src/main.rs @@ -13,7 +13,7 @@ fn main() { let src = PathBuf::from(input_path); let mut output_file = fs::File::create(output_path).unwrap(); - match assembler::assemble(&src) { + match assembler::assembler::assemble(&src) { Ok(res) => { res.iter().map(|i| i.encode()).for_each(|i| { output_file.write_all(&i.to_le_bytes()).unwrap();