refactoring assembler

This commit is contained in:
2025-06-19 23:28:53 +01:00
parent 5c83b49328
commit 52e2306fca
9 changed files with 287 additions and 283 deletions
@@ -1,9 +1,7 @@
use common::{args, prelude::*};
use crate::{
AssembleError, expect_token,
model::{Node, Opcode},
};
use crate::assembler::model::{Node, Opcode};
use crate::{assembler::AssembleError, expect_token};
pub fn codegen(nodes: Vec<Node>) -> Result<Vec<Instruction>, AssembleError> {
let mut instructions = vec![];
@@ -140,15 +138,15 @@ fn build_instruction(node: Node) -> Result<Instruction, AssembleError> {
_ => unreachable!(),
}
}
Opcode::Iadd | Opcode::Isub => {
Opcode::AddI | Opcode::SubI => {
let reg = expect_token!(args.first().unwrap(), Register)?;
let immediate = expect_token!(args.get(1).unwrap(), Immediate)? as u16;
let dest = expect_token!(args.get(2).unwrap(), Register)?;
let args = args!(I, immediate: immediate, r1: reg, r2: dest);
match opcode {
Opcode::Iadd => Ok(Instruction::AddImmediate(args)),
Opcode::Isub => Ok(Instruction::SubImmediate(args)),
Opcode::AddI => Ok(Instruction::AddImmediate(args)),
Opcode::SubI => Ok(Instruction::SubImmediate(args)),
_ => unreachable!(),
}
}
@@ -1,10 +1,7 @@
use common::prelude::Register;
use crate::{
AssembleError, expect_token, expect_type,
model::{Node, Opcode, Token},
node,
};
use crate::assembler::model::{Node, Opcode, Token};
use crate::{assembler::AssembleError, expect_token, expect_type, node};
pub fn expand_pseudo_ops(
mut nodes: Vec<Node>,
@@ -49,7 +46,7 @@ fn expand_push(current: Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError
nodes.extend(vec![
node!(
label,
Opcode::Isub,
Opcode::SubI,
Token::Register(Register::Spr),
Token::Immediate(4),
Token::Register(Register::Spr)
@@ -80,7 +77,7 @@ fn expand_pop(current: Node, nodes: &mut Vec<Node>) -> Result<(), AssembleError>
),
node!(
None,
Opcode::Iadd,
Opcode::AddI,
Token::Register(Register::Spr),
Token::Immediate(4),
Token::Register(Register::Spr)
@@ -1,9 +1,7 @@
use std::str::FromStr;
use crate::{
AssembleError,
model::{Module, Opcode, Symbol, Token},
};
use crate::assembler::AssembleError;
use crate::assembler::model::{Module, Opcode, Symbol, Token};
use common::prelude::Register;
pub fn lexer(mut program: String, module: u64) -> Result<Vec<Token>, AssembleError> {
+248
View File
@@ -0,0 +1,248 @@
use std::{
collections::HashSet,
fmt, fs,
hash::{DefaultHasher, Hash, Hasher},
path::{Path, PathBuf},
};
use common::prelude::Instruction;
use crate::{
assembler::{
expand::expand_pseudo_ops,
model::{Node, Opcode, Symbol, Token, TokenType},
parser::{Parser, Program},
resolver::{create_sections, resolve_dependencies, resolve_symbols},
},
codegen, log, node,
};
pub mod codegen;
pub mod expand;
pub mod lexer;
pub mod model;
pub mod parser;
pub mod resolver;
/// Assembles the program rooted at `src` into a list of instructions.
///
/// The root file and every file it depends on are gathered into one `Program`
/// by `prepare_dependency`. Sections are then created, symbols resolved, and
/// the resulting nodes handed to `codegen`. Every generated instruction is
/// printed to stdout before the list is returned.
///
/// # Errors
///
/// Returns the first `AssembleError` reported by any of those stages.
pub fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
    let mut visited: HashSet<u64> = HashSet::new();
    let mut program = Program::new();

    // Record the root file up front so an include cycle cannot re-enter it.
    let root_hash = quick_hash(src);
    visited.insert(root_hash);
    prepare_dependency(src, &mut visited, &mut program)?;

    let mut nodes = program.nodes;
    create_sections(&mut nodes)?;
    resolve_symbols(&mut nodes)?;

    let instructions = codegen(nodes)?;
    instructions.iter().for_each(|inst| println!("{inst}"));
    Ok(instructions)
}
fn prepare_dependency(
path: &Path,
modules: &mut HashSet<u64>,
program: &mut Program,
) -> Result<(), AssembleError> {
let filename = path.file_name().unwrap().to_str().unwrap();
if let Ok(path) = path.canonicalize() {
log(&format!(
"{:20} {:20} [{}]",
"Building",
filename,
path.display()
));
}
let src = fs::read_to_string(path)
.map_err(|_| AssembleError::InvalidFile(path.to_path_buf()))?;
let file_hash = quick_hash(path);
log(&format!("{:20} {:20}", "Tokenising", filename));
let tokens = lexer::lexer(src, file_hash)?;
log(&format!("{:20} {:20}", "Parsing", filename));
let parsed = Parser::parse_nodes(tokens)?;
log(&format!("{:20} {:20}", "Resolving Deps", filename));
let nodes = resolve_dependencies(parsed)?;
let deps = Parser::get_dependencies(&nodes)?;
log(&format!(
"{:20} {:20}",
"Expanding PseudoInstructions", filename
));
let mut nodes = expand_pseudo_ops(nodes, file_hash)?;
// add a section instruction
nodes.insert(
0,
node!(None, Opcode::Segment, Token::Immediate(file_hash as u32)),
);
for n in nodes.iter() {
println!("{n}");
}
program.add_module(nodes);
for dep in deps {
log(&format!(
"{:20} {:20}",
"Including",
dep.file_name().unwrap().to_str().unwrap()
));
if !modules.contains(&quick_hash(&dep)) {
modules.insert(quick_hash(&dep));
prepare_dependency(dep.as_path(), modules, program)?
}
}
Ok(())
}
/// Placeholder build step: ignores its input and yields an empty instruction list.
fn _build(_src: Vec<Node>) -> Result<Vec<Instruction>, AssembleError> {
    let instructions = Vec::new();
    Ok(instructions)
}
/// Turns assembled instructions back into assembly source text (not yet implemented).
///
/// TODO: disassembling functionality
/// - We probably don't need to implement this for a while yet.
/// - This function should recover symbols such as labels and variables from the
///   human-written assembly, recognise sequences that are expansions of
///   pseudo-instructions, and reverse them to produce something close to the
///   original source code.
///
/// # Panics
///
/// Always panics: the body is currently `todo!()`.
pub fn disassemble(_: Vec<Instruction>) -> String {
todo!()
}
/// Errors that can be reported while assembling a program.
#[derive(Debug)]
pub enum AssembleError {
/// Failure with no further detail (displayed as "Generic error").
Generic,
/// Displayed as "Unexpected end of file".
UnexpectedEof,
/// The source file at the carried path could not be read.
InvalidFile(PathBuf),
/// The token that was found, followed by the token type that was expected.
UnexpectedToken(Token, TokenType),
/// Displayed as "Invalid argument".
InvalidArg,
/// Carries the symbol in question; displayed as "Undefined symbol {symbol}".
UndefinedSymbol(Symbol),
}
impl fmt::Display for AssembleError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
AssembleError::Generic => write!(f, "Generic error"),
AssembleError::UnexpectedToken(tok, expected) => {
write!(f, "Unexpected token {tok:?}, expected {expected:?}")
}
AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"),
AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"),
AssembleError::InvalidArg => write!(f, "Invalid argument"),
AssembleError::UndefinedSymbol(symbol) => {
write!(f, "Undefined symbol {symbol}")
}
}
}
}
/// Hashes the canonical form of `value` into a 64-bit module identifier.
///
/// When the path cannot be canonicalised (typically because the file does not
/// exist) the path is hashed as given instead of panicking, so the caller's own
/// file read can report the failure. Hashes of paths that do canonicalise are
/// unchanged.
///
/// Note: the path is hashed via `to_str()`, so every non-UTF-8 path hashes as
/// `None` and therefore to the same value.
fn quick_hash(value: &Path) -> u64 {
    let resolved = value
        .canonicalize()
        .unwrap_or_else(|_| value.to_path_buf());
    let mut hasher = DefaultHasher::new();
    resolved.to_str().hash(&mut hasher);
    hasher.finish()
}
/// Lexes and parses a string of assembly source into parsed nodes.
///
/// `dsa!(hash, "text")` uses the text as-is; `dsa!(hash, "fmt {}", args...)`
/// first runs it through `format!`. `hash` is passed to the lexer as the module
/// id. The expansion uses `?`, so it must appear in a function whose error type
/// accepts the lexer's and parser's errors.
///
/// The lexer and parser are addressed through `$crate::assembler`, matching
/// where those modules are declared and how the sibling macros refer to them.
#[macro_export]
macro_rules! dsa {
    // Version with formatting arguments
    ($hash:expr, $input:expr, $($args:expr),+) => {{
        let input = format!($input, $($args),+);
        let tokens = $crate::assembler::lexer::lexer(input, $hash)?;
        $crate::assembler::parser::Parser::parse_nodes(tokens)?
    }};
    // Version without formatting
    ($hash:expr, $input:expr) => {{
        let input = String::from($input);
        let tokens = $crate::assembler::lexer::lexer(input, $hash)?;
        $crate::assembler::parser::Parser::parse_nodes(tokens)?
    }};
}
/// Extracts the payload of a token that must be a specific `Token` variant.
///
/// `expect_token!(token, Variant)` yields `Ok` with a clone of the payload when
/// `token` is `Token::Variant(..)`. Otherwise it yields
/// `Err(AssembleError::UnexpectedToken(..))` carrying a clone of the token and
/// `TokenType::Variant`.
///
/// One rule covers every variant (`Symbol`, `Register`, `Immediate`, `StringLit`,
/// `Opcode`): the variant name must exist on both `Token` (with a single payload)
/// and `TokenType`, otherwise the expansion fails to compile.
#[macro_export]
macro_rules! expect_token {
    ($token:expr, $variant:ident) => {
        match $token {
            $crate::assembler::model::Token::$variant(value) => Ok(value.clone()),
            other => Err($crate::assembler::AssembleError::UnexpectedToken(
                other.clone(),
                $crate::assembler::model::TokenType::$variant,
            )),
        }
    };
}
/// Checks that a token is one of the listed `Token` variants.
///
/// `expect_type!(token, A, B, ...)` evaluates `token` once and yields `Ok` with a
/// clone of it when it matches any listed variant. Otherwise it yields
/// `Err(AssembleError::UnexpectedToken(..))` carrying the token and the
/// `TokenType` of the FIRST listed variant only.
#[macro_export]
macro_rules! expect_type {
($token:expr, $($variant:ident),+) => {{
let token = $token;
match &token {
$(
$crate::assembler::model::Token::$variant(_) => Ok(token.clone()),
)+
other => {
// Only the first requested variant is reported as the expected type.
let expected_type = expect_type!(@get_first_type $($variant),+);
// NOTE(review): the double clone looks intended to cope with `$token` being a
// reference as well as an owned token - confirm before simplifying.
Err($crate::assembler::AssembleError::UnexpectedToken(
other.clone().clone(),
expected_type,
))
}
}
}};
// Internal rules: map the first listed variant name to its `TokenType`.
(@get_first_type Symbol $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Symbol };
(@get_first_type Register $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Register };
(@get_first_type Immediate $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Immediate };
(@get_first_type StringLit $(, $rest:ident)*) => { $crate::assembler::model::TokenType::StringLit };
(@get_first_type Opcode $(, $rest:ident)*) => { $crate::assembler::model::TokenType::Opcode };
}
@@ -2,7 +2,7 @@ use std::{fmt, str::FromStr};
use common::prelude::Register;
use crate::AssembleError;
use crate::assembler::AssembleError;
#[derive(Debug, Clone)]
pub struct Node {
@@ -123,8 +123,8 @@ impl fmt::Display for Opcode {
Opcode::Int => write!(f, "int"),
Opcode::Irt => write!(f, "irt"),
Opcode::Hlt => write!(f, "hlt"),
Opcode::Iadd => write!(f, "iadd"),
Opcode::Isub => write!(f, "isub"),
Opcode::AddI => write!(f, "addi"),
Opcode::SubI => write!(f, "subi"),
Opcode::Db => write!(f, "db"),
Opcode::Dh => write!(f, "dh"),
Opcode::Dw => write!(f, "dw"),
@@ -240,8 +240,8 @@ pub enum Opcode {
Int,
Irt,
Hlt,
Iadd,
Isub,
AddI,
SubI,
// Pseudo-instructions
Db,
Dh,
@@ -316,8 +316,8 @@ impl FromStr for Opcode {
"int" => Ok(Self::Int),
"irt" => Ok(Self::Irt),
"hlt" => Ok(Self::Hlt),
"iadd" => Ok(Self::Iadd),
"isub" => Ok(Self::Isub),
"addi" => Ok(Self::AddI),
"subi" => Ok(Self::SubI),
"db" => Ok(Self::Db),
"dh" => Ok(Self::Dh),
"dw" => Ok(Self::Dw),
@@ -339,7 +339,7 @@ impl Opcode {
"nop", "mov", "movs", "ldb", "ldbs", "ldh", "ldhs", "ldw", "stb", "sth", "stw",
"lli", "lui", "jmp", "jeq", "jne", "jgt", "jge", "jlt", "jle", "cmp", "inc",
"dec", "shl", "shr", "add", "sub", "and", "or", "not", "xor", "nand", "nor",
"xnor", "int", "irt", "hlt", "iadd", "isub", // Pseudo-instructions
"xnor", "int", "irt", "hlt", "addi", "subi", // Pseudo-instructions
"db", "dh", "dw", "resb", "resh", "resw", "push", "pop", "lwi", "include",
];
@@ -382,8 +382,8 @@ impl Opcode {
Self::Int => Some(0x22),
Self::Irt => Some(0x23),
Self::Hlt => Some(0x24),
Self::Iadd => Some(0x25),
Self::Isub => Some(0x26),
Self::AddI => Some(0x25),
Self::SubI => Some(0x26),
Self::Segment => Some(0x27),
// Pseudo-instructions don't have opcode values
_ => None,
@@ -1,11 +1,8 @@
use std::path::PathBuf;
use crate::{
AssembleError, expect_token, expect_type,
model::{Node, Opcode, Token},
node,
};
use crate::{assembler::AssembleError, expect_token, expect_type, node};
use crate::assembler::model::{Node, Opcode, Token};
use common::prelude::*;
pub struct Parser {
@@ -177,7 +174,7 @@ impl Parser {
}
// Immediate Arithmetic
Opcode::Iadd | Opcode::Isub => {
Opcode::AddI | Opcode::SubI => {
let reg = expect_type!(self.next()?, Register)?;
let imm = expect_type!(self.next()?, Immediate)?;
let reg2 = if expect_type!(self.peek_next()?, Register).is_ok() {
@@ -2,11 +2,9 @@ use std::{collections::HashMap, path::PathBuf};
use common::prelude::Register;
use crate::{
AssembleError,
model::{Module, Node, Opcode, Symbol, Token},
node, quick_hash,
};
use crate::assembler::model::{Module, Node, Opcode, Symbol, Token};
use crate::assembler::quick_hash;
use crate::{assembler::AssembleError, node};
pub fn resolve_symbols(nodes: &mut [Node]) -> Result<(), AssembleError> {
let symbol_table = generate_symbol_table(nodes)?;
+11 -243
View File
@@ -1,253 +1,21 @@
use core::fmt;
use std::{
collections::HashSet,
fs,
hash::{DefaultHasher, Hash, Hasher},
path::{Path, PathBuf},
};
use assembler::codegen::codegen;
use assembler::expand::expand_pseudo_ops;
use assembler::model::{Node, Opcode, Symbol, Token, TokenType};
use assembler::parser::{Parser, Program};
use assembler::resolver::{create_sections, resolve_dependencies, resolve_symbols};
use common::prelude::*;
use core::fmt;
use crate::{
codegen::codegen,
expand::expand_pseudo_ops,
model::{Node, Opcode, Symbol, Token, TokenType},
parser::{Parser, Program},
resolver::{create_sections, resolve_dependencies, resolve_symbols},
};
pub mod assembler;
pub mod codegen;
pub mod expand;
pub mod lexer;
pub mod model;
pub mod parser;
pub mod resolver;
use crate::assembler::lexer;
/// Assembles the program rooted at `src` into a list of instructions.
///
/// The root file and every file it depends on are gathered into one `Program`
/// by `prepare_dependency`. Sections are then created, symbols resolved, and
/// the resulting nodes handed to `codegen`. Every generated instruction is
/// printed to stdout before the list is returned.
///
/// # Errors
///
/// Returns the first `AssembleError` reported by any of those stages.
pub fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
    let mut visited: HashSet<u64> = HashSet::new();
    let mut program = Program::new();

    // Record the root file up front so an include cycle cannot re-enter it.
    let root_hash = quick_hash(src);
    visited.insert(root_hash);
    prepare_dependency(src, &mut visited, &mut program)?;

    let mut nodes = program.nodes;
    create_sections(&mut nodes)?;
    resolve_symbols(&mut nodes)?;

    let instructions = codegen(nodes)?;
    instructions.iter().for_each(|inst| println!("{inst}"));
    Ok(instructions)
}
fn prepare_dependency(
path: &Path,
modules: &mut HashSet<u64>,
program: &mut Program,
) -> Result<(), AssembleError> {
let filename = path.file_name().unwrap().to_str().unwrap();
if let Ok(path) = path.canonicalize() {
log(&format!(
"{:20} {:20} [{}]",
"Building",
filename,
path.display()
));
}
let src = fs::read_to_string(path)
.map_err(|_| AssembleError::InvalidFile(path.to_path_buf()))?;
let file_hash = quick_hash(path);
log(&format!("{:20} {:20}", "Tokenising", filename));
let tokens = lexer::lexer(src, file_hash)?;
log(&format!("{:20} {:20}", "Parsing", filename));
let parsed = Parser::parse_nodes(tokens)?;
log(&format!("{:20} {:20}", "Resolving Deps", filename));
let nodes = resolve_dependencies(parsed)?;
let deps = Parser::get_dependencies(&nodes)?;
log(&format!(
"{:20} {:20}",
"Expanding PseudoInstructions", filename
));
let mut nodes = expand_pseudo_ops(nodes, file_hash)?;
// add a section instruction
nodes.insert(
0,
node!(None, Opcode::Segment, Token::Immediate(file_hash as u32)),
);
for n in nodes.iter() {
println!("{n}");
}
program.add_module(nodes);
for dep in deps {
log(&format!(
"{:20} {:20}",
"Including",
dep.file_name().unwrap().to_str().unwrap()
));
if !modules.contains(&quick_hash(&dep)) {
modules.insert(quick_hash(&dep));
prepare_dependency(dep.as_path(), modules, program)?
}
}
Ok(())
}
/// Placeholder build step: ignores its input and yields an empty instruction list.
fn _build(_src: Vec<Node>) -> Result<Vec<Instruction>, AssembleError> {
    let instructions = Vec::new();
    Ok(instructions)
}
/// Turns assembled instructions back into assembly source text (not yet implemented).
///
/// TODO: disassembling functionality
/// - We probably don't need to implement this for a while yet.
/// - This function should recover symbols such as labels and variables from the
///   human-written assembly, recognise sequences that are expansions of
///   pseudo-instructions, and reverse them to produce something close to the
///   original source code.
///
/// # Panics
///
/// Always panics: the body is currently `todo!()`.
pub fn disassemble(_: Vec<Instruction>) -> String {
todo!()
}
/// Errors that can be reported while assembling a program.
#[derive(Debug)]
pub enum AssembleError {
/// Failure with no further detail (displayed as "Generic error").
Generic,
/// Displayed as "Unexpected end of file".
UnexpectedEof,
/// The source file at the carried path could not be read.
InvalidFile(PathBuf),
/// The token that was found, followed by the token type that was expected.
UnexpectedToken(Token, TokenType),
/// Displayed as "Invalid argument".
InvalidArg,
/// Carries the symbol in question; displayed as "Undefined symbol {symbol}".
UndefinedSymbol(Symbol),
}
impl fmt::Display for AssembleError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
AssembleError::Generic => write!(f, "Generic error"),
AssembleError::UnexpectedToken(tok, expected) => {
write!(f, "Unexpected token {tok:?}, expected {expected:?}")
}
AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"),
AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"),
AssembleError::InvalidArg => write!(f, "Invalid argument"),
AssembleError::UndefinedSymbol(symbol) => {
write!(f, "Undefined symbol {symbol}")
}
}
}
}
fn quick_hash(value: &Path) -> u64 {
let mut hasher = DefaultHasher::new();
value.canonicalize().unwrap().to_str().hash(&mut hasher);
hasher.finish()
pub mod prelude {
pub use crate::assembler::assemble;
pub use crate::assembler::disassemble;
}
// TODO: Use an actual logging or tracing library for pretty (scoped) output.
/// Prints `message` to stdout behind a green `INFO:` tag.
fn log(message: &str) {
    println!("\x1b[32mINFO:\x1b[0m {}", message);
}
/// Lexes and parses a string of assembly source into parsed nodes.
///
/// `dsa!(hash, "text")` uses the text as-is; `dsa!(hash, "fmt {}", args...)`
/// first runs it through `format!`. `hash` is passed to the lexer as the module
/// id. The expansion uses `?`, so it must appear in a function whose error type
/// accepts the lexer's and parser's errors.
#[macro_export]
macro_rules! dsa {
    // Formatted form: build the source text with `format!` first.
    ($hash:expr, $input:expr, $($args:expr),+) => {{
        let source = format!($input, $($args),+);
        let lexed = $crate::lexer::lexer(source, $hash)?;
        $crate::parser::Parser::parse_nodes(lexed)?
    }};
    // Plain form: take the source text as given.
    ($hash:expr, $input:expr) => {{
        let source = String::from($input);
        let lexed = $crate::lexer::lexer(source, $hash)?;
        $crate::parser::Parser::parse_nodes(lexed)?
    }};
}
/// Extracts the payload of a token that must be a specific `Token` variant.
///
/// `expect_token!(token, Variant)` yields `Ok` with a clone of the payload when
/// `token` is `Token::Variant(..)`. Otherwise it yields
/// `Err(AssembleError::UnexpectedToken(..))` carrying a clone of the token and
/// `TokenType::Variant`.
///
/// One rule covers every variant (`Symbol`, `Register`, `Immediate`, `StringLit`,
/// `Opcode`): the variant name must exist on both `Token` (with a single payload)
/// and `TokenType`, otherwise the expansion fails to compile.
#[macro_export]
macro_rules! expect_token {
    ($token:expr, $variant:ident) => {
        match $token {
            $crate::model::Token::$variant(value) => Ok(value.clone()),
            other => Err($crate::AssembleError::UnexpectedToken(
                other.clone(),
                $crate::model::TokenType::$variant,
            )),
        }
    };
}
/// Checks that a token is one of the listed `Token` variants.
///
/// `expect_type!(token, A, B, ...)` evaluates `token` once and yields `Ok` with a
/// clone of it when it matches any listed variant. Otherwise it yields
/// `Err(AssembleError::UnexpectedToken(..))` carrying the token and the
/// `TokenType` of the FIRST listed variant only.
#[macro_export]
macro_rules! expect_type {
($token:expr, $($variant:ident),+) => {{
let token = $token;
match &token {
$(
$crate::model::Token::$variant(_) => Ok(token.clone()),
)+
other => {
// Only the first requested variant is reported as the expected type.
let expected_type = expect_type!(@get_first_type $($variant),+);
// NOTE(review): the double clone looks intended to cope with `$token` being a
// reference as well as an owned token - confirm before simplifying.
Err($crate::AssembleError::UnexpectedToken(
other.clone().clone(),
expected_type,
))
}
}
}};
// Internal rules: map the first listed variant name to its `TokenType`.
(@get_first_type Symbol $(, $rest:ident)*) => { $crate::model::TokenType::Symbol };
(@get_first_type Register $(, $rest:ident)*) => { $crate::model::TokenType::Register };
(@get_first_type Immediate $(, $rest:ident)*) => { $crate::model::TokenType::Immediate };
(@get_first_type StringLit $(, $rest:ident)*) => { $crate::model::TokenType::StringLit };
(@get_first_type Opcode $(, $rest:ident)*) => { $crate::model::TokenType::Opcode };
}
+1 -1
View File
@@ -13,7 +13,7 @@ fn main() {
let src = PathBuf::from(input_path);
let mut output_file = fs::File::create(output_path).unwrap();
match assembler::assemble(&src) {
match assembler::assembler::assemble(&src) {
Ok(res) => {
res.iter().map(|i| i.encode()).for_each(|i| {
output_file.write_all(&i.to_le_bytes()).unwrap();