refactoring assembler

This commit is contained in:
2025-06-19 23:28:53 +01:00
parent 5c83b49328
commit 52e2306fca
9 changed files with 287 additions and 283 deletions
+11 -243
View File
@@ -1,253 +1,21 @@
use core::fmt;
use std::{
collections::HashSet,
fs,
hash::{DefaultHasher, Hash, Hasher},
path::{Path, PathBuf},
};
use assembler::codegen::codegen;
use assembler::expand::expand_pseudo_ops;
use assembler::model::{Node, Opcode, Symbol, Token, TokenType};
use assembler::parser::{Parser, Program};
use assembler::resolver::{create_sections, resolve_dependencies, resolve_symbols};
use common::prelude::*;
use core::fmt;
use crate::{
codegen::codegen,
expand::expand_pseudo_ops,
model::{Node, Opcode, Symbol, Token, TokenType},
parser::{Parser, Program},
resolver::{create_sections, resolve_dependencies, resolve_symbols},
};
pub mod assembler;
pub mod codegen;
pub mod expand;
pub mod lexer;
pub mod model;
pub mod parser;
pub mod resolver;
use crate::assembler::lexer;
pub fn assemble(src: &Path) -> Result<Vec<Instruction>, AssembleError> {
let mut modules = HashSet::<u64>::new();
let mut program = Program::new();
let hash = quick_hash(src);
modules.insert(hash);
prepare_dependency(src, &mut modules, &mut program)?;
let mut nodes = program.nodes;
create_sections(&mut nodes)?;
resolve_symbols(&mut nodes)?;
let instructions = codegen(nodes)?;
for inst in instructions.iter() {
println!("{inst}");
}
Ok(instructions)
}
fn prepare_dependency(
path: &Path,
modules: &mut HashSet<u64>,
program: &mut Program,
) -> Result<(), AssembleError> {
let filename = path.file_name().unwrap().to_str().unwrap();
if let Ok(path) = path.canonicalize() {
log(&format!(
"{:20} {:20} [{}]",
"Building",
filename,
path.display()
));
}
let src = fs::read_to_string(path)
.map_err(|_| AssembleError::InvalidFile(path.to_path_buf()))?;
let file_hash = quick_hash(path);
log(&format!("{:20} {:20}", "Tokenising", filename));
let tokens = lexer::lexer(src, file_hash)?;
log(&format!("{:20} {:20}", "Parsing", filename));
let parsed = Parser::parse_nodes(tokens)?;
log(&format!("{:20} {:20}", "Resolving Deps", filename));
let nodes = resolve_dependencies(parsed)?;
let deps = Parser::get_dependencies(&nodes)?;
log(&format!(
"{:20} {:20}",
"Expanding PseudoInstructions", filename
));
let mut nodes = expand_pseudo_ops(nodes, file_hash)?;
// add a section instruction
nodes.insert(
0,
node!(None, Opcode::Segment, Token::Immediate(file_hash as u32)),
);
for n in nodes.iter() {
println!("{n}");
}
program.add_module(nodes);
for dep in deps {
log(&format!(
"{:20} {:20}",
"Including",
dep.file_name().unwrap().to_str().unwrap()
));
if !modules.contains(&quick_hash(&dep)) {
modules.insert(quick_hash(&dep));
prepare_dependency(dep.as_path(), modules, program)?
}
}
Ok(())
}
/// Placeholder build step: ignores `_src` and always succeeds with an empty
/// instruction list.
fn _build(_src: Vec<Node>) -> Result<Vec<Instruction>, AssembleError> {
    let instructions = Vec::new();
    Ok(instructions)
}
/// Turns assembled instructions back into assembly text (not implemented yet).
///
/// Planned behaviour, not needed for a while:
/// - recover symbols such as labels and variables that were present in the
///   hand-written assembly;
/// - recognise instruction sequences that are expansions of pseudo-instructions
///   and fold them back, producing something close to the original source.
///
/// # Panics
///
/// Always panics via `todo!()` until the functionality is written.
pub fn disassemble(_instructions: Vec<Instruction>) -> String {
    todo!()
}
/// Errors reported while assembling a program.
#[derive(Debug)]
pub enum AssembleError {
/// Unspecified failure; displayed as "Generic error".
Generic,
/// Input ended too early; displayed as "Unexpected end of file".
UnexpectedEof,
/// The file at this path could not be used (for example, reading it failed).
InvalidFile(PathBuf),
/// A token was found where a different kind was required: the offending token
/// followed by the expected token type.
UnexpectedToken(Token, TokenType),
/// An argument was not acceptable; displayed as "Invalid argument".
InvalidArg,
/// The named symbol has no definition.
UndefinedSymbol(Symbol),
}
impl fmt::Display for AssembleError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
AssembleError::Generic => write!(f, "Generic error"),
AssembleError::UnexpectedToken(tok, expected) => {
write!(f, "Unexpected token {tok:?}, expected {expected:?}")
}
AssembleError::UnexpectedEof => write!(f, "Unexpected end of file"),
AssembleError::InvalidFile(path) => write!(f, "Invalid file {path:?}"),
AssembleError::InvalidArg => write!(f, "Invalid argument"),
AssembleError::UndefinedSymbol(symbol) => {
write!(f, "Undefined symbol {symbol}")
}
}
}
}
/// Hashes the canonicalized form of `value` with `DefaultHasher` and returns the
/// 64-bit result.
///
/// # Panics
///
/// Panics when `value` cannot be canonicalized, because the result of
/// `canonicalize()` is unwrapped.
// NOTE(review): `to_str()` yields an `Option<&str>`, and that `Option` is what gets
// hashed. Every canonical path that is not valid UTF-8 therefore hashes as `None`
// and collides with the others — confirm whether hashing the path itself is wanted.
fn quick_hash(value: &Path) -> u64 {
let mut hasher = DefaultHasher::new();
value.canonicalize().unwrap().to_str().hash(&mut hasher);
hasher.finish()
// NOTE(review): no closing brace for `quick_hash` appears before the next item;
// this looks like old and new diff lines sharing the final `}` — verify against the
// real file before editing here.
/// Re-exports of the `assemble` and `disassemble` entry points.
pub mod prelude {
pub use crate::assembler::assemble;
pub use crate::assembler::disassemble;
}
/// Prints `message` to stdout behind a green `INFO:` tag.
///
/// TODO: Use an actual logging or tracing library for pretty (scoped) output.
fn log(message: &str) {
    // ANSI: switch to green, print the tag, then reset the colour.
    const TAG: &str = "\x1b[32mINFO:\x1b[0m";
    println!("{TAG} {message}");
}
/// Lexes and parses an assembly snippet into nodes.
///
/// `dsa!(hash, "text")` runs `text` through `lexer::lexer` (tagged with `hash`) and
/// `Parser::parse_nodes`. `dsa!(hash, "fmt {}", args...)` first builds the text with
/// `format!`. Both forms use `?`, so the macro must be invoked inside a function
/// whose return type can absorb the lexer's and parser's errors.
#[macro_export]
macro_rules! dsa {
    // With formatting arguments: build the text, then reuse the plain form below.
    ($hash:expr, $input:expr, $($args:expr),+) => {
        $crate::dsa!($hash, format!($input, $($args),+))
    };
    // Plain form: the input is converted to an owned `String` and processed.
    ($hash:expr, $input:expr) => {{
        let source = String::from($input);
        let tokens = $crate::lexer::lexer(source, $hash)?;
        $crate::parser::Parser::parse_nodes(tokens)?
    }};
}
/// Extracts the payload of a token that must be a specific `Token` variant.
///
/// `expect_token!(token, Variant)` evaluates to `Ok(payload.clone())` when `token`
/// is `Token::Variant(payload)`. Otherwise it evaluates to
/// `Err(AssembleError::UnexpectedToken(token.clone(), TokenType::Variant))`.
///
/// `Variant` must name a single-field `Token` variant that has a `TokenType`
/// variant of the same name (for example `Symbol`, `Register`, `Immediate`,
/// `StringLit` or `Opcode`).
#[macro_export]
macro_rules! expect_token {
    // One arm serves every variant: the identifier selects both the `Token`
    // pattern and the `TokenType` reported on mismatch.
    ($token:expr, $variant:ident) => {
        match $token {
            $crate::model::Token::$variant(value) => Ok(value.clone()),
            other => Err($crate::AssembleError::UnexpectedToken(
                other.clone(),
                $crate::model::TokenType::$variant,
            )),
        }
    };
}
/// Checks that a token is one of the listed `Token` variants.
///
/// `expect_type!(token, A, B, ...)` evaluates to `Ok(token.clone())` when `token`
/// matches any listed variant. Otherwise it evaluates to
/// `Err(AssembleError::UnexpectedToken(token.clone(), TokenType::A))`; only the
/// first listed variant is reported as the expected type.
///
/// `token` may be an owned `Token` or a reference to one; it is evaluated once.
#[macro_export]
macro_rules! expect_type {
    ($token:expr, $($variant:ident),+) => {{
        let token = $token;
        // Reborrow as a plain `&Token` so that owned and borrowed inputs take the
        // same path and a single `clone()` always produces an owned `Token`.
        let token_ref: &$crate::model::Token = &token;
        match token_ref {
            $(
                $crate::model::Token::$variant(_) => Ok(token_ref.clone()),
            )+
            other => Err($crate::AssembleError::UnexpectedToken(
                other.clone(),
                // `$crate::` keeps this internal call resolvable even when the
                // macro is invoked by path from a scope that has not imported it.
                $crate::expect_type!(@get_first_type $($variant),+),
            )),
        }
    }};
    // Internal rules: map the first listed variant name to its `TokenType`.
    (@get_first_type Symbol $(, $rest:ident)*) => { $crate::model::TokenType::Symbol };
    (@get_first_type Register $(, $rest:ident)*) => { $crate::model::TokenType::Register };
    (@get_first_type Immediate $(, $rest:ident)*) => { $crate::model::TokenType::Immediate };
    (@get_first_type StringLit $(, $rest:ident)*) => { $crate::model::TokenType::StringLit };
    (@get_first_type Opcode $(, $rest:ident)*) => { $crate::model::TokenType::Opcode };
}